author     Artur Grabowski <art@cvs.openbsd.org>  2001-11-27 05:27:13 +0000
committer  Artur Grabowski <art@cvs.openbsd.org>  2001-11-27 05:27:13 +0000
commit     8a1845e49f56720cbfccd4c7f5f80ba5b980fdf4 (patch)
tree       d4a522dc41cdc79ba48fe761e94663b795da8cc0 /sys
parent     0d68e9b5af14f4bfa04d22dbebab5972ac647b26 (diff)
Merge in the unified buffer cache code as found in NetBSD 2001/03/10. The
code was written mostly by Chuck Silvers <chuq@chuq.com>/<chs@netbsd.org>. It has been tested for the past few weeks by many developers and should be in a pretty stable state, but will require optimizations and additional cleanups.
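The recurring pattern this commit introduces into each filesystem's read path (adosfs, cd9660, and msdosfs below) is to map a pageable kernel window over the file's uvm_object and copy through it, instead of going block by block through the buffer cache. A condensed sketch, where "filesize" stands in for the filesystem's own size field (ap->fsize, ip->i_size, dep->de_FileSize in the hunks that follow):

	if (vp->v_type == VREG) {
		error = 0;
		while (uio->uio_resid > 0) {
			void *win;
			vsize_t bytelen = MIN(filesize - uio->uio_offset,
			    uio->uio_resid);

			if (bytelen == 0)
				break;			/* at EOF */
			/* map a window over [offset, offset + bytelen) */
			win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
			    &bytelen, UBC_READ);
			error = uiomove(win, bytelen, uio);
			ubc_release(win, 0);		/* unmap the window */
			if (error)
				break;
		}
		goto out;		/* skip the old buffer-cache loop */
	}

ubc_alloc() may shorten bytelen to what fits in one window, so the loop naturally resumes where the previous window ended.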
Diffstat (limited to 'sys')
-rw-r--r--  sys/adosfs/advnops.c | 28
-rw-r--r--  sys/arch/alpha/alpha/pmap.c | 6
-rw-r--r--  sys/arch/i386/i386/vm_machdep.c | 7
-rw-r--r--  sys/conf/files | 3
-rw-r--r--  sys/dev/vnd.c | 6
-rw-r--r--  sys/isofs/cd9660/cd9660_vfsops.c | 4
-rw-r--r--  sys/isofs/cd9660/cd9660_vnops.c | 34
-rw-r--r--  sys/kern/exec_subr.c | 3
-rw-r--r--  sys/kern/init_main.c | 4
-rw-r--r--  sys/kern/kern_exec.c | 3
-rw-r--r--  sys/kern/vfs_bio.c | 44
-rw-r--r--  sys/kern/vfs_default.c | 683
-rw-r--r--  sys/kern/vfs_subr.c | 88
-rw-r--r--  sys/kern/vfs_sync.c | 17
-rw-r--r--  sys/kern/vfs_syscalls.c | 13
-rw-r--r--  sys/kern/vfs_vnops.c | 31
-rw-r--r--  sys/kern/vnode_if.c | 140
-rw-r--r--  sys/kern/vnode_if.src | 47
-rw-r--r--  sys/msdosfs/msdosfs_denode.c | 19
-rw-r--r--  sys/msdosfs/msdosfs_fat.c | 40
-rw-r--r--  sys/msdosfs/msdosfs_vfsops.c | 23
-rw-r--r--  sys/msdosfs/msdosfs_vnops.c | 190
-rw-r--r--  sys/nfs/nfs.h | 20
-rw-r--r--  sys/nfs/nfs_bio.c | 928
-rw-r--r--  sys/nfs/nfs_node.c | 27
-rw-r--r--  sys/nfs/nfs_serv.c | 11
-rw-r--r--  sys/nfs/nfs_subs.c | 267
-rw-r--r--  sys/nfs/nfs_syscalls.c | 43
-rw-r--r--  sys/nfs/nfs_var.h | 14
-rw-r--r--  sys/nfs/nfs_vfsops.c | 9
-rw-r--r--  sys/nfs/nfs_vnops.c | 264
-rw-r--r--  sys/nfs/nfsnode.h | 16
-rw-r--r--  sys/sys/buf.h | 10
-rw-r--r--  sys/sys/mount.h | 4
-rw-r--r--  sys/sys/param.h | 15
-rw-r--r--  sys/sys/vnode.h | 20
-rw-r--r--  sys/sys/vnode_if.h | 48
-rw-r--r--  sys/ufs/ext2fs/ext2fs_balloc.c | 230
-rw-r--r--  sys/ufs/ext2fs/ext2fs_extern.h | 7
-rw-r--r--  sys/ufs/ext2fs/ext2fs_inode.c | 58
-rw-r--r--  sys/ufs/ext2fs/ext2fs_readwrite.c | 103
-rw-r--r--  sys/ufs/ext2fs/ext2fs_subr.c | 5
-rw-r--r--  sys/ufs/ext2fs/ext2fs_vfsops.c | 15
-rw-r--r--  sys/ufs/ext2fs/ext2fs_vnops.c | 12
-rw-r--r--  sys/ufs/ffs/ffs_alloc.c | 11
-rw-r--r--  sys/ufs/ffs/ffs_balloc.c | 60
-rw-r--r--  sys/ufs/ffs/ffs_extern.h | 5
-rw-r--r--  sys/ufs/ffs/ffs_inode.c | 121
-rw-r--r--  sys/ufs/ffs/ffs_softdep.c | 274
-rw-r--r--  sys/ufs/ffs/ffs_vfsops.c | 7
-rw-r--r--  sys/ufs/ffs/ffs_vnops.c | 59
-rw-r--r--  sys/ufs/ufs/inode.h | 3
-rw-r--r--  sys/ufs/ufs/ufs_bmap.c | 16
-rw-r--r--  sys/ufs/ufs/ufs_extern.h | 3
-rw-r--r--  sys/ufs/ufs/ufs_inode.c | 149
-rw-r--r--  sys/ufs/ufs/ufs_readwrite.c | 130
-rw-r--r--  sys/ufs/ufs/ufs_vnops.c | 4
-rw-r--r--  sys/ufs/ufs/ufsmount.h | 3
-rw-r--r--  sys/uvm/uvm_anon.c | 5
-rw-r--r--  sys/uvm/uvm_aobj.c | 12
-rw-r--r--  sys/uvm/uvm_bio.c | 547
-rw-r--r--  sys/uvm/uvm_extern.h | 35
-rw-r--r--  sys/uvm/uvm_fault.c | 6
-rw-r--r--  sys/uvm/uvm_map.h | 4
-rw-r--r--  sys/uvm/uvm_mmap.c | 34
-rw-r--r--  sys/uvm/uvm_page.c | 8
-rw-r--r--  sys/uvm/uvm_page_i.h | 5
-rw-r--r--  sys/uvm/uvm_pager.c | 12
-rw-r--r--  sys/uvm/uvm_param.h | 6
-rw-r--r--  sys/uvm/uvm_swap.c | 30
-rw-r--r--  sys/uvm/uvm_vnode.c | 1602
-rw-r--r--  sys/uvm/uvm_vnode.h | 52
72 files changed, 4246 insertions, 2516 deletions
diff --git a/sys/adosfs/advnops.c b/sys/adosfs/advnops.c
index 78d237f41e5..19bfdcc5738 100644
--- a/sys/adosfs/advnops.c
+++ b/sys/adosfs/advnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: advnops.c,v 1.17 2001/06/23 02:14:21 csapuntz Exp $ */
+/* $OpenBSD: advnops.c,v 1.18 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: advnops.c,v 1.32 1996/10/13 02:52:09 christos Exp $ */
/*
@@ -131,7 +131,9 @@ struct vnodeopv_entry_desc adosfs_vnodeop_entries[] = {
{ &vop_pathconf_desc, adosfs_pathconf }, /* pathconf */
{ &vop_advlock_desc, adosfs_advlock }, /* advlock */
{ &vop_bwrite_desc, adosfs_bwrite }, /* bwrite */
- { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_size_desc, genfs_size },
+ { NULL, NULL }
};
struct vnodeopv_desc adosfs_vnodeop_opv_desc =
@@ -272,6 +274,28 @@ adosfs_read(v)
/*
* taken from ufs_read()
*/
+
+ if (sp->a_vp->v_type == VREG) {
+ error = 0;
+ while (uio->uio_resid > 0) {
+ void *win;
+ vsize_t bytelen = min(ap->fsize - uio->uio_offset,
+ uio->uio_resid);
+
+ if (bytelen == 0) {
+ break;
+ }
+ win = ubc_alloc(&sp->a_vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
+ }
+ }
+ goto reterr;
+ }
+
do {
/*
* we are only supporting ADosFFS currently
diff --git a/sys/arch/alpha/alpha/pmap.c b/sys/arch/alpha/alpha/pmap.c
index 9ff390da8c2..1d50a35d446 100644
--- a/sys/arch/alpha/alpha/pmap.c
+++ b/sys/arch/alpha/alpha/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.23 2001/11/09 15:31:11 art Exp $ */
+/* $OpenBSD: pmap.c,v 1.24 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: pmap.c,v 1.154 2000/12/07 22:18:55 thorpej Exp $ */
/*-
@@ -804,8 +804,8 @@ pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
/*
* Figure out how many PTE's are necessary to map the kernel.
*/
- lev3mapsize = (VM_PHYS_SIZE +
- nbuf * MAXBSIZE + + PAGER_MAP_SIZE + 16 * NCARGS) / NBPG +
+ lev3mapsize = (VM_PHYS_SIZE + ubc_nwins * ubc_winsize +
+ nbuf * MAXBSIZE + 16 * NCARGS + PAGER_MAP_SIZE) / NBPG +
(maxproc * UPAGES) + NKMEMCLUSTERS;
#ifdef SYSVSHM
diff --git a/sys/arch/i386/i386/vm_machdep.c b/sys/arch/i386/i386/vm_machdep.c
index 516dea6ebea..7de82391532 100644
--- a/sys/arch/i386/i386/vm_machdep.c
+++ b/sys/arch/i386/i386/vm_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vm_machdep.c,v 1.32 2001/11/06 19:53:14 miod Exp $ */
+/* $OpenBSD: vm_machdep.c,v 1.33 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: vm_machdep.c,v 1.61 1996/05/03 19:42:35 christos Exp $ */
/*-
@@ -371,9 +371,7 @@ vmapbuf(bp, len)
while (len) {
pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map),
faddr, &fpa);
- pmap_enter(vm_map_pmap(phys_map), taddr, fpa,
- VM_PROT_READ | VM_PROT_WRITE,
- VM_PROT_READ | VM_PROT_WRITE | PMAP_WIRED);
+ pmap_kenter_pa(taddr, fpa, VM_PROT_READ|VM_PROT_WRITE);
faddr += PAGE_SIZE;
taddr += PAGE_SIZE;
len -= PAGE_SIZE;
@@ -396,6 +394,7 @@ vunmapbuf(bp, len)
addr = trunc_page((vaddr_t)bp->b_data);
off = (vm_offset_t)bp->b_data - addr;
len = round_page(off + len);
+ pmap_kremove(addr, len);
uvm_km_free_wakeup(phys_map, addr, len);
bp->b_data = bp->b_saveaddr;
bp->b_saveaddr = 0;
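The vmapbuf()/vunmapbuf() change pairs two cheaper unmanaged-mapping primitives: pmap_kenter_pa() wires a kernel mapping without creating PV-tracking state, and such mappings must be removed with pmap_kremove() rather than left for the regular pmap_remove() path, which is why the vunmapbuf() hunk adds the removal before the KVA is freed. Restating the two hunks side by side with that invariant spelled out (a sketch, error handling omitted):

	/* vmapbuf(): one wired, unmanaged mapping per user page */
	while (len) {
		pmap_extract(vm_map_pmap(&bp->b_proc->p_vmspace->vm_map),
		    faddr, &fpa);
		pmap_kenter_pa(taddr, fpa, VM_PROT_READ|VM_PROT_WRITE);
		faddr += PAGE_SIZE;
		taddr += PAGE_SIZE;
		len -= PAGE_SIZE;
	}

	/* vunmapbuf(): kenter'd mappings have no PV entries, so tear
	 * them down explicitly before returning the range to phys_map */
	pmap_kremove(addr, len);
	uvm_km_free_wakeup(phys_map, addr, len);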
diff --git a/sys/conf/files b/sys/conf/files
index a6ce3bcedb0..0ec11fc5bbb 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1,4 +1,4 @@
-# $OpenBSD: files,v 1.230 2001/11/21 21:23:56 csapuntz Exp $
+# $OpenBSD: files,v 1.231 2001/11/27 05:27:11 art Exp $
# $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
@@ -801,6 +801,7 @@ file xfs/xfs_syscalls-dummy.c !xfs
file uvm/uvm_amap.c
file uvm/uvm_anon.c
file uvm/uvm_aobj.c
+file uvm/uvm_bio.c
file uvm/uvm_device.c
file uvm/uvm_fault.c
file uvm/uvm_glue.c
diff --git a/sys/dev/vnd.c b/sys/dev/vnd.c
index b2935e0edba..6f8c268a283 100644
--- a/sys/dev/vnd.c
+++ b/sys/dev/vnd.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vnd.c,v 1.28 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: vnd.c,v 1.29 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $ */
/*
@@ -558,10 +558,6 @@ vndstrategy(bp)
nbp->vb_buf.b_proc = bp->b_proc;
nbp->vb_buf.b_iodone = vndiodone;
nbp->vb_buf.b_vp = vp;
- nbp->vb_buf.b_dirtyoff = bp->b_dirtyoff;
- nbp->vb_buf.b_dirtyend = bp->b_dirtyend;
- nbp->vb_buf.b_validoff = bp->b_validoff;
- nbp->vb_buf.b_validend = bp->b_validend;
LIST_INIT(&nbp->vb_buf.b_dep);
/* save a reference to the old buffer */
diff --git a/sys/isofs/cd9660/cd9660_vfsops.c b/sys/isofs/cd9660/cd9660_vfsops.c
index b4199c4df15..b2b1455e6eb 100644
--- a/sys/isofs/cd9660/cd9660_vfsops.c
+++ b/sys/isofs/cd9660/cd9660_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cd9660_vfsops.c,v 1.24 2001/11/15 08:27:28 art Exp $ */
+/* $OpenBSD: cd9660_vfsops.c,v 1.25 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: cd9660_vfsops.c,v 1.26 1997/06/13 15:38:58 pk Exp $ */
/*-
@@ -359,6 +359,8 @@ iso_mountfs(devvp, mp, p, argp)
mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
mp->mnt_maxsymlinklen = 0;
mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_dev_bshift = iso_bsize;
+ mp->mnt_fs_bshift = isomp->im_bshift;
isomp->im_mountp = mp;
isomp->im_dev = dev;
isomp->im_devvp = devvp;
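These two fields are how the filesystem-independent genfs code (added in vfs_default.c below) learns the block geometry it needs: mnt_fs_bshift is log2 of the block size that VOP_BMAP() operates in, and mnt_dev_bshift is log2 of the device sector size. The offset arithmetic they enable, mirroring the genfs_getpages() code later in this diff (variable names are illustrative):

	int fs_bshift = vp->v_mount->mnt_fs_bshift;
	int dev_bshift = vp->v_mount->mnt_dev_bshift;
	daddr_t lbn, blkno;
	int run;

	/* logical block containing this byte offset */
	lbn = offset >> fs_bshift;
	VOP_BMAP(vp, lbn, NULL, &blkno, &run);

	/* device block, adjusted for the position within the
	 * logical block counted in device sectors */
	bp->b_blkno = blkno +
	    ((offset - ((off_t)lbn << fs_bshift)) >> dev_bshift);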
diff --git a/sys/isofs/cd9660/cd9660_vnops.c b/sys/isofs/cd9660/cd9660_vnops.c
index 5f05dc9d65f..cd5567a77b4 100644
--- a/sys/isofs/cd9660/cd9660_vnops.c
+++ b/sys/isofs/cd9660/cd9660_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cd9660_vnops.c,v 1.14 2001/06/23 02:14:23 csapuntz Exp $ */
+/* $OpenBSD: cd9660_vnops.c,v 1.15 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: cd9660_vnops.c,v 1.42 1997/10/16 23:56:57 christos Exp $ */
/*-
@@ -314,9 +314,9 @@ cd9660_read(v)
struct ucred *a_cred;
} */ *ap = v;
struct vnode *vp = ap->a_vp;
- register struct uio *uio = ap->a_uio;
- register struct iso_node *ip = VTOI(vp);
- register struct iso_mnt *imp;
+ struct uio *uio = ap->a_uio;
+ struct iso_node *ip = VTOI(vp);
+ struct iso_mnt *imp;
struct buf *bp;
daddr_t lbn, rablock;
off_t diff;
@@ -329,6 +329,26 @@ cd9660_read(v)
return (EINVAL);
ip->i_flag |= IN_ACCESS;
imp = ip->i_mnt;
+
+ if (vp->v_type == VREG) {
+ error = 0;
+ while (uio->uio_resid > 0) {
+ void *win;
+ vsize_t bytelen = MIN(ip->i_size - uio->uio_offset,
+ uio->uio_resid);
+
+ if (bytelen == 0)
+ break;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error)
+ break;
+ }
+ goto out;
+ }
+
do {
lbn = lblkno(imp, uio->uio_offset);
on = blkoff(imp, uio->uio_offset);
@@ -370,6 +390,8 @@ cd9660_read(v)
bp->b_flags |= B_AGE;
brelse(bp);
} while (error == 0 && uio->uio_resid > 0 && n != 0);
+
+out:
return (error);
}
@@ -1045,7 +1067,9 @@ struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = {
{ &vop_pathconf_desc, cd9660_pathconf },/* pathconf */
{ &vop_advlock_desc, cd9660_advlock }, /* advlock */
{ &vop_bwrite_desc, vop_generic_bwrite },
- { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_size_desc, genfs_size },
+ { NULL, NULL }
};
struct vnodeopv_desc cd9660_vnodeop_opv_desc =
{ &cd9660_vnodeop_p, cd9660_vnodeop_entries };
diff --git a/sys/kern/exec_subr.c b/sys/kern/exec_subr.c
index 770a29f8adc..e79db64dcae 100644
--- a/sys/kern/exec_subr.c
+++ b/sys/kern/exec_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: exec_subr.c,v 1.14 2001/11/07 01:18:01 art Exp $ */
+/* $OpenBSD: exec_subr.c,v 1.15 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: exec_subr.c,v 1.9 1994/12/04 03:10:42 mycroft Exp $ */
/*
@@ -167,6 +167,7 @@ vmcmd_map_pagedvn(p, cmd)
uobj = uvn_attach((void *) cmd->ev_vp, VM_PROT_READ|VM_PROT_EXECUTE);
if (uobj == NULL)
return(ENOMEM);
+ VREF(cmd->ev_vp);
/*
* do the map
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index c909a23141b..f807a181062 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: init_main.c,v 1.84 2001/11/10 18:42:31 art Exp $ */
+/* $OpenBSD: init_main.c,v 1.85 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: init_main.c,v 1.84.4.1 1996/06/02 09:08:06 mrg Exp $ */
/*
@@ -217,6 +217,8 @@ main(framep)
cpu_configure();
+ ubc_init(); /* Initialize the unified buffer cache */
+
/* Initialize sysctls (must be done before any processes run) */
sysctl_init();
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 2d12034b386..9f621da43d2 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_exec.c,v 1.60 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: kern_exec.c,v 1.61 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: kern_exec.c,v 1.75 1996/02/09 18:59:28 christos Exp $ */
/*-
@@ -150,6 +150,7 @@ check_exec(p, epp)
goto bad1;
/* now we have the file, get the exec header */
+ uvn_attach(vp, VM_PROT_READ);
error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid, p);
if (error)
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 71674e95236..6f361c989c0 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_bio.c,v 1.51 2001/11/15 23:25:37 art Exp $ */
+/* $OpenBSD: vfs_bio.c,v 1.52 2001/11/27 05:27:11 art Exp $ */
/* $NetBSD: vfs_bio.c,v 1.44 1996/06/11 11:15:36 pk Exp $ */
/*-
@@ -406,7 +406,6 @@ bwrite(bp)
/* Initiate disk write. Make sure the appropriate party is charged. */
bp->b_vp->v_numoutput++;
splx(s);
- SET(bp->b_flags, B_WRITEINPROG);
VOP_STRATEGY(bp);
if (async)
@@ -466,7 +465,6 @@ bdwrite(bp)
}
/* Otherwise, the "write" is done, so mark and release the buffer. */
- CLR(bp->b_flags, B_NEEDCOMMIT);
SET(bp->b_flags, B_DONE);
brelse(bp);
}
@@ -588,6 +586,7 @@ brelse(bp)
/* Unlock the buffer. */
CLR(bp->b_flags, (B_AGE | B_ASYNC | B_BUSY | B_NOCACHE | B_DEFERRED));
+ SET(bp->b_flags, B_CACHE);
/* Allow disk interrupts. */
splx(s);
@@ -651,44 +650,30 @@ getblk(vp, blkno, size, slpflag, slptimeo)
daddr_t blkno;
int size, slpflag, slptimeo;
{
- struct bufhashhdr *bh;
struct buf *bp, *nbp = NULL;
int s, err;
- /*
- * XXX
- * The following is an inlined version of 'incore()', but with
- * the 'invalid' test moved to after the 'busy' test. It's
- * necessary because there are some cases in which the NFS
- * code sets B_INVAL prior to writing data to the server, but
- * in which the buffers actually contain valid data. In this
- * case, we can't allow the system to allocate a new buffer for
- * the block until the write is finished.
- */
- bh = BUFHASH(vp, blkno);
start:
- bp = bh->lh_first;
- for (; bp != NULL; bp = bp->b_hash.le_next) {
- if (bp->b_lblkno != blkno || bp->b_vp != vp)
- continue;
-
+ bp = incore(vp, blkno);
+ if (bp != NULL) {
s = splbio();
if (ISSET(bp->b_flags, B_BUSY)) {
SET(bp->b_flags, B_WANTED);
err = tsleep(bp, slpflag | (PRIBIO + 1), "getblk",
slptimeo);
splx(s);
- if (err)
+ if (err) {
+ if (nbp != NULL) {
+ SET(nbp->b_flags, B_AGE);
+ brelse(nbp);
+ }
return (NULL);
+ }
goto start;
}
- if (!ISSET(bp->b_flags, B_INVAL)) {
- SET(bp->b_flags, (B_BUSY | B_CACHE));
- bremfree(bp);
- splx(s);
- break;
- }
+ SET(bp->b_flags, (B_BUSY | B_CACHE));
+ bremfree(bp);
splx(s);
}
@@ -697,7 +682,7 @@ start:
goto start;
}
bp = nbp;
- binshash(bp, bh);
+ binshash(bp, BUFHASH(vp, blkno));
bp->b_blkno = bp->b_lblkno = blkno;
s = splbio();
bgetvp(vp, bp);
@@ -900,8 +885,6 @@ start:
bp->b_error = 0;
bp->b_resid = 0;
bp->b_bcount = 0;
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- bp->b_validoff = bp->b_validend = 0;
bremhash(bp);
*bpp = bp;
@@ -1022,7 +1005,6 @@ biodone(bp)
buf_complete(bp);
if (!ISSET(bp->b_flags, B_READ)) {
- CLR(bp->b_flags, B_WRITEINPROG);
vwakeup(bp->b_vp);
}
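With the NFS-specific buffer games gone (B_WRITEINPROG, B_NEEDCOMMIT, and the valid/dirty sub-ranges), brelse() now tags released buffers B_CACHE so a later getblk() hit can be treated as valid cached data. A hedged sketch of the consumer side, not code from this diff: a bread()-style caller checks the flag after getblk() and only schedules real I/O on a miss.

	bp = getblk(vp, blkno, size, 0, 0);
	if (!ISSET(bp->b_flags, B_CACHE)) {
		/* cache miss: fill the buffer from disk and wait */
		SET(bp->b_flags, B_READ);
		VOP_STRATEGY(bp);
		error = biowait(bp);
	}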
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 8f426b3a3f5..61f6d0217e9 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -1,10 +1,9 @@
-/* $OpenBSD: vfs_default.c,v 1.7 2001/06/25 03:28:03 csapuntz Exp $ */
-
+/* $OpenBSD: vfs_default.c,v 1.8 2001/11/27 05:27:12 art Exp $ */
/*
* Portions of this code are:
*
- * Copyright (c) 1989, 1993
+ * Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
@@ -49,9 +48,11 @@
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/malloc.h>
+#include <sys/pool.h>
#include <sys/event.h>
#include <miscfs/specfs/specdev.h>
+#include <uvm/uvm.h>
extern struct simplelock spechash_slock;
@@ -310,3 +311,679 @@ lease_check(void *v)
{
return (0);
}
+
+/*
+ * generic VM getpages routine.
+ * Return PG_BUSY pages for the given range,
+ * reading from backing store if necessary.
+ */
+
+int
+genfs_getpages(v)
+ void *v;
+{
+ struct vop_getpages_args /* {
+ struct vnode *a_vp;
+ voff_t a_offset;
+ vm_page_t *a_m;
+ int *a_count;
+ int a_centeridx;
+ vm_prot_t a_access_type;
+ int a_advice;
+ int a_flags;
+ } */ *ap = v;
+
+ off_t newsize, diskeof, memeof;
+ off_t offset, origoffset, startoffset, endoffset, raoffset;
+ daddr_t lbn, blkno;
+ int s, i, error, npages, orignpages, npgs, run, ridx, pidx, pcount;
+ int fs_bshift, fs_bsize, dev_bshift, dev_bsize;
+ int flags = ap->a_flags;
+ size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
+ vaddr_t kva;
+ struct buf *bp, *mbp;
+ struct vnode *vp = ap->a_vp;
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+ struct vm_page *pgs[16]; /* XXXUBC 16 */
+ struct ucred *cred = curproc->p_ucred; /* XXXUBC curproc */
+ boolean_t async = (flags & PGO_SYNCIO) == 0;
+ boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
+ boolean_t sawhole = FALSE;
+ struct proc *p = curproc;
+ UVMHIST_FUNC("genfs_getpages"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x/%x count %d",
+ vp, ap->a_offset >> 32, ap->a_offset, *ap->a_count);
+
+ /* XXXUBC temp limit */
+ if (*ap->a_count > 16) {
+ return EINVAL;
+ }
+
+ error = 0;
+ origoffset = ap->a_offset;
+ orignpages = *ap->a_count;
+ error = VOP_SIZE(vp, vp->v_uvm.u_size, &diskeof);
+ if (error) {
+ return error;
+ }
+ if (flags & PGO_PASTEOF) {
+ newsize = MAX(vp->v_uvm.u_size,
+ origoffset + (orignpages << PAGE_SHIFT));
+ error = VOP_SIZE(vp, newsize, &memeof);
+ if (error) {
+ return error;
+ }
+ } else {
+ memeof = diskeof;
+ }
+ KASSERT(ap->a_centeridx >= 0 || ap->a_centeridx <= orignpages);
+ KASSERT((origoffset & (PAGE_SIZE - 1)) == 0 && origoffset >= 0);
+ KASSERT(orignpages > 0);
+
+ /*
+ * Bounds-check the request.
+ */
+
+ if (origoffset + (ap->a_centeridx << PAGE_SHIFT) >= memeof) {
+ if ((flags & PGO_LOCKED) == 0) {
+ simple_unlock(&uobj->vmobjlock);
+ }
+ UVMHIST_LOG(ubchist, "off 0x%x count %d goes past EOF 0x%x",
+ origoffset, *ap->a_count, memeof,0);
+ return EINVAL;
+ }
+
+ /*
+ * For PGO_LOCKED requests, just return whatever's in memory.
+ */
+
+ if (flags & PGO_LOCKED) {
+ uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
+ UFP_NOWAIT|UFP_NOALLOC|UFP_NORDONLY);
+
+ return ap->a_m[ap->a_centeridx] == NULL ? EBUSY : 0;
+ }
+
+ /* vnode is VOP_LOCKed, uobj is locked */
+
+ if (write && (vp->v_bioflag & VBIOONSYNCLIST) == 0) {
+ vn_syncer_add_to_worklist(vp, syncdelay);
+ }
+
+ /*
+ * find the requested pages and make some simple checks.
+ * leave space in the page array for a whole block.
+ */
+
+ fs_bshift = vp->v_mount->mnt_fs_bshift;
+ fs_bsize = 1 << fs_bshift;
+ dev_bshift = vp->v_mount->mnt_dev_bshift;
+ dev_bsize = 1 << dev_bshift;
+ KASSERT((diskeof & (dev_bsize - 1)) == 0);
+ KASSERT((memeof & (dev_bsize - 1)) == 0);
+
+ orignpages = MIN(orignpages,
+ round_page(memeof - origoffset) >> PAGE_SHIFT);
+ npages = orignpages;
+ startoffset = origoffset & ~(fs_bsize - 1);
+ endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
+ + fs_bsize - 1) & ~(fs_bsize - 1));
+ endoffset = MIN(endoffset, round_page(memeof));
+ ridx = (origoffset - startoffset) >> PAGE_SHIFT;
+
+ memset(pgs, 0, sizeof(pgs));
+ uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);
+
+ /*
+ * if PGO_OVERWRITE is set, don't bother reading the pages.
+ * PGO_OVERWRITE also means that the caller guarantees
+ * that the pages already have backing store allocated.
+ */
+
+ if (flags & PGO_OVERWRITE) {
+ UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if (pg->flags & PG_FAKE) {
+ uvm_pagezero(pg);
+ pg->flags &= ~(PG_FAKE);
+ }
+ pg->flags &= ~(PG_RDONLY);
+ }
+ npages += ridx;
+ goto out;
+ }
+
+ /*
+ * if the pages are already resident, just return them.
+ */
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if ((pg->flags & PG_FAKE) ||
+ (write && (pg->flags & PG_RDONLY))) {
+ break;
+ }
+ }
+ if (i == npages) {
+ UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
+ raoffset = origoffset + (orignpages << PAGE_SHIFT);
+ npages += ridx;
+ goto raout;
+ }
+
+ /*
+ * the page wasn't resident and we're not overwriting,
+ * so we're going to have to do some i/o.
+ * find any additional pages needed to cover the expanded range.
+ */
+
+ if (startoffset != origoffset) {
+
+ /*
+ * XXXUBC we need to avoid deadlocks caused by locking
+ * additional pages at lower offsets than pages we
+ * already have locked. for now, unlock them all and
+ * start over.
+ */
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if (pg->flags & PG_FAKE) {
+ pg->flags |= PG_RELEASED;
+ }
+ }
+ uvm_page_unbusy(&pgs[ridx], npages);
+ memset(pgs, 0, sizeof(pgs));
+
+ UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
+ startoffset, endoffset, 0,0);
+ npages = (endoffset - startoffset) >> PAGE_SHIFT;
+ npgs = npages;
+ uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
+ }
+ simple_unlock(&uobj->vmobjlock);
+
+ /*
+ * read the desired page(s).
+ */
+
+ totalbytes = npages << PAGE_SHIFT;
+ bytes = MIN(totalbytes, MAX(diskeof - startoffset, 0));
+ tailbytes = totalbytes - bytes;
+ skipbytes = 0;
+
+ kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
+ UVMPAGER_MAPIN_READ);
+
+ s = splbio();
+ mbp = pool_get(&bufpool, PR_WAITOK);
+ splx(s);
+ mbp->b_bufsize = totalbytes;
+ mbp->b_data = (void *)kva;
+ mbp->b_resid = mbp->b_bcount = bytes;
+ mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL : 0);
+ mbp->b_iodone = uvm_aio_biodone;
+ mbp->b_vp = vp;
+ LIST_INIT(&mbp->b_dep);
+
+ /*
+ * if EOF is in the middle of the range, zero the part past EOF.
+ */
+
+ if (tailbytes > 0) {
+ memset((void *)(kva + bytes), 0, tailbytes);
+ }
+
+ /*
+ * now loop over the pages, reading as needed.
+ */
+
+ if (write) {
+ lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL, p);
+ } else {
+ lockmgr(&vp->v_glock, LK_SHARED, NULL, p);
+ }
+
+ bp = NULL;
+ for (offset = startoffset;
+ bytes > 0;
+ offset += iobytes, bytes -= iobytes) {
+
+ /*
+ * skip pages which don't need to be read.
+ */
+
+ pidx = (offset - startoffset) >> PAGE_SHIFT;
+ while ((pgs[pidx]->flags & PG_FAKE) == 0) {
+ size_t b;
+
+ KASSERT((offset & (PAGE_SIZE - 1)) == 0);
+ b = MIN(PAGE_SIZE, bytes);
+ offset += b;
+ bytes -= b;
+ skipbytes += b;
+ pidx++;
+ UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
+ offset, 0,0,0);
+ if (bytes == 0) {
+ goto loopdone;
+ }
+ }
+
+ /*
+ * bmap the file to find out the blkno to read from and
+ * how much we can read in one i/o. if bmap returns an error,
+ * skip the rest of the top-level i/o.
+ */
+
+ lbn = offset >> fs_bshift;
+ error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
+ if (error) {
+ UVMHIST_LOG(ubchist, "VOP_BMAP lbn 0x%x -> %d\n",
+ lbn, error,0,0);
+ skipbytes += bytes;
+ goto loopdone;
+ }
+
+ /*
+ * see how many pages can be read with this i/o.
+ * reduce the i/o size if necessary to avoid
+ * overwriting pages with valid data.
+ */
+
+ iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
+ bytes);
+ if (offset + iobytes > round_page(offset)) {
+ pcount = 1;
+ while (pidx + pcount < npages &&
+ pgs[pidx + pcount]->flags & PG_FAKE) {
+ pcount++;
+ }
+ iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
+ (offset - trunc_page(offset)));
+ }
+
+ /*
+ * if this block isn't allocated, zero it instead of reading it.
+ * if this is a read access, mark the pages we zeroed PG_RDONLY.
+ */
+
+ if (blkno < 0) {
+ UVMHIST_LOG(ubchist, "lbn 0x%x -> HOLE", lbn,0,0,0);
+
+ sawhole = TRUE;
+ memset((char *)kva + (offset - startoffset), 0,
+ iobytes);
+ skipbytes += iobytes;
+
+ if (!write) {
+ int holepages =
+ (round_page(offset + iobytes) -
+ trunc_page(offset)) >> PAGE_SHIFT;
+ for (i = 0; i < holepages; i++) {
+ pgs[pidx + i]->flags |= PG_RDONLY;
+ }
+ }
+ continue;
+ }
+
+ /*
+ * allocate a sub-buf for this piece of the i/o
+ * (or just use mbp if there's only 1 piece),
+ * and start it going.
+ */
+
+ if (offset == startoffset && iobytes == bytes) {
+ bp = mbp;
+ } else {
+ s = splbio();
+ bp = pool_get(&bufpool, PR_WAITOK);
+ splx(s);
+ bp->b_data = (char *)kva + offset - startoffset;
+ bp->b_resid = bp->b_bcount = iobytes;
+ bp->b_flags = B_BUSY|B_READ|B_CALL;
+ bp->b_iodone = uvm_aio_biodone1;
+ bp->b_vp = vp;
+ LIST_INIT(&bp->b_dep);
+ }
+ bp->b_lblkno = 0;
+ bp->b_private = mbp;
+
+ /* adjust physical blkno for partial blocks */
+ bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
+ dev_bshift);
+
+ UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
+ bp, offset, iobytes, bp->b_blkno);
+
+ VOP_STRATEGY(bp);
+ }
+
+loopdone:
+ if (skipbytes) {
+ s = splbio();
+ if (error) {
+ mbp->b_flags |= B_ERROR;
+ mbp->b_error = error;
+ }
+ mbp->b_resid -= skipbytes;
+ if (mbp->b_resid == 0) {
+ biodone(mbp);
+ }
+ splx(s);
+ }
+
+ if (async) {
+ UVMHIST_LOG(ubchist, "returning PEND",0,0,0,0);
+ lockmgr(&vp->v_glock, LK_RELEASE, NULL, p);
+ return EINPROGRESS;
+ }
+ if (bp != NULL) {
+ error = biowait(mbp);
+ }
+ s = splbio();
+ pool_put(&bufpool, mbp);
+ splx(s);
+ uvm_pagermapout(kva, npages);
+ raoffset = startoffset + totalbytes;
+
+ /*
+ * if we encountered a hole then we have to do a little more work.
+ * for read faults, we marked the page PG_RDONLY so that future
+ * write accesses to the page will fault again.
+ * for write faults, we must make sure that the backing store for
+ * the page is completely allocated while the pages are locked.
+ */
+
+ if (error == 0 && sawhole && write) {
+ error = VOP_BALLOCN(vp, startoffset, npages << PAGE_SHIFT,
+ cred, 0);
+ if (error) {
+ UVMHIST_LOG(ubchist, "balloc lbn 0x%x -> %d",
+ lbn, error,0,0);
+ lockmgr(&vp->v_glock, LK_RELEASE, NULL, p);
+ simple_lock(&uobj->vmobjlock);
+ goto out;
+ }
+ }
+ lockmgr(&vp->v_glock, LK_RELEASE, NULL, p);
+ simple_lock(&uobj->vmobjlock);
+
+ /*
+ * see if we want to start any readahead.
+ * XXXUBC for now, just read the next 128k on 64k boundaries.
+ * this is pretty nonsensical, but it is 50% faster than reading
+ * just the next 64k.
+ */
+
+raout:
+ if (!error && !async && !write && ((int)raoffset & 0xffff) == 0 &&
+ PAGE_SHIFT <= 16) {
+ int racount;
+
+ racount = 1 << (16 - PAGE_SHIFT);
+ (void) VOP_GETPAGES(vp, raoffset, NULL, &racount, 0,
+ VM_PROT_READ, 0, 0);
+ simple_lock(&uobj->vmobjlock);
+
+ racount = 1 << (16 - PAGE_SHIFT);
+ (void) VOP_GETPAGES(vp, raoffset + 0x10000, NULL, &racount, 0,
+ VM_PROT_READ, 0, 0);
+ simple_lock(&uobj->vmobjlock);
+ }
+
+ /*
+ * we're almost done! release the pages...
+ * for errors, we free the pages.
+ * otherwise we activate them and mark them as valid and clean.
+ * also, unbusy pages that were not actually requested.
+ */
+
+out:
+ if (error) {
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
+ pgs[i], pgs[i]->flags, 0,0);
+ if (pgs[i]->flags & PG_WANTED) {
+ wakeup(pgs[i]);
+ }
+ if (pgs[i]->flags & PG_RELEASED) {
+ uvm_unlock_pageq();
+ (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
+ uvm_lock_pageq();
+ continue;
+ }
+ if (pgs[i]->flags & PG_FAKE) {
+ uvm_pagefree(pgs[i]);
+ continue;
+ }
+ uvm_pageactivate(pgs[i]);
+ pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pgs[i], NULL);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
+ return error;
+ }
+
+ UVMHIST_LOG(ubchist, "succeeding, npages %d", npages,0,0,0);
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
+ pgs[i], pgs[i]->flags, 0,0);
+ if (pgs[i]->flags & PG_FAKE) {
+ UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
+ pgs[i], pgs[i]->offset,0,0);
+ pgs[i]->flags &= ~(PG_FAKE);
+ pmap_clear_modify(pgs[i]);
+ pmap_clear_reference(pgs[i]);
+ }
+ if (write) {
+ pgs[i]->flags &= ~(PG_RDONLY);
+ }
+ if (i < ridx || i >= ridx + orignpages || async) {
+ UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
+ pgs[i], pgs[i]->offset,0,0);
+ if (pgs[i]->flags & PG_WANTED) {
+ wakeup(pgs[i]);
+ }
+ if (pgs[i]->flags & PG_RELEASED) {
+ uvm_unlock_pageq();
+ (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
+ uvm_lock_pageq();
+ continue;
+ }
+ uvm_pageactivate(pgs[i]);
+ pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pgs[i], NULL);
+ }
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ if (ap->a_m != NULL) {
+ memcpy(ap->a_m, &pgs[ridx],
+ orignpages * sizeof(struct vm_page *));
+ }
+ return 0;
+}
+
+/*
+ * generic VM putpages routine.
+ * Write the given range of pages to backing store.
+ */
+
+int
+genfs_putpages(v)
+ void *v;
+{
+ struct vop_putpages_args /* {
+ struct vnode *a_vp;
+ struct vm_page **a_m;
+ int a_count;
+ int a_flags;
+ int *a_rtvals;
+ } */ *ap = v;
+
+ int s, error, npages, run;
+ int fs_bshift, dev_bshift, dev_bsize;
+ vaddr_t kva;
+ off_t eof, offset, startoffset;
+ size_t bytes, iobytes, skipbytes;
+ daddr_t lbn, blkno;
+ struct vm_page *pg;
+ struct buf *mbp, *bp;
+ struct vnode *vp = ap->a_vp;
+ boolean_t async = (ap->a_flags & PGO_SYNCIO) == 0;
+ UVMHIST_FUNC("genfs_putpages"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p offset 0x%x count %d",
+ vp, ap->a_m[0]->offset, ap->a_count, 0);
+
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+
+ error = VOP_SIZE(vp, vp->v_uvm.u_size, &eof);
+ if (error) {
+ return error;
+ }
+
+ error = 0;
+ npages = ap->a_count;
+ fs_bshift = vp->v_mount->mnt_fs_bshift;
+ dev_bshift = vp->v_mount->mnt_dev_bshift;
+ dev_bsize = 1 << dev_bshift;
+ KASSERT((eof & (dev_bsize - 1)) == 0);
+
+ pg = ap->a_m[0];
+ startoffset = pg->offset;
+ bytes = MIN(npages << PAGE_SHIFT, eof - startoffset);
+ skipbytes = 0;
+ KASSERT(bytes != 0);
+
+ kva = uvm_pagermapin(ap->a_m, npages, UVMPAGER_MAPIN_WAITOK);
+
+ s = splbio();
+ vp->v_numoutput += 2;
+ mbp = pool_get(&bufpool, PR_WAITOK);
+ UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
+ vp, mbp, vp->v_numoutput, bytes);
+ splx(s);
+ mbp->b_bufsize = npages << PAGE_SHIFT;
+ mbp->b_data = (void *)kva;
+ mbp->b_resid = mbp->b_bcount = bytes;
+ mbp->b_flags = B_BUSY|B_WRITE|B_AGE |
+ (async ? B_CALL : 0) |
+ (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);
+ mbp->b_iodone = uvm_aio_biodone;
+ mbp->b_vp = vp;
+ LIST_INIT(&mbp->b_dep);
+
+ bp = NULL;
+ for (offset = startoffset;
+ bytes > 0;
+ offset += iobytes, bytes -= iobytes) {
+ lbn = offset >> fs_bshift;
+ error = VOP_BMAP(vp, lbn, NULL, &blkno, &run);
+ if (error) {
+ UVMHIST_LOG(ubchist, "VOP_BMAP() -> %d", error,0,0,0);
+ skipbytes += bytes;
+ bytes = 0;
+ break;
+ }
+
+ iobytes = MIN((((off_t)lbn + 1 + run) << fs_bshift) - offset,
+ bytes);
+ if (blkno == (daddr_t)-1) {
+ skipbytes += iobytes;
+ continue;
+ }
+
+ /* if it's really one i/o, don't make a second buf */
+ if (offset == startoffset && iobytes == bytes) {
+ bp = mbp;
+ } else {
+ s = splbio();
+ vp->v_numoutput++;
+ bp = pool_get(&bufpool, PR_WAITOK);
+ UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
+ vp, bp, vp->v_numoutput, 0);
+ splx(s);
+ bp->b_data = (char *)kva +
+ (vaddr_t)(offset - pg->offset);
+ bp->b_resid = bp->b_bcount = iobytes;
+ bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
+ bp->b_iodone = uvm_aio_biodone1;
+ bp->b_vp = vp;
+ LIST_INIT(&bp->b_dep);
+ }
+ bp->b_lblkno = 0;
+ bp->b_private = mbp;
+
+ /* adjust physical blkno for partial blocks */
+ bp->b_blkno = blkno + ((offset - ((off_t)lbn << fs_bshift)) >>
+ dev_bshift);
+ UVMHIST_LOG(ubchist, "vp %p offset 0x%x bcount 0x%x blkno 0x%x",
+ vp, offset, bp->b_bcount, bp->b_blkno);
+ VOP_STRATEGY(bp);
+ }
+ if (skipbytes) {
+ UVMHIST_LOG(ubchist, "skipbytes %d", skipbytes, 0,0,0);
+ s = splbio();
+ mbp->b_resid -= skipbytes;
+ if (error) {
+ mbp->b_flags |= B_ERROR;
+ mbp->b_error = error;
+ }
+ if (mbp->b_resid == 0) {
+ biodone(mbp);
+ }
+ splx(s);
+ }
+ if (async) {
+ UVMHIST_LOG(ubchist, "returning PEND", 0,0,0,0);
+ return EINPROGRESS;
+ }
+ if (bp != NULL) {
+ UVMHIST_LOG(ubchist, "waiting for mbp %p", mbp,0,0,0);
+ error = biowait(mbp);
+ }
+ if (bioops.io_pageiodone) {
+ (*bioops.io_pageiodone)(mbp);
+ }
+ s = splbio();
+ if (mbp->b_vp)
+ vwakeup(mbp->b_vp);
+ pool_put(&bufpool, mbp);
+ splx(s);
+ uvm_pagermapout(kva, npages);
+ UVMHIST_LOG(ubchist, "returning, error %d", error,0,0,0);
+ return error;
+}
+
+int
+genfs_size(v)
+ void *v;
+{
+ struct vop_size_args /* {
+ struct vnode *a_vp;
+ off_t a_size;
+ off_t *a_eobp;
+ } */ *ap = v;
+ int bsize;
+
+ bsize = 1 << ap->a_vp->v_mount->mnt_fs_bshift;
+ *ap->a_eobp = (ap->a_size + bsize - 1) & ~(bsize - 1);
+ return 0;
+}
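genfs_size() rounds a byte size up to the end of its containing filesystem block; the mask trick is valid because mnt_fs_bshift guarantees bsize is a power of two. A worked example with an assumed 2 KB block size (bshift of 11):

	int bsize = 1 << 11;			/* 2048 */
	off_t size = 5000;
	off_t eob = (size + bsize - 1) & ~(bsize - 1);
	/* eob == 6144: 5000 bytes occupy three 2 KB blocks,
	 * and 3 * 2048 == 6144 */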
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index e4efaff930f..7f668a7edde 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_subr.c,v 1.72 2001/11/21 21:13:34 csapuntz Exp $ */
+/* $OpenBSD: vfs_subr.c,v 1.73 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */
/*
@@ -377,6 +377,8 @@ getnewvnode(tag, mp, vops, vpp)
int (**vops) __P((void *));
struct vnode **vpp;
{
+ extern struct uvm_pagerops uvm_vnodeops;
+ struct uvm_object *uobj;
struct proc *p = curproc; /* XXX */
struct freelst *listhd;
static int toggle;
@@ -410,7 +412,7 @@ getnewvnode(tag, mp, vops, vpp)
splx(s);
simple_unlock(&vnode_free_list_slock);
vp = pool_get(&vnode_pool, PR_WAITOK);
- bzero((char *)vp, sizeof *vp);
+ bzero(vp, sizeof *vp);
numvnodes++;
} else {
for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
@@ -423,7 +425,7 @@ getnewvnode(tag, mp, vops, vpp)
* the first NCPUS items on the free list are
* locked, so this is close enough to being empty.
*/
- if (vp == NULLVP) {
+ if (vp == NULL) {
splx(s);
simple_unlock(&vnode_free_list_slock);
tablefull("vnode");
@@ -458,6 +460,7 @@ getnewvnode(tag, mp, vops, vpp)
vp->v_socket = 0;
}
vp->v_type = VNON;
+ lockinit(&vp->v_glock, PVFS, "glock", 0, 0);
cache_purge(vp);
vp->v_tag = tag;
vp->v_op = vops;
@@ -466,6 +469,16 @@ getnewvnode(tag, mp, vops, vpp)
vp->v_usecount = 1;
vp->v_data = 0;
simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
+
+ /*
+ * initialize uvm_object within vnode.
+ */
+
+ uobj = &vp->v_uvm.u_obj;
+ uobj->pgops = &uvm_vnodeops;
+ TAILQ_INIT(&uobj->memq);
+ vp->v_uvm.u_size = VSIZENOTSET;
+
return (0);
}
@@ -669,6 +682,10 @@ vget(vp, flags, p)
flags |= LK_INTERLOCK;
}
if (vp->v_flag & VXLOCK) {
+ if (flags & LK_NOWAIT) {
+ simple_unlock(&vp->v_interlock);
+ return (EBUSY);
+ }
vp->v_flag |= VXWANT;
simple_unlock(&vp->v_interlock);
tsleep((caddr_t)vp, PINOD, "vget", 0);
@@ -787,6 +804,11 @@ vput(vp)
#endif
vputonfreelist(vp);
+ if (vp->v_flag & VTEXT) {
+ uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
+ uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
+ }
+ vp->v_flag &= ~VTEXT;
simple_unlock(&vp->v_interlock);
VOP_INACTIVE(vp, p);
@@ -827,6 +849,11 @@ vrele(vp)
#endif
vputonfreelist(vp);
+ if (vp->v_flag & VTEXT) {
+ uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
+ uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
+ }
+ vp->v_flag &= ~VTEXT;
if (vn_lock(vp, LK_EXCLUSIVE|LK_INTERLOCK, p) == 0)
VOP_INACTIVE(vp, p);
}
@@ -1009,6 +1036,12 @@ vclean(vp, flags, p)
if (vp->v_flag & VXLOCK)
panic("vclean: deadlock");
vp->v_flag |= VXLOCK;
+ if (vp->v_flag & VTEXT) {
+ uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
+ uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
+ }
+ vp->v_flag &= ~VTEXT;
+
/*
* Even if the count is zero, the VOP_INACTIVE routine may still
* have the object locked while it cleans it out. The VOP_LOCK
@@ -1019,11 +1052,7 @@ vclean(vp, flags, p)
VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
/*
- * clean out any VM data associated with the vnode.
- */
- uvm_vnp_terminate(vp);
- /*
- * Clean out any buffers associated with the vnode.
+ * Clean out any cached data associated with the vnode.
*/
if (flags & DOCLOSE)
vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
@@ -1968,9 +1997,22 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
struct proc *p;
int slpflag, slptimeo;
{
- register struct buf *bp;
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+ struct buf *bp;
struct buf *nbp, *blist;
- int s, error;
+ int s, error, rv;
+ int flushflags = PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO|
+ (flags & V_SAVE ? PGO_CLEANIT : 0);
+
+ /* XXXUBC this doesn't look at flags or slp* */
+ if (vp->v_type == VREG) {
+ simple_lock(&uobj->vmobjlock);
+ rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags);
+ simple_unlock(&uobj->vmobjlock);
+ if (!rv) {
+ return EIO;
+ }
+ }
if (flags & V_SAVE) {
s = splbio();
@@ -2040,12 +2082,21 @@ loop:
void
vflushbuf(vp, sync)
- register struct vnode *vp;
+ struct vnode *vp;
int sync;
{
- register struct buf *bp, *nbp;
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+ struct buf *bp, *nbp;
int s;
+ if (vp->v_type == VREG) {
+ int flags = PGO_CLEANIT|PGO_ALLPAGES| (sync ? PGO_SYNCIO : 0);
+
+ simple_lock(&uobj->vmobjlock);
+ (uobj->pgops->pgo_flush)(uobj, 0, 0, flags);
+ simple_unlock(&uobj->vmobjlock);
+ }
+
loop:
s = splbio();
for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
@@ -2112,23 +2163,25 @@ bgetvp(vp, bp)
*/
void
brelvp(bp)
- register struct buf *bp;
+ struct buf *bp;
{
struct vnode *vp;
- if ((vp = bp->b_vp) == (struct vnode *) 0)
+ if ((vp = bp->b_vp) == NULL)
panic("brelvp: NULL");
+
/*
* Delete from old vnode list, if on one.
*/
if (bp->b_vnbufs.le_next != NOLIST)
bufremvn(bp);
- if ((vp->v_bioflag & VBIOONSYNCLIST) &&
+ if (TAILQ_EMPTY(&vp->v_uvm.u_obj.memq) &&
+ (vp->v_bioflag & VBIOONSYNCLIST) &&
LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
vp->v_bioflag &= ~VBIOONSYNCLIST;
LIST_REMOVE(vp, v_synclist);
}
- bp->b_vp = (struct vnode *) 0;
+ bp->b_vp = NULL;
simple_lock(&vp->v_interlock);
#ifdef DIAGNOSTIC
@@ -2205,7 +2258,8 @@ reassignbuf(bp)
*/
if ((bp->b_flags & B_DELWRI) == 0) {
listheadp = &vp->v_cleanblkhd;
- if ((vp->v_bioflag & VBIOONSYNCLIST) &&
+ if (TAILQ_EMPTY(&vp->v_uvm.u_obj.memq) &&
+ (vp->v_bioflag & VBIOONSYNCLIST) &&
LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
vp->v_bioflag &= ~VBIOONSYNCLIST;
LIST_REMOVE(vp, v_synclist);
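vinvalbuf() and vflushbuf() now push cached pages out through the vnode's pager operations instead of relying only on the buffer lists. The calling convention used in both hunks, with the object's simple lock held across pgo_flush(), condenses to this sketch; the PGO_* flags select cleaning, freeing, and whether to wait:

	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	boolean_t ok;

	simple_lock(&uobj->vmobjlock);
	/* clean and free every page, waiting for the writes to finish */
	ok = (uobj->pgops->pgo_flush)(uobj, 0, 0,
	    PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
	simple_unlock(&uobj->vmobjlock);
	if (!ok)
		return (EIO);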
diff --git a/sys/kern/vfs_sync.c b/sys/kern/vfs_sync.c
index 4b07d0f373a..0adeb2f3065 100644
--- a/sys/kern/vfs_sync.c
+++ b/sys/kern/vfs_sync.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_sync.c,v 1.20 2001/11/15 06:38:48 art Exp $ */
+/* $OpenBSD: vfs_sync.c,v 1.21 2001/11/27 05:27:12 art Exp $ */
/*
* Portions of this code are:
@@ -176,15 +176,12 @@ sched_sync(p)
VOP_UNLOCK(vp, 0, p);
s = splbio();
if (LIST_FIRST(slp) == vp) {
- /*
- * Note: disk vps can remain on the
- * worklist too with no dirty blocks, but
- * since sync_fsync() moves it to a different
- * slot we are safe.
- */
- if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
- vp->v_type != VBLK)
- panic("sched_sync: fsync failed");
+#ifdef DIAGNOSTIC
+ if (!(vp->v_bioflag & VBIOONSYNCLIST)) {
+ vprint("vnode", vp);
+ panic("sched_fsync: on synclist, but no flag");
+ }
+#endif
/*
* Put us back on the worklist. The worklist
* routine will remove us from our current
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 284fad0fbda..5433711decd 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_syscalls.c,v 1.82 2001/11/06 19:53:20 miod Exp $ */
+/* $OpenBSD: vfs_syscalls.c,v 1.83 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: vfs_syscalls.c,v 1.71 1996/04/23 10:29:02 mycroft Exp $ */
/*
@@ -493,7 +493,6 @@ sys_sync(p, v, retval)
if ((mp->mnt_flag & MNT_RDONLY) == 0) {
asyncflag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &= ~MNT_ASYNC;
- uvm_vnp_sync(mp);
VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
if (asyncflag)
mp->mnt_flag |= MNT_ASYNC;
@@ -1064,6 +1063,13 @@ sys_fhopen(p, v, retval)
}
if ((error = VOP_OPEN(vp, flags, cred, p)) != 0)
goto bad;
+
+ if (vp->v_type == VREG &&
+ uvn_attach(vp, flags & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
+ error = EIO;
+ goto bad;
+ }
+
if (flags & FWRITE)
vp->v_writecount++;
@@ -1475,8 +1481,6 @@ sys_unlink(p, v, retval)
goto out;
}
- (void)uvm_vnp_uncache(vp);
-
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
@@ -2338,7 +2342,6 @@ out:
if (fromnd.ni_dvp != tdvp)
VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
if (tvp) {
- (void)uvm_vnp_uncache(tvp);
VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE);
}
error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index ee5eb0baee2..491db1172fa 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: vfs_vnops.c,v 1.35 2001/11/15 06:22:30 art Exp $ */
+/* $OpenBSD: vfs_vnops.c,v 1.36 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: vfs_vnops.c,v 1.20 1996/02/04 02:18:41 christos Exp $ */
/*
@@ -165,6 +165,11 @@ vn_open(ndp, fmode, cmode)
}
if ((error = VOP_OPEN(vp, fmode, cred, p)) != 0)
goto bad;
+ if (vp->v_type == VREG &&
+ uvn_attach(vp, fmode & FWRITE ? VM_PROT_WRITE : 0) == NULL) {
+ error = EIO;
+ goto bad;
+ }
if (fmode & FWRITE)
vp->v_writecount++;
return (0);
@@ -197,11 +202,10 @@ vn_writechk(vp)
}
}
/*
- * If there's shared text associated with
- * the vnode, try to free it up once. If
- * we fail, we can't allow writing.
+ * If the vnode is in use as a process's text,
+ * we can't allow writing.
*/
- if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
+ if (vp->v_flag & VTEXT)
return (ETXTBSY);
return (0);
@@ -214,6 +218,23 @@ void
vn_marktext(vp)
struct vnode *vp;
{
+ if ((vp->v_flag & VTEXT) == 0) {
+ uvmexp.vnodepages -= vp->v_uvm.u_obj.uo_npages;
+ uvmexp.vtextpages += vp->v_uvm.u_obj.uo_npages;
+#if 0
+ /*
+ * Doesn't help much because the pager is borked and ubc_flush is
+ * slow.
+ */
+#ifdef PMAP_PREFER
+ /*
+ * Get rid of any cached reads from this vnode.
+ * exec can't respect PMAP_PREFER when mapping the text.
+ */
+ ubc_flush(&vp->v_uvm.u_obj, 0, 0);
+#endif
+#endif
+ }
vp->v_flag |= VTEXT;
}
diff --git a/sys/kern/vnode_if.c b/sys/kern/vnode_if.c
index 1f30d85c507..d2a3d8298bf 100644
--- a/sys/kern/vnode_if.c
+++ b/sys/kern/vnode_if.c
@@ -3,7 +3,7 @@
* (Modifications made here may easily be lost!)
*
* Created from the file:
- * OpenBSD: vnode_if.src,v 1.11 2001/06/23 02:21:05 csapuntz Exp
+ * OpenBSD: vnode_if.src,v 1.13 2001/07/26 20:24:47 millert Exp
* by the script:
* OpenBSD: vnode_if.sh,v 1.8 2001/02/26 17:34:18 art Exp
*/
@@ -1230,6 +1230,140 @@ int VOP_WHITEOUT(dvp, cnp, flags)
return (VCALL(dvp, VOFFSET(vop_whiteout), &a));
}
+int vop_ballocn_vp_offsets[] = {
+ VOPARG_OFFSETOF(struct vop_ballocn_args,a_vp),
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_ballocn_desc = {
+ 0,
+ "vop_ballocn",
+ 0,
+ vop_ballocn_vp_offsets,
+ VDESC_NO_OFFSET,
+ VOPARG_OFFSETOF(struct vop_ballocn_args, a_cred),
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+int VOP_BALLOCN(vp, offset, length, cred, flags)
+ struct vnode *vp;
+ off_t offset;
+ off_t length;
+ struct ucred *cred;
+ int flags;
+{
+ struct vop_ballocn_args a;
+ a.a_desc = VDESC(vop_ballocn);
+ a.a_vp = vp;
+ a.a_offset = offset;
+ a.a_length = length;
+ a.a_cred = cred;
+ a.a_flags = flags;
+ return (VCALL(vp, VOFFSET(vop_ballocn), &a));
+}
+
+int vop_getpages_vp_offsets[] = {
+ VOPARG_OFFSETOF(struct vop_getpages_args,a_vp),
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_getpages_desc = {
+ 0,
+ "vop_getpages",
+ 0,
+ vop_getpages_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+int VOP_GETPAGES(vp, offset, m, count, centeridx, access_type, advice, flags)
+ struct vnode *vp;
+ voff_t offset;
+ vm_page_t *m;
+ int *count;
+ int centeridx;
+ vm_prot_t access_type;
+ int advice;
+ int flags;
+{
+ struct vop_getpages_args a;
+ a.a_desc = VDESC(vop_getpages);
+ a.a_vp = vp;
+ a.a_offset = offset;
+ a.a_m = m;
+ a.a_count = count;
+ a.a_centeridx = centeridx;
+ a.a_access_type = access_type;
+ a.a_advice = advice;
+ a.a_flags = flags;
+ return (VCALL(vp, VOFFSET(vop_getpages), &a));
+}
+
+int vop_putpages_vp_offsets[] = {
+ VOPARG_OFFSETOF(struct vop_putpages_args,a_vp),
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_putpages_desc = {
+ 0,
+ "vop_putpages",
+ 0,
+ vop_putpages_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+int VOP_PUTPAGES(vp, m, count, flags, rtvals)
+ struct vnode *vp;
+ vm_page_t *m;
+ int count;
+ int flags;
+ int *rtvals;
+{
+ struct vop_putpages_args a;
+ a.a_desc = VDESC(vop_putpages);
+ a.a_vp = vp;
+ a.a_m = m;
+ a.a_count = count;
+ a.a_flags = flags;
+ a.a_rtvals = rtvals;
+ return (VCALL(vp, VOFFSET(vop_putpages), &a));
+}
+
+int vop_size_vp_offsets[] = {
+ VOPARG_OFFSETOF(struct vop_size_args,a_vp),
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_size_desc = {
+ 0,
+ "vop_size",
+ 0,
+ vop_size_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+int VOP_SIZE(vp, size, eobp)
+ struct vnode *vp;
+ off_t size;
+ off_t *eobp;
+{
+ struct vop_size_args a;
+ a.a_desc = VDESC(vop_size);
+ a.a_vp = vp;
+ a.a_size = size;
+ a.a_eobp = eobp;
+ return (VCALL(vp, VOFFSET(vop_size), &a));
+}
+
/* Special cases: */
int vop_strategy_vp_offsets[] = {
@@ -1323,6 +1457,10 @@ struct vnodeop_desc *vfs_op_descs[] = {
&vop_advlock_desc,
&vop_reallocblks_desc,
&vop_whiteout_desc,
+ &vop_ballocn_desc,
+ &vop_getpages_desc,
+ &vop_putpages_desc,
+ &vop_size_desc,
NULL
};
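All four additions follow the same generated shape: a vnodeop_desc records the operation's name and where the vnode pointer and credential live inside its args struct, and the wrapper packs the arguments and dispatches through the vnode's operation vector. The caller never sees which implementation it reaches; for example, the VOP_SIZE() call made by genfs_getpages() above resolves through vp->v_op and lands in genfs_size() for any filesystem that lists it in its vnodeopv_entry_desc table:

	off_t eof;
	int error;

	/* expands to VCALL(vp, VOFFSET(vop_size), &a) */
	error = VOP_SIZE(vp, vp->v_uvm.u_size, &eof);
	if (error)
		return (error);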
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index fdf8e6e4015..1af0f56e276 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -1,4 +1,4 @@
-# $OpenBSD: vnode_if.src,v 1.13 2001/07/26 20:24:47 millert Exp $
+# $OpenBSD: vnode_if.src,v 1.14 2001/11/27 05:27:12 art Exp $
# $NetBSD: vnode_if.src,v 1.10 1996/05/11 18:26:27 mycroft Exp $
#
# Copyright (c) 1992, 1993
@@ -467,3 +467,48 @@ vop_whiteout {
#vop_bwrite {
# IN struct buf *bp;
#};
+
+#
+#% ballocn vp L L L
+#
+vop_ballocn {
+ IN struct vnode *vp;
+ IN off_t offset;
+ IN off_t length;
+ IN struct ucred *cred;
+ IN int flags;
+};
+
+#
+#% getpages vp L L L
+#
+vop_getpages {
+ IN struct vnode *vp;
+ IN voff_t offset;
+ IN vm_page_t *m;
+ IN int *count;
+ IN int centeridx;
+ IN vm_prot_t access_type;
+ IN int advice;
+ IN int flags;
+};
+
+#
+#% putpages vp L L L
+#
+vop_putpages {
+ IN struct vnode *vp;
+ IN vm_page_t *m;
+ IN int count;
+ IN int flags;
+ IN int *rtvals;
+};
+
+#
+#% size vp = = =
+#
+vop_size {
+ IN struct vnode *vp;
+ IN off_t size;
+ OUT off_t *eobp;
+};
diff --git a/sys/msdosfs/msdosfs_denode.c b/sys/msdosfs/msdosfs_denode.c
index eb82f75afe3..f4ab33d5272 100644
--- a/sys/msdosfs/msdosfs_denode.c
+++ b/sys/msdosfs/msdosfs_denode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: msdosfs_denode.c,v 1.19 2001/11/06 19:53:20 miod Exp $ */
+/* $OpenBSD: msdosfs_denode.c,v 1.20 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: msdosfs_denode.c,v 1.23 1997/10/17 11:23:58 ws Exp $ */
/*-
@@ -72,6 +72,8 @@ u_long dehash; /* size of hash table - 1 */
#define DEHASH(dev, dcl, doff) (((dev) + (dcl) + (doff) / sizeof(struct direntry)) \
& dehash)
+extern int prtactive;
+
static struct denode *msdosfs_hashget __P((dev_t, u_long, u_long));
static int msdosfs_hashins __P((struct denode *));
static void msdosfs_hashrem __P((struct denode *));
@@ -332,6 +334,7 @@ retry:
nvp->v_type = VREG;
VREF(ldep->de_devvp);
*depp = ldep;
+ nvp->v_uvm.u_size = ldep->de_FileSize;
return (0);
}
@@ -461,7 +464,7 @@ detrunc(dep, length, flags, cred, p)
#endif
return (error);
}
- uvm_vnp_uncache(DETOV(dep));
+
/*
* is this the right place for it?
*/
@@ -524,7 +527,7 @@ deextend(dep, length, cred)
struct ucred *cred;
{
struct msdosfsmount *pmp = dep->de_pmp;
- u_long count;
+ u_long count, osize;
int error;
/*
@@ -557,8 +560,12 @@ deextend(dep, length, cred)
}
}
+ osize = dep->de_FileSize;
dep->de_FileSize = length;
+ uvm_vnp_setsize(DETOV(dep), (voff_t)dep->de_FileSize);
dep->de_flag |= DE_UPDATE|DE_MODIFIED;
+ uvm_vnp_zerorange(DETOV(dep), (off_t)osize,
+ (size_t)(dep->de_FileSize - osize));
return (deupdat(dep, 1));
}
@@ -593,7 +600,6 @@ msdosfs_reclaim(v)
} */ *ap = v;
struct vnode *vp = ap->a_vp;
struct denode *dep = VTODE(vp);
- extern int prtactive;
#ifdef MSDOSFS_DEBUG
printf("msdosfs_reclaim(): dep %08x, file %s, refcnt %d\n",
@@ -634,7 +640,6 @@ msdosfs_inactive(v)
struct denode *dep = VTODE(vp);
struct proc *p = ap->a_p;
int error;
- extern int prtactive;
#ifdef MSDOSFS_DEBUG
printf("msdosfs_inactive(): dep %08x, de_Name[0] %x\n", dep, dep->de_Name[0]);
@@ -661,7 +666,9 @@ msdosfs_inactive(v)
dep, dep->de_refcnt, vp->v_mount->mnt_flag, MNT_RDONLY);
#endif
if (dep->de_refcnt <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
- error = detrunc(dep, (u_long)0, 0, NOCRED, NULL);
+ if (dep->de_FileSize != 0) {
+ error = detrunc(dep, (u_long)0, 0, NOCRED, NULL);
+ }
dep->de_Name[0] = SLOT_DELETED;
}
deupdat(dep, 0);
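deextend() now grows a file in an order the rest of UBC depends on: publish the new size with uvm_vnp_setsize() first, then zero the newly exposed range through the page cache with uvm_vnp_zerorange() so stale page contents cannot leak into the extension. The hunk above, restated with the ordering made explicit:

	osize = dep->de_FileSize;
	dep->de_FileSize = length;
	/* grow the uvm_object before touching the new range */
	uvm_vnp_setsize(DETOV(dep), (voff_t)dep->de_FileSize);
	dep->de_flag |= DE_UPDATE|DE_MODIFIED;
	/* zero [osize, length) via the page cache */
	uvm_vnp_zerorange(DETOV(dep), (off_t)osize,
	    (size_t)(dep->de_FileSize - osize));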
diff --git a/sys/msdosfs/msdosfs_fat.c b/sys/msdosfs/msdosfs_fat.c
index 772bdfb67e9..3576a663cdc 100644
--- a/sys/msdosfs/msdosfs_fat.c
+++ b/sys/msdosfs/msdosfs_fat.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: msdosfs_fat.c,v 1.8 1999/01/10 21:50:32 art Exp $ */
+/* $OpenBSD: msdosfs_fat.c,v 1.9 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: msdosfs_fat.c,v 1.26 1997/10/17 11:24:02 ws Exp $ */
/*-
@@ -988,8 +988,7 @@ extendfile(dep, count, bpp, ncp, flags)
int flags;
{
int error;
- u_long frcn;
- u_long cn, got;
+ u_long frcn = 0, cn, got;
struct msdosfsmount *pmp = dep->de_pmp;
struct buf *bp;
@@ -1060,41 +1059,26 @@ extendfile(dep, count, bpp, ncp, flags)
}
/*
- * Update the "last cluster of the file" entry in the denode's fat
- * cache.
+ * Update the "last cluster of the file" entry in the
+ * denode's fat cache.
*/
+
fc_setcache(dep, FC_LASTFC, frcn + got - 1, cn + got - 1);
-
- if (flags & DE_CLEAR) {
+ if (flags & DE_CLEAR &&
+ (dep->de_Attributes & ATTR_DIRECTORY)) {
while (got-- > 0) {
- /*
- * Get the buf header for the new block of the file.
- */
- if (dep->de_Attributes & ATTR_DIRECTORY)
- bp = getblk(pmp->pm_devvp, cntobn(pmp, cn++),
- pmp->pm_bpcluster, 0, 0);
- else {
- bp = getblk(DETOV(dep), de_cn2bn(pmp, frcn++),
- pmp->pm_bpcluster, 0, 0);
- /*
- * Do the bmap now, as in msdosfs_write
- */
- if (pcbmap(dep,
- de_bn2cn(pmp, bp->b_lblkno),
- &bp->b_blkno, 0, 0))
- bp->b_blkno = -1;
- if (bp->b_blkno == -1)
- panic("extendfile: pcbmap");
- }
+ bp = getblk(pmp->pm_devvp, cntobn(pmp, cn++),
+ pmp->pm_bpcluster, 0, 0);
clrbuf(bp);
if (bpp) {
*bpp = bp;
bpp = NULL;
- } else
+ } else {
bdwrite(bp);
+ }
}
}
}
-
+
return (0);
}
diff --git a/sys/msdosfs/msdosfs_vfsops.c b/sys/msdosfs/msdosfs_vfsops.c
index fec59174189..63175e08754 100644
--- a/sys/msdosfs/msdosfs_vfsops.c
+++ b/sys/msdosfs/msdosfs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: msdosfs_vfsops.c,v 1.25 2001/11/21 21:37:01 csapuntz Exp $ */
+/* $OpenBSD: msdosfs_vfsops.c,v 1.26 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: msdosfs_vfsops.c,v 1.48 1997/10/18 02:54:57 briggs Exp $ */
/*-
@@ -584,15 +584,9 @@ msdosfs_mountfs(devvp, mp, p, argp)
mp->mnt_data = (qaddr_t)pmp;
mp->mnt_stat.f_fsid.val[0] = (long)dev;
mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
-#ifdef QUOTA
- /*
- * If we ever do quotas for DOS filesystems this would be a place
- * to fill in the info in the msdosfsmount structure. You dolt,
- * quotas on dos filesystems make no sense because files have no
- * owners on dos filesystems. of course there is some empty space
- * in the directory entry where we could put uid's and gid's.
- */
-#endif
+ mp->mnt_dev_bshift = pmp->pm_bnshift;
+ mp->mnt_fs_bshift = pmp->pm_cnshift;
+
devvp->v_specmountpoint = mp;
return (0);
@@ -720,10 +714,11 @@ msdosfs_sync_vnode(struct vnode *vp, void *arg)
struct denode *dep;
dep = VTODE(vp);
- if (vp->v_type == VNON ||
- ((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0
- && vp->v_dirtyblkhd.lh_first == NULL) ||
- msa->waitfor == MNT_LAZY) {
+ if (msa->waitfor == MNT_LAZY || vp->v_type == VNON ||
+ (((dep->de_flag &
+ (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) &&
+ (LIST_EMPTY(&vp->v_dirtyblkhd) &&
+ vp->v_uvm.u_obj.uo_npages == 0))) {
simple_unlock(&vp->v_interlock);
return (0);
}
diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c
index 7f1ab384295..1e364039937 100644
--- a/sys/msdosfs/msdosfs_vnops.c
+++ b/sys/msdosfs/msdosfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: msdosfs_vnops.c,v 1.28 2001/11/06 19:53:20 miod Exp $ */
+/* $OpenBSD: msdosfs_vnops.c,v 1.29 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: msdosfs_vnops.c,v 1.63 1997/10/17 11:24:19 ws Exp $ */
/*-
@@ -413,11 +413,11 @@ msdosfs_read(v)
int error = 0;
int diff;
int blsize;
- int isadir;
long n;
long on;
daddr_t lbn;
- daddr_t rablock;
+ void *win;
+ vsize_t bytelen;
struct buf *bp;
struct vnode *vp = ap->a_vp;
struct denode *dep = VTODE(vp);
@@ -432,42 +432,45 @@ msdosfs_read(v)
if (uio->uio_offset < 0)
return (EINVAL);
- isadir = dep->de_Attributes & ATTR_DIRECTORY;
+ if (vp->v_type == VREG) {
+ while (uio->uio_resid > 0) {
+ bytelen = MIN(dep->de_FileSize - uio->uio_offset,
+ uio->uio_resid);
+
+ if (bytelen == 0)
+ break;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error)
+ break;
+ }
+ dep->de_flag |= DE_ACCESS;
+ goto out;
+ }
+
+ /* this loop is only for directories now */
do {
lbn = de_cluster(pmp, uio->uio_offset);
on = uio->uio_offset & pmp->pm_crbomask;
- n = min((u_long) (pmp->pm_bpcluster - on), uio->uio_resid);
+ n = MIN((pmp->pm_bpcluster - on), uio->uio_resid);
diff = dep->de_FileSize - uio->uio_offset;
if (diff <= 0)
return (0);
if (diff < n)
n = diff;
/* convert cluster # to block # if a directory */
- if (isadir) {
- error = pcbmap(dep, lbn, &lbn, 0, &blsize);
- if (error)
- return (error);
- }
+ error = pcbmap(dep, lbn, &lbn, 0, &blsize);
+ if (error)
+ return (error);
/*
* If we are operating on a directory file then be sure to
* do i/o with the vnode for the filesystem instead of the
* vnode for the directory.
*/
- if (isadir) {
- error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
- } else {
- rablock = lbn + 1;
- if (dep->de_lastr + 1 == lbn &&
- de_cn2off(pmp, rablock) < dep->de_FileSize)
- error = breada(vp, de_cn2bn(pmp, lbn),
- pmp->pm_bpcluster, de_cn2bn(pmp, rablock),
- pmp->pm_bpcluster, NOCRED, &bp);
- else
- error = bread(vp, de_cn2bn(pmp, lbn),
- pmp->pm_bpcluster, NOCRED, &bp);
- dep->de_lastr = lbn;
- }
- n = min(n, pmp->pm_bpcluster - bp->b_resid);
+ error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
+ n = MIN(n, pmp->pm_bpcluster - bp->b_resid);
if (error) {
brelse(bp);
return (error);
@@ -475,8 +478,10 @@ msdosfs_read(v)
error = uiomove(bp->b_data + on, (int) n, uio);
brelse(bp);
} while (error == 0 && uio->uio_resid > 0 && n != 0);
- if (!isadir && !(vp->v_mount->mnt_flag & MNT_NOATIME))
- dep->de_flag |= DE_ACCESS;
+
+out:
+ if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
+ error = deupdat(dep, 1);
return (error);
}
@@ -493,19 +498,19 @@ msdosfs_write(v)
int a_ioflag;
struct ucred *a_cred;
} */ *ap = v;
- int n;
- int croffset;
int resid;
u_long osize;
int error = 0;
u_long count;
- daddr_t bn, lastcn;
- struct buf *bp;
+ daddr_t lastcn;
int ioflag = ap->a_ioflag;
+ void *win;
+ vsize_t bytelen;
+ off_t oldoff;
+ boolean_t rv;
struct uio *uio = ap->a_uio;
struct proc *p = uio->uio_procp;
struct vnode *vp = ap->a_vp;
- struct vnode *thisvp;
struct denode *dep = VTODE(vp);
struct msdosfsmount *pmp = dep->de_pmp;
struct ucred *cred = ap->a_cred;
@@ -521,7 +526,6 @@ msdosfs_write(v)
case VREG:
if (ioflag & IO_APPEND)
uio->uio_offset = dep->de_FileSize;
- thisvp = vp;
break;
case VDIR:
return EISDIR;
@@ -576,84 +580,52 @@ msdosfs_write(v)
} else
lastcn = de_clcount(pmp, osize) - 1;
+ if (dep->de_FileSize < uio->uio_offset + resid) {
+ dep->de_FileSize = uio->uio_offset + resid;
+ uvm_vnp_setsize(vp, dep->de_FileSize);
+ }
+
do {
- if (de_cluster(pmp, uio->uio_offset) > lastcn) {
+ oldoff = uio->uio_offset;
+ if (de_cluster(pmp, oldoff) > lastcn) {
error = ENOSPC;
break;
}
-
- bn = de_blk(pmp, uio->uio_offset);
- if ((uio->uio_offset & pmp->pm_crbomask) == 0
- && (de_blk(pmp, uio->uio_offset + uio->uio_resid) > de_blk(pmp, uio->uio_offset)
- || uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) {
- /*
- * If either the whole cluster gets written,
- * or we write the cluster from its start beyond EOF,
- * then no need to read data from disk.
- */
- bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0);
- clrbuf(bp);
- /*
- * Do the bmap now, since pcbmap needs buffers
- * for the fat table. (see msdosfs_strategy)
- */
- if (bp->b_blkno == bp->b_lblkno) {
- error = pcbmap(dep,
- de_bn2cn(pmp, bp->b_lblkno),
- &bp->b_blkno, 0, 0);
- if (error)
- bp->b_blkno = -1;
- }
- if (bp->b_blkno == -1) {
- brelse(bp);
- if (!error)
- error = EIO; /* XXX */
- break;
- }
- } else {
- /*
- * The block we need to write into exists, so read it in.
- */
- error = bread(thisvp, bn, pmp->pm_bpcluster,
- NOCRED, &bp);
- if (error) {
- brelse(bp);
- break;
- }
- }
-
- croffset = uio->uio_offset & pmp->pm_crbomask;
- n = min(uio->uio_resid, pmp->pm_bpcluster - croffset);
- if (uio->uio_offset + n > dep->de_FileSize) {
- dep->de_FileSize = uio->uio_offset + n;
- uvm_vnp_setsize(vp, dep->de_FileSize);
- }
- uvm_vnp_uncache(vp);
- /*
- * Should these vnode_pager_* functions be done on dir
- * files?
- */
+ bytelen = MIN(dep->de_FileSize - oldoff, uio->uio_resid);
/*
- * Copy the data from user space into the buf header.
+ * XXXUBC if file is mapped and this is the last block,
+ * process one page at a time.
*/
- error = uiomove(bp->b_data + croffset, n, uio);
+ if (bytelen == 0)
+ break;
+		win = ubc_alloc(&vp->v_uvm.u_obj, oldoff, &bytelen, UBC_WRITE);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
+ }
/*
- * If they want this synchronous then write it and wait for
- * it. Otherwise, if on a cluster boundary write it
- * asynchronously so we can move on to the next block
- * without delay. Otherwise do a delayed write because we
-	 * may want to write some more into the block later.
+ * flush what we just wrote if necessary.
+ * XXXUBC simplistic async flushing.
*/
- if (ioflag & IO_SYNC)
- (void) bwrite(bp);
- else if (n + croffset == pmp->pm_bpcluster)
- bawrite(bp);
- else
- bdwrite(bp);
- dep->de_flag |= DE_UPDATE;
+ if (ioflag & IO_SYNC) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, oldoff,
+ oldoff + bytelen, PGO_CLEANIT|PGO_SYNCIO);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ } else if (oldoff >> 16 != uio->uio_offset >> 16) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, (oldoff >> 16) << 16,
+ (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ }
} while (error == 0 && uio->uio_resid > 0);
+ dep->de_flag |= DE_UPDATE;
/*
* If the write failed and they want us to, truncate the file back
@@ -666,7 +638,8 @@ errexit:
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
} else {
- detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL);
+ detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED,
+ NULL);
if (uio->uio_resid != resid)
error = 0;
}
@@ -1506,11 +1479,11 @@ msdosfs_readdir(v)
while (uio->uio_resid > 0) {
lbn = de_cluster(pmp, offset - bias);
on = (offset - bias) & pmp->pm_crbomask;
- n = min(pmp->pm_bpcluster - on, uio->uio_resid);
+ n = MIN(pmp->pm_bpcluster - on, uio->uio_resid);
diff = dep->de_FileSize - (offset - bias);
if (diff <= 0)
break;
- n = min(n, diff);
+ n = MIN(n, diff);
if ((error = pcbmap(dep, lbn, &bn, &cn, &blsize)) != 0)
break;
error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
@@ -1518,7 +1491,7 @@ msdosfs_readdir(v)
brelse(bp);
return (error);
}
- n = min(n, blsize - bp->b_resid);
+ n = MIN(n, blsize - bp->b_resid);
/*
* Convert from dos directory entries to fs-independent
@@ -1779,12 +1752,12 @@ msdosfs_strategy(v)
biodone(bp);
return (error);
}
-#ifdef DIAGNOSTIC
-#endif
+
/*
* Read/write the block from/to the disk that contains the desired
* file block.
*/
+
vp = dep->de_devvp;
bp->b_dev = vp->v_rdev;
VOCALL(vp->v_op, VOFFSET(vop_strategy), ap);
@@ -1902,7 +1875,10 @@ struct vnodeopv_entry_desc msdosfs_vnodeop_entries[] = {
{ &vop_advlock_desc, msdosfs_advlock }, /* advlock */
{ &vop_reallocblks_desc, msdosfs_reallocblks }, /* reallocblks */
{ &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */
- { (struct vnodeop_desc *)NULL, (int (*) __P((void *)))NULL }
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_putpages_desc, genfs_putpages },
+ { &vop_size_desc, genfs_size },
+ { NULL, NULL }
};
struct vnodeopv_desc msdosfs_vnodeop_opv_desc =
{ &msdosfs_vnodeop_p, msdosfs_vnodeop_entries };
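
The rewritten msdosfs_read()/msdosfs_write() above replace the per-cluster
bread()/bwrite() loops with ubc_alloc()/uiomove()/ubc_release() windows over
the vnode's pages. The following standalone sketch models that loop shape in
userland; map_window() is a hypothetical stand-in for ubc_alloc() (which in
the kernel returns a temporary, pageable kernel mapping of the file's pages),
and a flat array stands in for the page cache. Illustration only, not kernel
code.

    #include <sys/types.h>
    #include <stdio.h>
    #include <string.h>

    /*
     * Userland model of the ubc window loop in msdosfs_write() above.
     * map_window() is a hypothetical stand-in for ubc_alloc(); like the
     * real thing, it may shorten *len to the window size.
     */
    #define FILESIZE	8192
    #define WINSIZE	1024	/* models the ubc window size */

    static char backing[FILESIZE];	/* stands in for the page cache */

    static void *
    map_window(off_t off, size_t *len)
    {
    	if (*len > WINSIZE)
    		*len = WINSIZE;
    	if (off + *len > FILESIZE)
    		*len = FILESIZE - off;
    	return &backing[off];
    }

    int
    main(void)
    {
    	const char *src = "hello, unified buffer cache";
    	size_t resid = strlen(src);
    	off_t off = 0;

    	while (resid > 0) {
    		size_t bytelen = resid;
    		void *win = map_window(off, &bytelen);	/* ubc_alloc() */
    		memcpy(win, src + off, bytelen);	/* uiomove() */
    		/* ubc_release(win, 0) would unmap the window here */
    		off += bytelen;
    		resid -= bytelen;
    	}
    	printf("wrote: %s\n", backing);
    	return 0;
    }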
diff --git a/sys/nfs/nfs.h b/sys/nfs/nfs.h
index 33435dc23e4..b86819902f2 100644
--- a/sys/nfs/nfs.h
+++ b/sys/nfs/nfs.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs.h,v 1.13 2001/09/16 00:42:44 millert Exp $ */
+/* $OpenBSD: nfs.h,v 1.14 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs.h,v 1.10.4.1 1996/05/27 11:23:56 fvdl Exp $ */
/*
@@ -78,8 +78,18 @@
* Ideally, NFS_DIRBLKSIZ should be bigger, but I've seen servers with
* broken NFS/ethernet drivers that won't work with anything bigger (Linux..)
*/
-#define NFS_DIRBLKSIZ 1024 /* Must be a multiple of DIRBLKSIZ */
+#if 1
+/*
+ * XXXUBC temp hack because of the removal of b_validend.
+ * Eventually we'll store NFS VDIR data in the page cache as well;
+ * we'll fix this at that point.
+ */
+#define NFS_DIRBLKSIZ PAGE_SIZE
+#define NFS_READDIRBLKSIZ PAGE_SIZE
+#else
+#define NFS_DIRBLKSIZ 1024 /* Must be a multiple of DIRBLKSIZ */
#define NFS_READDIRBLKSIZ 512 /* Size of read dir blocks. XXX */
+#endif
/*
* Oddballs
@@ -111,10 +121,10 @@
#endif
/*
- * The B_INVAFTERWRITE flag should be set to whatever is required by the
- * buffer cache code to say "Invalidate the block after it is written back".
+ * Use the vm_page flag reserved for pager use to indicate pages
+ * which have been written to the server but not yet committed.
*/
-#define B_INVAFTERWRITE B_INVAL
+#define PG_NEEDCOMMIT PG_PAGER1
/*
* The IO_METASYNC flag should be implemented for local file systems.
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
index 1f33bc2eab7..42b25763a88 100644
--- a/sys/nfs/nfs_bio.c
+++ b/sys/nfs/nfs_bio.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_bio.c,v 1.24 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_bio.c,v 1.25 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $ */
/*
@@ -50,8 +50,9 @@
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
+#include <sys/pool.h>
-#include <uvm/uvm_extern.h>
+#include <uvm/uvm.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
@@ -70,20 +71,19 @@ struct nfsstats nfsstats;
*/
int
nfs_bioread(vp, uio, ioflag, cred)
- register struct vnode *vp;
- register struct uio *uio;
+ struct vnode *vp;
+ struct uio *uio;
int ioflag;
struct ucred *cred;
{
- register struct nfsnode *np = VTONFS(vp);
- register int biosize, diff;
- struct buf *bp = NULL, *rabp;
+ struct nfsnode *np = VTONFS(vp);
+ int biosize;
+ struct buf *bp = NULL;
struct vattr vattr;
struct proc *p;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- daddr_t lbn, bn, rabn;
caddr_t baddr;
- int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
+ int got_buf = 0, error = 0, n = 0, on = 0;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_READ)
@@ -153,87 +153,25 @@ nfs_bioread(vp, uio, ioflag, cred)
switch (vp->v_type) {
case VREG:
nfsstats.biocache_reads++;
- lbn = uio->uio_offset / biosize;
- on = uio->uio_offset & (biosize - 1);
- bn = lbn * (biosize / DEV_BSIZE);
- not_readin = 1;
-
- /*
- * Start the read ahead(s), as required.
- */
- if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
- for (nra = 0; nra < nmp->nm_readahead &&
- (lbn + 1 + nra) * biosize < np->n_size; nra++) {
- rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
- if (!incore(vp, rabn)) {
- rabp = nfs_getcacheblk(vp, rabn, biosize, p);
- if (!rabp)
- return (EINTR);
- if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
- rabp->b_flags |= (B_READ | B_ASYNC);
- if (nfs_asyncio(rabp)) {
- rabp->b_flags |= B_INVAL;
- brelse(rabp);
- }
- } else
- brelse(rabp);
- }
- }
- }
+ error = 0;
+ while (uio->uio_resid > 0) {
+ void *win;
+ vsize_t bytelen = MIN(np->n_size - uio->uio_offset,
+ uio->uio_resid);
- /*
- * If the block is in the cache and has the required data
- * in a valid region, just copy it out.
- * Otherwise, get the block and write back/read in,
- * as required.
- */
- if ((bp = incore(vp, bn)) &&
- (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
- (B_BUSY | B_WRITEINPROG))
- got_buf = 0;
- else {
-again:
- bp = nfs_getcacheblk(vp, bn, biosize, p);
- if (!bp)
- return (EINTR);
- got_buf = 1;
- if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
- bp->b_flags |= B_READ;
- not_readin = 0;
- error = nfs_doio(bp, p);
- if (error) {
- brelse(bp);
- return (error);
- }
- }
- }
- n = min((unsigned)(biosize - on), uio->uio_resid);
- diff = np->n_size - uio->uio_offset;
- if (diff < n)
- n = diff;
- if (not_readin && n > 0) {
- if (on < bp->b_validoff || (on + n) > bp->b_validend) {
- if (!got_buf) {
- bp = nfs_getcacheblk(vp, bn, biosize, p);
- if (!bp)
- return (EINTR);
- got_buf = 1;
- }
- bp->b_flags |= B_INVAFTERWRITE;
- if (bp->b_dirtyend > 0) {
- if ((bp->b_flags & B_DELWRI) == 0)
- panic("nfsbioread");
- if (VOP_BWRITE(bp) == EINTR)
- return (EINTR);
- } else
- brelse(bp);
- goto again;
+ if (bytelen == 0)
+ break;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
}
}
- diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
- if (diff < n)
- n = diff;
+ n = 0;
break;
+
case VLNK:
nfsstats.biocache_readlinks++;
bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
@@ -247,7 +185,7 @@ again:
return (error);
}
}
- n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
+ n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
got_buf = 1;
on = 0;
break;
@@ -289,18 +227,17 @@ nfs_write(v)
int a_ioflag;
struct ucred *a_cred;
} */ *ap = v;
- register int biosize;
- register struct uio *uio = ap->a_uio;
+ int biosize;
+ struct uio *uio = ap->a_uio;
struct proc *p = uio->uio_procp;
- register struct vnode *vp = ap->a_vp;
+ struct vnode *vp = ap->a_vp;
struct nfsnode *np = VTONFS(vp);
- register struct ucred *cred = ap->a_cred;
+ struct ucred *cred = ap->a_cred;
int ioflag = ap->a_ioflag;
- struct buf *bp;
struct vattr vattr;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- daddr_t lbn, bn;
- int n, on, error = 0;
+ int error = 0;
+ int rv;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
@@ -360,85 +297,47 @@ nfs_write(v)
*/
biosize = nmp->nm_rsize;
do {
-
- /*
- * XXX make sure we aren't cached in the VM page cache
- */
- uvm_vnp_uncache(vp);
+ void *win;
+ voff_t oldoff = uio->uio_offset;
+ vsize_t bytelen = uio->uio_resid;
nfsstats.biocache_writes++;
- lbn = uio->uio_offset / biosize;
- on = uio->uio_offset & (biosize-1);
- n = min((unsigned)(biosize - on), uio->uio_resid);
- bn = lbn * (biosize / DEV_BSIZE);
-again:
- bp = nfs_getcacheblk(vp, bn, biosize, p);
- if (!bp)
- return (EINTR);
np->n_flag |= NMODIFIED;
- if (uio->uio_offset + n > np->n_size) {
- np->n_size = uio->uio_offset + n;
- uvm_vnp_setsize(vp, (u_long)np->n_size);
- }
-
- /*
- * If the new write will leave a contiguous dirty
- * area, just update the b_dirtyoff and b_dirtyend,
- * otherwise force a write rpc of the old dirty area.
- */
- if (bp->b_dirtyend > 0 &&
- (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
- bp->b_proc = p;
- if (VOP_BWRITE(bp) == EINTR)
- return (EINTR);
- goto again;
- }
-
- error = uiomove((char *)bp->b_data + on, n, uio);
- if (error) {
- bp->b_flags |= B_ERROR;
- brelse(bp);
- return (error);
+ if (np->n_size < uio->uio_offset + bytelen) {
+ np->n_size = uio->uio_offset + bytelen;
+ uvm_vnp_setsize(vp, np->n_size);
}
- if (bp->b_dirtyend > 0) {
- bp->b_dirtyoff = min(on, bp->b_dirtyoff);
- bp->b_dirtyend = max((on + n), bp->b_dirtyend);
- } else {
- bp->b_dirtyoff = on;
- bp->b_dirtyend = on + n;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, &bytelen,
+ UBC_WRITE);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ rv = 1;
+ if ((ioflag & IO_SYNC)) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj,
+ oldoff & ~(nmp->nm_wsize - 1),
+ uio->uio_offset & ~(nmp->nm_wsize - 1),
+ PGO_CLEANIT|PGO_SYNCIO);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ } else if ((oldoff & ~(nmp->nm_wsize - 1)) !=
+ (uio->uio_offset & ~(nmp->nm_wsize - 1))) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj,
+ oldoff & ~(nmp->nm_wsize - 1),
+ uio->uio_offset & ~(nmp->nm_wsize - 1),
+ PGO_CLEANIT|PGO_WEAK);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
}
- if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
- bp->b_validoff > bp->b_dirtyend) {
- bp->b_validoff = bp->b_dirtyoff;
- bp->b_validend = bp->b_dirtyend;
- } else {
- bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
- bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
+ if (!rv) {
+ error = EIO;
}
-
- /*
- * Since this block is being modified, it must be written
- * again and not just committed.
- */
- bp->b_flags &= ~B_NEEDCOMMIT;
-
- /*
- * If the lease is non-cachable or IO_SYNC do bwrite().
- */
- if (ioflag & IO_SYNC) {
- bp->b_proc = p;
- error = VOP_BWRITE(bp);
- if (error)
- return (error);
- } else if ((n + on) == biosize) {
- bp->b_proc = (struct proc *)0;
- bp->b_flags |= B_ASYNC;
- (void)nfs_writebp(bp, 0);
- } else {
- bdwrite(bp);
+ if (error) {
+ break;
}
- } while (uio->uio_resid > 0 && n > 0);
- return (0);
+ } while (uio->uio_resid > 0);
+ return (error);
}
/*
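
The flush decisions in nfs_write() above key off nm_wsize-aligned windows:
"off & ~(wsize - 1)" rounds a byte offset down to a write-block boundary,
which is only valid when nm_wsize is a power of two. A small worked example
of the crossing test, with illustrative values:

    #include <stdio.h>

    /*
     * Worked example of the boundary test nfs_write() above uses to
     * decide when to start an async flush.  The mask arithmetic assumes
     * the write size is a power of two; values here are illustrative.
     */
    int
    main(void)
    {
    	unsigned long wsize = 32768;			/* e.g. nm_wsize */
    	unsigned long oldoff = 40000, newoff = 70000;	/* uio offsets */
    	unsigned long lo = oldoff & ~(wsize - 1);	/* 32768 */
    	unsigned long hi = newoff & ~(wsize - 1);	/* 65536 */

    	if (lo != hi)
    		printf("crossed a write block: flush [%lu, %lu)\n", lo, hi);
    	return 0;
    }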
@@ -460,9 +359,9 @@ nfs_getcacheblk(vp, bn, size, p)
if (nmp->nm_flag & NFSMNT_INT) {
bp = getblk(vp, bn, size, PCATCH, 0);
- while (bp == (struct buf *)0) {
- if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
- return ((struct buf *)0);
+ while (bp == NULL) {
+ if (nfs_sigintr(nmp, NULL, p))
+ return (NULL);
bp = getblk(vp, bn, size, 0, 2 * hz);
}
} else
@@ -502,7 +401,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg)
np->n_flag |= NFLUSHWANT;
error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
slptimeo);
- if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
+ if (error && intrflg && nfs_sigintr(nmp, NULL, p))
return (EINTR);
}
@@ -512,7 +411,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg)
np->n_flag |= NFLUSHINPROG;
error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
while (error) {
- if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+ if (intrflg && nfs_sigintr(nmp, NULL, p)) {
np->n_flag &= ~NFLUSHINPROG;
if (np->n_flag & NFLUSHWANT) {
np->n_flag &= ~NFLUSHWANT;
@@ -539,41 +438,20 @@ int
nfs_asyncio(bp)
struct buf *bp;
{
- int i,s;
+ int i;
if (nfs_numasync == 0)
return (EIO);
- for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
if (nfs_iodwant[i]) {
- if ((bp->b_flags & B_READ) == 0) {
- bp->b_flags |= B_WRITEINPROG;
- }
-
TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
- nfs_iodwant[i] = (struct proc *)0;
+ nfs_iodwant[i] = NULL;
wakeup((caddr_t)&nfs_iodwant[i]);
return (0);
}
+ }
- /*
- * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE
- * return EIO so the process will call nfs_doio() and do it
- * synchronously.
- */
- if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
- return (EIO);
-
- /*
- * Just turn the async write into a delayed write, instead of
-	 * doing it synchronously. Hopefully, at least one of the nfsiods
- * is currently doing a write for this file and will pick up the
- * delayed writes before going back to sleep.
- */
- s = splbio();
- buf_dirty(bp);
- splx(s);
- biodone(bp);
- return (0);
+ return (EIO);
}
/*
@@ -589,7 +467,7 @@ nfs_doio(bp, p)
register struct vnode *vp;
struct nfsnode *np;
struct nfsmount *nmp;
- int s, error = 0, diff, len, iomode, must_commit = 0;
+ int error = 0, diff, len, iomode, must_commit = 0;
struct uio uio;
struct iovec io;
@@ -636,9 +514,7 @@ nfs_doio(bp, p)
uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
nfsstats.read_bios++;
error = nfs_readrpc(vp, uiop);
- if (!error) {
- bp->b_validoff = 0;
- if (uiop->uio_resid) {
+ if (!error && uiop->uio_resid) {
/*
* If len > 0, there is a hole in the file and
* no writes after the hole have been pushed to
@@ -649,13 +525,9 @@ nfs_doio(bp, p)
len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
+ diff);
if (len > 0) {
- len = min(len, uiop->uio_resid);
- bzero((char *)bp->b_data + diff, len);
- bp->b_validend = diff + len;
- } else
- bp->b_validend = diff;
- } else
- bp->b_validend = bp->b_bcount;
+ len = MIN(len, uiop->uio_resid);
+ memset((char *)bp->b_data + diff, 0, len);
+ }
}
if (p && (vp->v_flag & VTEXT) &&
(np->n_mtime != np->n_vattr.va_mtime.tv_sec)) {
@@ -672,62 +544,19 @@ nfs_doio(bp, p)
default:
printf("nfs_doio: type %x unexpected\n",vp->v_type);
break;
- };
+ }
if (error) {
bp->b_flags |= B_ERROR;
bp->b_error = error;
}
} else {
- io.iov_len = uiop->uio_resid = bp->b_dirtyend
- - bp->b_dirtyoff;
- uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
- + bp->b_dirtyoff;
- io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
+ io.iov_base = bp->b_data;
+ io.iov_len = uiop->uio_resid = bp->b_bcount;
+ uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
uiop->uio_rw = UIO_WRITE;
nfsstats.write_bios++;
- if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
- iomode = NFSV3WRITE_UNSTABLE;
- else
- iomode = NFSV3WRITE_FILESYNC;
- bp->b_flags |= B_WRITEINPROG;
-#ifdef fvdl_debug
- printf("nfs_doio(%x): bp %x doff %d dend %d\n",
- vp, bp, bp->b_dirtyoff, bp->b_dirtyend);
-#endif
+ iomode = NFSV3WRITE_UNSTABLE;
error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
- if (!error && iomode == NFSV3WRITE_UNSTABLE)
- bp->b_flags |= B_NEEDCOMMIT;
- else
- bp->b_flags &= ~B_NEEDCOMMIT;
- bp->b_flags &= ~B_WRITEINPROG;
-
- /*
- * For an interrupted write, the buffer is still valid and the
- * write hasn't been pushed to the server yet, so we can't set
- * B_ERROR and report the interruption by setting B_EINTR. For
- * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
- * is essentially a noop.
- * For the case of a V3 write rpc not being committed to stable
- * storage, the block is still dirty and requires either a commit
- * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
- * before the block is reused. This is indicated by setting the
- * B_DELWRI and B_NEEDCOMMIT flags.
- */
- if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
- s = splbio();
- buf_dirty(bp);
- splx(s);
-
- if (!(bp->b_flags & B_ASYNC) && error)
- bp->b_flags |= B_EINTR;
- } else {
- if (error) {
- bp->b_flags |= B_ERROR;
- bp->b_error = np->n_error = error;
- np->n_flag |= NWRITEERR;
- }
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- }
}
bp->b_resid = uiop->uio_resid;
if (must_commit)
@@ -735,3 +564,590 @@ nfs_doio(bp, p)
biodone(bp);
return (error);
}
+
+/*
+ * Vnode op for VM getpages.
+ */
+int
+nfs_getpages(v)
+ void *v;
+{
+ struct vop_getpages_args /* {
+ struct vnode *a_vp;
+ voff_t a_offset;
+ vm_page_t *a_m;
+ int *a_count;
+ int a_centeridx;
+ vm_prot_t a_access_type;
+ int a_advice;
+ int a_flags;
+ } */ *ap = v;
+
+ off_t eof, offset, origoffset, startoffset, endoffset;
+ int s, i, error, npages, orignpages, npgs, ridx, pidx, pcount;
+ vaddr_t kva;
+ struct buf *bp, *mbp;
+ struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
+ int flags = ap->a_flags;
+ int bsize;
+ struct vm_page *pgs[16]; /* XXXUBC 16 */
+ boolean_t v3 = NFS_ISV3(vp);
+ boolean_t async = (flags & PGO_SYNCIO) == 0;
+ boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
+ struct proc *p = curproc;
+
+ UVMHIST_FUNC("nfs_getpages"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x count %d", vp, (int)ap->a_offset,
+ *ap->a_count,0);
+
+#ifdef DIAGNOSTIC
+ if (ap->a_centeridx < 0 || ap->a_centeridx >= *ap->a_count) {
+ panic("nfs_getpages: centeridx %d out of range",
+ ap->a_centeridx);
+ }
+#endif
+
+ error = 0;
+ origoffset = ap->a_offset;
+ eof = vp->v_uvm.u_size;
+ if (origoffset >= eof) {
+ if ((flags & PGO_LOCKED) == 0) {
+ simple_unlock(&uobj->vmobjlock);
+ }
+ UVMHIST_LOG(ubchist, "off 0x%x past EOF 0x%x",
+ (int)origoffset, (int)eof,0,0);
+ return EINVAL;
+ }
+
+ if (flags & PGO_LOCKED) {
+ uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
+ UFP_NOWAIT|UFP_NOALLOC);
+ return 0;
+ }
+
+ /* vnode is VOP_LOCKed, uobj is locked */
+
+ bsize = nmp->nm_rsize;
+ orignpages = MIN(*ap->a_count,
+ round_page(eof - origoffset) >> PAGE_SHIFT);
+ npages = orignpages;
+ startoffset = origoffset & ~(bsize - 1);
+ endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
+ + bsize - 1) & ~(bsize - 1));
+ endoffset = MIN(endoffset, round_page(eof));
+ ridx = (origoffset - startoffset) >> PAGE_SHIFT;
+
+ if (!async && !write) {
+ int rapages = MAX(PAGE_SIZE, nmp->nm_rsize) >> PAGE_SHIFT;
+
+ (void) VOP_GETPAGES(vp, endoffset, NULL, &rapages, 0,
+ VM_PROT_READ, 0, 0);
+ simple_lock(&uobj->vmobjlock);
+ }
+
+ UVMHIST_LOG(ubchist, "npages %d offset 0x%x", npages,
+ (int)origoffset, 0,0);
+ memset(pgs, 0, sizeof(pgs));
+ uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);
+
+ if (flags & PGO_OVERWRITE) {
+ UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
+
+ /* XXXUBC for now, zero the page if we allocated it */
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if (pg->flags & PG_FAKE) {
+ uvm_pagezero(pg);
+ pg->flags &= ~(PG_FAKE);
+ }
+ }
+ npages += ridx;
+ if (v3) {
+ simple_unlock(&uobj->vmobjlock);
+ goto uncommit;
+ }
+ goto out;
+ }
+
+ /*
+ * if the pages are already resident, just return them.
+ */
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if ((pg->flags & PG_FAKE) != 0 ||
+ ((ap->a_access_type & VM_PROT_WRITE) &&
+ (pg->flags & PG_RDONLY))) {
+ break;
+ }
+ }
+ if (i == npages) {
+ UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
+ npages += ridx;
+ goto out;
+ }
+
+ /*
+ * the page wasn't resident and we're not overwriting,
+ * so we're going to have to do some i/o.
+ * find any additional pages needed to cover the expanded range.
+ */
+
+ if (startoffset != origoffset ||
+ startoffset + (npages << PAGE_SHIFT) != endoffset) {
+
+ /*
+ * XXXUBC we need to avoid deadlocks caused by locking
+ * additional pages at lower offsets than pages we
+ * already have locked. for now, unlock them all and
+ * start over.
+ */
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if (pg->flags & PG_FAKE) {
+ pg->flags |= PG_RELEASED;
+ }
+ }
+ uvm_page_unbusy(&pgs[ridx], npages);
+ memset(pgs, 0, sizeof(pgs));
+
+ UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
+ startoffset, endoffset, 0,0);
+ npages = (endoffset - startoffset) >> PAGE_SHIFT;
+ npgs = npages;
+ uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
+ }
+ simple_unlock(&uobj->vmobjlock);
+
+ /*
+ * update the cached read creds for this node.
+ */
+
+ if (np->n_rcred) {
+ crfree(np->n_rcred);
+ }
+ np->n_rcred = curproc->p_ucred;
+ crhold(np->n_rcred);
+
+ /*
+ * read the desired page(s).
+ */
+
+ totalbytes = npages << PAGE_SHIFT;
+ bytes = MIN(totalbytes, vp->v_uvm.u_size - startoffset);
+ tailbytes = totalbytes - bytes;
+ skipbytes = 0;
+
+ kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
+ UVMPAGER_MAPIN_READ);
+
+ s = splbio();
+ mbp = pool_get(&bufpool, PR_WAITOK);
+ splx(s);
+ mbp->b_bufsize = totalbytes;
+ mbp->b_data = (void *)kva;
+ mbp->b_resid = mbp->b_bcount = bytes;
+ mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0);
+ mbp->b_iodone = uvm_aio_biodone;
+ mbp->b_vp = vp;
+ mbp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&mbp->b_dep);
+
+ /*
+ * if EOF is in the middle of the last page, zero the part past EOF.
+ */
+
+ if (tailbytes > 0 && (pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE)) {
+ memset((char *)kva + bytes, 0, tailbytes);
+ }
+
+ /*
+ * now loop over the pages, reading as needed.
+ */
+
+ bp = NULL;
+ for (offset = startoffset;
+ bytes > 0;
+ offset += iobytes, bytes -= iobytes) {
+
+ /*
+ * skip pages which don't need to be read.
+ */
+
+ pidx = (offset - startoffset) >> PAGE_SHIFT;
+ UVMHIST_LOG(ubchist, "pidx %d offset 0x%x startoffset 0x%x",
+ pidx, (int)offset, (int)startoffset,0);
+ while ((pgs[pidx]->flags & PG_FAKE) == 0) {
+ size_t b;
+
+ KASSERT((offset & (PAGE_SIZE - 1)) == 0);
+ b = MIN(PAGE_SIZE, bytes);
+ offset += b;
+ bytes -= b;
+ skipbytes += b;
+ pidx++;
+ UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
+ (int)offset, 0,0,0);
+ if (bytes == 0) {
+ goto loopdone;
+ }
+ }
+
+ /*
+ * see how many pages can be read with this i/o.
+ * reduce the i/o size if necessary.
+ */
+
+ iobytes = bytes;
+ if (offset + iobytes > round_page(offset)) {
+ pcount = 1;
+ while (pidx + pcount < npages &&
+ pgs[pidx + pcount]->flags & PG_FAKE) {
+ pcount++;
+ }
+ iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
+ (offset - trunc_page(offset)));
+ }
+ iobytes = MIN(iobytes, nmp->nm_rsize);
+
+ /*
+ * allocate a sub-buf for this piece of the i/o
+ * (or just use mbp if there's only 1 piece),
+ * and start it going.
+ */
+
+ if (offset == startoffset && iobytes == bytes) {
+ bp = mbp;
+ } else {
+ s = splbio();
+ bp = pool_get(&bufpool, PR_WAITOK);
+ splx(s);
+ bp->b_data = (char *)kva + offset - startoffset;
+ bp->b_resid = bp->b_bcount = iobytes;
+ bp->b_flags = B_BUSY|B_READ|B_CALL|B_ASYNC;
+ bp->b_iodone = uvm_aio_biodone1;
+ bp->b_vp = vp;
+ bp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&bp->b_dep);
+ }
+ bp->b_private = mbp;
+ bp->b_lblkno = bp->b_blkno = offset >> DEV_BSHIFT;
+
+ UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
+ bp, offset, iobytes, bp->b_blkno);
+
+ VOP_STRATEGY(bp);
+ }
+
+loopdone:
+ if (skipbytes) {
+ s = splbio();
+ mbp->b_resid -= skipbytes;
+ if (mbp->b_resid == 0) {
+ biodone(mbp);
+ }
+ splx(s);
+ }
+ if (async) {
+ UVMHIST_LOG(ubchist, "returning PEND",0,0,0,0);
+ return EINPROGRESS;
+ }
+ if (bp != NULL) {
+ error = biowait(mbp);
+ }
+ s = splbio();
+ pool_put(&bufpool, mbp);
+ splx(s);
+ uvm_pagermapout(kva, npages);
+
+ if (write && v3) {
+uncommit:
+ lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p);
+		nfs_del_committed_range(vp, origoffset, npages << PAGE_SHIFT);
+		nfs_del_tobecommitted_range(vp, origoffset,
+		    npages << PAGE_SHIFT);
+ simple_lock(&uobj->vmobjlock);
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY);
+ }
+ simple_unlock(&uobj->vmobjlock);
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ }
+
+ simple_lock(&uobj->vmobjlock);
+
+out:
+ if (error) {
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
+ pgs[i], pgs[i]->flags, 0,0);
+ if (pgs[i]->flags & PG_WANTED) {
+ wakeup(pgs[i]);
+ }
+ if (pgs[i]->flags & PG_RELEASED) {
+ uvm_unlock_pageq();
+ (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
+ uvm_lock_pageq();
+ continue;
+ }
+ if (pgs[i]->flags & PG_FAKE) {
+ uvm_pagefree(pgs[i]);
+ continue;
+ }
+ uvm_pageactivate(pgs[i]);
+ pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pgs[i], NULL);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
+ return error;
+ }
+
+ UVMHIST_LOG(ubchist, "ridx %d count %d", ridx, npages, 0,0);
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
+ pgs[i], pgs[i]->flags, 0,0);
+ if (pgs[i]->flags & PG_FAKE) {
+ UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
+ pgs[i], (int)pgs[i]->offset,0,0);
+ pgs[i]->flags &= ~(PG_FAKE);
+ pmap_clear_modify(pgs[i]);
+ pmap_clear_reference(pgs[i]);
+ }
+ if (i < ridx || i >= ridx + orignpages || async) {
+ UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
+ pgs[i], (int)pgs[i]->offset,0,0);
+ if (pgs[i]->flags & PG_WANTED) {
+ wakeup(pgs[i]);
+ }
+ if (pgs[i]->flags & PG_RELEASED) {
+ uvm_unlock_pageq();
+ (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
+ uvm_lock_pageq();
+ continue;
+ }
+ uvm_pageactivate(pgs[i]);
+ pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pgs[i], NULL);
+ }
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ if (ap->a_m != NULL) {
+ memcpy(ap->a_m, &pgs[ridx],
+ *ap->a_count * sizeof(struct vm_page *));
+ }
+ return 0;
+}
+
+/*
+ * Vnode op for VM putpages.
+ */
+int
+nfs_putpages(v)
+ void *v;
+{
+ struct vop_putpages_args /* {
+ struct vnode *a_vp;
+ struct vm_page **a_m;
+ int a_count;
+ int a_flags;
+ int *a_rtvals;
+ } */ *ap = v;
+
+ struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ struct buf *bp, *mbp;
+ struct vm_page **pgs = ap->a_m;
+ int flags = ap->a_flags;
+ int npages = ap->a_count;
+ int s, error, i;
+ size_t bytes, iobytes, skipbytes;
+ vaddr_t kva;
+ off_t offset, origoffset, commitoff;
+ uint32_t commitbytes;
+ boolean_t v3 = NFS_ISV3(vp);
+ boolean_t async = (flags & PGO_SYNCIO) == 0;
+ boolean_t weak = (flags & PGO_WEAK) && v3;
+ struct proc *p = curproc;
+ UVMHIST_FUNC("nfs_putpages"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "vp %p pgp %p count %d",
+ vp, ap->a_m, ap->a_count,0);
+
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+
+	error = 0;
+	bp = NULL;	/* stays NULL if every page only needs a commit */
+ origoffset = pgs[0]->offset;
+ bytes = MIN(ap->a_count << PAGE_SHIFT, vp->v_uvm.u_size - origoffset);
+ skipbytes = 0;
+
+ /*
+	 * if the range has already been committed, just mark the pages.
+	 * if the range only needs to be committed and this is a weak
+	 * putpage, we're done; otherwise commit the whole pending range.
+ */
+
+ if (v3) {
+ lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p);
+ if (nfs_in_committed_range(vp, origoffset, bytes)) {
+ goto committed;
+ }
+ if (nfs_in_tobecommitted_range(vp, origoffset, bytes)) {
+ if (weak) {
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ return 0;
+ } else {
+ commitoff = np->n_pushlo;
+ commitbytes = (uint32_t)(np->n_pushhi -
+ np->n_pushlo);
+ goto commit;
+ }
+ }
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ }
+
+ /*
+ * otherwise write or commit all the pages.
+ */
+
+ kva = uvm_pagermapin(pgs, ap->a_count, UVMPAGER_MAPIN_WAITOK|
+ UVMPAGER_MAPIN_WRITE);
+
+ s = splbio();
+ vp->v_numoutput += 2;
+ mbp = pool_get(&bufpool, PR_WAITOK);
+ UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
+ vp, mbp, vp->v_numoutput, bytes);
+ splx(s);
+ mbp->b_bufsize = npages << PAGE_SHIFT;
+ mbp->b_data = (void *)kva;
+ mbp->b_resid = mbp->b_bcount = bytes;
+ mbp->b_flags = B_BUSY|B_WRITE|B_AGE |
+ (async ? B_CALL|B_ASYNC : 0) |
+ (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);
+ mbp->b_iodone = uvm_aio_biodone;
+ mbp->b_vp = vp;
+ mbp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&mbp->b_dep);
+
+ for (offset = origoffset;
+ bytes > 0;
+ offset += iobytes, bytes -= iobytes) {
+ iobytes = MIN(nmp->nm_wsize, bytes);
+
+ /*
+ * skip writing any pages which only need a commit.
+ */
+
+ if ((pgs[(offset - origoffset) >> PAGE_SHIFT]->flags &
+ PG_NEEDCOMMIT) != 0) {
+ KASSERT((offset & (PAGE_SIZE - 1)) == 0);
+ iobytes = MIN(PAGE_SIZE, bytes);
+ skipbytes += iobytes;
+ continue;
+ }
+
+ /* if it's really one i/o, don't make a second buf */
+ if (offset == origoffset && iobytes == bytes) {
+ bp = mbp;
+ } else {
+ s = splbio();
+ vp->v_numoutput++;
+ bp = pool_get(&bufpool, PR_WAITOK);
+ UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
+ vp, bp, vp->v_numoutput, 0);
+ splx(s);
+ bp->b_data = (char *)kva + (offset - origoffset);
+ bp->b_resid = bp->b_bcount = iobytes;
+ bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
+ bp->b_iodone = uvm_aio_biodone1;
+ bp->b_vp = vp;
+ bp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&bp->b_dep);
+ }
+ bp->b_private = mbp;
+ bp->b_lblkno = bp->b_blkno = (daddr_t)(offset >> DEV_BSHIFT);
+ UVMHIST_LOG(ubchist, "bp %p numout %d",
+ bp, vp->v_numoutput,0,0);
+ VOP_STRATEGY(bp);
+ }
+ if (skipbytes) {
+ UVMHIST_LOG(ubchist, "skipbytes %d", bytes, 0,0,0);
+ s = splbio();
+ mbp->b_resid -= skipbytes;
+ if (mbp->b_resid == 0) {
+ biodone(mbp);
+ }
+ splx(s);
+ }
+ if (async) {
+ return EINPROGRESS;
+ }
+ if (bp != NULL) {
+ error = biowait(mbp);
+ }
+
+ s = splbio();
+ if (mbp->b_vp)
+ vwakeup(mbp->b_vp);
+ pool_put(&bufpool, mbp);
+ splx(s);
+
+ uvm_pagermapout(kva, ap->a_count);
+ if (error || !v3) {
+ UVMHIST_LOG(ubchist, "returning error %d", error, 0,0,0);
+ return error;
+ }
+
+ /*
+ * for a weak put, mark the range as "to be committed"
+ * and mark the pages read-only so that we will be notified
+ * to remove the pages from the "to be committed" range
+ * if they are made dirty again.
+ * for a strong put, commit the pages and remove them from the
+ * "to be committed" range. also, mark them as writable
+ * and not cleanable with just a commit.
+ */
+
+ lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p);
+ if (weak) {
+ nfs_add_tobecommitted_range(vp, origoffset,
+ npages << PAGE_SHIFT);
+ for (i = 0; i < npages; i++) {
+ pgs[i]->flags |= PG_NEEDCOMMIT|PG_RDONLY;
+ }
+ } else {
+ commitoff = origoffset;
+ commitbytes = npages << PAGE_SHIFT;
+commit:
+ error = nfs_commit(vp, commitoff, commitbytes, curproc);
+ nfs_del_tobecommitted_range(vp, commitoff, commitbytes);
+committed:
+ for (i = 0; i < npages; i++) {
+ pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY);
+ }
+ }
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ return error;
+}
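
Both nfs_getpages() and nfs_putpages() above split one mapped transfer into
per-rsize/wsize sub-buffers that point back at a master buffer through
b_private; each completion drains the master's residual count, and the
master is retired when it reaches zero (uvm_aio_biodone1()/uvm_aio_biodone()
in the kernel). A simplified, single-threaded model of that accounting, with
hypothetical names:

    #include <stdio.h>

    /*
     * Single-threaded model of the master/sub buffer accounting used by
     * nfs_getpages()/nfs_putpages() above.  In the kernel each sub-buffer
     * reaches its master through bp->b_private and the master's b_resid
     * drains as pieces complete.  Names here are hypothetical.
     */
    struct master_buf {
    	size_t resid;			/* models mbp->b_resid */
    };

    struct sub_buf {
    	struct master_buf *master;	/* models bp->b_private */
    	size_t len;
    };

    static void
    piece_done(struct sub_buf *sb)	/* models uvm_aio_biodone1() */
    {
    	sb->master->resid -= sb->len;
    	if (sb->master->resid == 0)
    		printf("master buffer complete\n");
    }

    int
    main(void)
    {
    	size_t bytes = 10 * 4096;	/* ten pages to transfer */
    	size_t chunk = 8192;		/* models nm_rsize/nm_wsize */
    	struct master_buf mb = { bytes };
    	size_t off;

    	for (off = 0; off < bytes; off += chunk) {
    		struct sub_buf sb;

    		sb.master = &mb;
    		sb.len = (bytes - off < chunk) ? bytes - off : chunk;
    		/* VOP_STRATEGY() would queue the real i/o here */
    		piece_done(&sb);
    	}
    	return 0;
    }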
diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c
index 987259eadc3..567738584da 100644
--- a/sys/nfs/nfs_node.c
+++ b/sys/nfs/nfs_node.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_node.c,v 1.16 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_node.c,v 1.17 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_node.c,v 1.16 1996/02/18 11:53:42 fvdl Exp $ */
/*
@@ -145,6 +145,7 @@ loop:
vp = nvp;
np = pool_get(&nfs_node_pool, PR_WAITOK);
bzero((caddr_t)np, sizeof *np);
+ lockinit(&np->n_commitlock, PINOD, "nfsclock", 0, 0);
vp->v_data = np;
np->n_vnode = vp;
@@ -169,6 +170,17 @@ loop:
np->n_fhp = &np->n_fh;
bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
np->n_fhsize = fhsize;
+
+ /*
+ * XXXUBC doing this while holding the nfs_hashlock is bad,
+ * but there's no alternative at the moment.
+ */
+ error = VOP_GETATTR(vp, &np->n_vattr, curproc->p_ucred, curproc);
+	if (error) {
+		lockmgr(&nfs_hashlock, LK_RELEASE, 0, p);
+		return error;
+	}
+ uvm_vnp_setsize(vp, np->n_vattr.va_size);
+
lockmgr(&nfs_hashlock, LK_RELEASE, 0, p);
*npp = np;
return (0);
@@ -185,11 +197,12 @@ nfs_inactive(v)
struct nfsnode *np;
struct sillyrename *sp;
struct proc *p = curproc; /* XXX */
+ struct vnode *vp = ap->a_vp;
- np = VTONFS(ap->a_vp);
- if (prtactive && ap->a_vp->v_usecount != 0)
- vprint("nfs_inactive: pushing active", ap->a_vp);
- if (ap->a_vp->v_type != VDIR) {
+ np = VTONFS(vp);
+ if (prtactive && vp->v_usecount != 0)
+ vprint("nfs_inactive: pushing active", vp);
+ if (vp->v_type != VDIR) {
sp = np->n_sillyrename;
np->n_sillyrename = (struct sillyrename *)0;
} else
@@ -198,7 +211,7 @@ nfs_inactive(v)
/*
* Remove the silly file that was rename'd earlier
*/
- (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1);
+ (void) nfs_vinvalbuf(vp, 0, sp->s_cred, p, 1);
nfs_removeit(sp);
crfree(sp->s_cred);
vrele(sp->s_dvp);
@@ -206,7 +219,7 @@ nfs_inactive(v)
}
np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT);
- VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ VOP_UNLOCK(vp, 0, ap->a_p);
return (0);
}
diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c
index 9d4de9fd9a1..9534e7221da 100644
--- a/sys/nfs/nfs_serv.c
+++ b/sys/nfs/nfs_serv.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_serv.c,v 1.27 2001/11/06 19:53:21 miod Exp $ */
+/* $OpenBSD: nfs_serv.c,v 1.28 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_serv.c,v 1.34 1997/05/12 23:37:12 fvdl Exp $ */
/*
@@ -1663,8 +1663,6 @@ nfsrv_remove(nfsd, slp, procp, mrq)
error = EBUSY;
goto out;
}
- if (vp->v_flag & VTEXT)
- uvm_vnp_uncache(vp);
out:
if (!error) {
error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
@@ -3276,11 +3274,10 @@ nfsrv_access(vp, flags, cred, rdonly, p, override)
}
}
/*
- * If there's shared text associated with
- * the inode, try to free it up once. If
- * we fail, we can't allow writing.
+ * If the vnode is in use as a process's text,
+ * we can't allow writing.
*/
- if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
+ if ((vp->v_flag & VTEXT))
return (ETXTBSY);
}
error = VOP_ACCESS(vp, flags, cred, p);
diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c
index 9689d9f36a5..4a8bc11528d 100644
--- a/sys/nfs/nfs_subs.c
+++ b/sys/nfs/nfs_subs.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_subs.c,v 1.35 2001/11/06 19:53:21 miod Exp $ */
+/* $OpenBSD: nfs_subs.c,v 1.36 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_subs.c,v 1.27.4.3 1996/07/08 20:34:24 jtc Exp $ */
/*
@@ -39,6 +39,40 @@
* @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
*/
+/*
+ * Copyright 2000 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Frank van der Linden for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the NetBSD Project by
+ * Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ * or promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
/*
* These functions support the macros and help fiddle mbuf chains for
@@ -1241,17 +1275,14 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
vap->va_filerev = 0;
}
if (vap->va_size != np->n_size) {
- if (vap->va_type == VREG) {
- if (np->n_flag & NMODIFIED) {
- if (vap->va_size < np->n_size)
- vap->va_size = np->n_size;
- else
- np->n_size = vap->va_size;
- } else
- np->n_size = vap->va_size;
- uvm_vnp_setsize(vp, np->n_size);
- } else
+ if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) {
+ vap->va_size = np->n_size;
+ } else {
np->n_size = vap->va_size;
+ if (vap->va_type == VREG) {
+ uvm_vnp_setsize(vp, np->n_size);
+ }
+ }
}
np->n_attrstamp = time.tv_sec;
if (vaper != NULL) {
@@ -1741,26 +1772,216 @@ void
nfs_clearcommit(mp)
struct mount *mp;
{
- register struct vnode *vp, *nvp;
- register struct buf *bp, *nbp;
+ struct vnode *vp;
+ struct vm_page *pg;
+ struct nfsnode *np;
int s;
s = splbio();
-loop:
- for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
- if (vp->v_mount != mp) /* Paranoia */
- goto loop;
- nvp = vp->v_mntvnodes.le_next;
- for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
- nbp = bp->b_vnbufs.le_next;
- if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
- == (B_DELWRI | B_NEEDCOMMIT))
- bp->b_flags &= ~B_NEEDCOMMIT;
+ LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
+ if (vp->v_type == VNON)
+ continue;
+ np = VTONFS(vp);
+ np->n_pushlo = np->n_pushhi = np->n_pushedlo =
+ np->n_pushedhi = 0;
+ np->n_commitflags &=
+ ~(NFS_COMMIT_PUSH_VALID | NFS_COMMIT_PUSHED_VALID);
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ TAILQ_FOREACH(pg, &vp->v_uvm.u_obj.memq, listq) {
+ pg->flags &= ~PG_NEEDCOMMIT;
}
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
}
splx(s);
}
+void
+nfs_merge_commit_ranges(vp)
+ struct vnode *vp;
+{
+ struct nfsnode *np = VTONFS(vp);
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) {
+ np->n_pushedlo = np->n_pushlo;
+ np->n_pushedhi = np->n_pushhi;
+ np->n_commitflags |= NFS_COMMIT_PUSHED_VALID;
+ } else {
+ if (np->n_pushlo < np->n_pushedlo)
+ np->n_pushedlo = np->n_pushlo;
+ if (np->n_pushhi > np->n_pushedhi)
+ np->n_pushedhi = np->n_pushhi;
+ }
+
+ np->n_pushlo = np->n_pushhi = 0;
+ np->n_commitflags &= ~NFS_COMMIT_PUSH_VALID;
+
+#ifdef fvdl_debug
+ printf("merge: committed: %u - %u\n", (unsigned)np->n_pushedlo,
+ (unsigned)np->n_pushedhi);
+#endif
+}
+
+int
+nfs_in_committed_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID))
+ return 0;
+ lo = off;
+ hi = lo + len;
+
+ return (lo >= np->n_pushedlo && hi <= np->n_pushedhi);
+}
+
+int
+nfs_in_tobecommitted_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID))
+ return 0;
+ lo = off;
+ hi = lo + len;
+
+ return (lo >= np->n_pushlo && hi <= np->n_pushhi);
+}
+
+void
+nfs_add_committed_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ lo = off;
+ hi = lo + len;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) {
+ np->n_pushedlo = lo;
+ np->n_pushedhi = hi;
+ np->n_commitflags |= NFS_COMMIT_PUSHED_VALID;
+ } else {
+ if (hi > np->n_pushedhi)
+ np->n_pushedhi = hi;
+ if (lo < np->n_pushedlo)
+ np->n_pushedlo = lo;
+ }
+#ifdef fvdl_debug
+ printf("add: committed: %u - %u\n", (unsigned)np->n_pushedlo,
+ (unsigned)np->n_pushedhi);
+#endif
+}
+
+void
+nfs_del_committed_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID))
+ return;
+
+ lo = off;
+ hi = lo + len;
+
+ if (lo > np->n_pushedhi || hi < np->n_pushedlo)
+ return;
+ if (lo <= np->n_pushedlo)
+ np->n_pushedlo = hi;
+ else if (hi >= np->n_pushedhi)
+ np->n_pushedhi = lo;
+ else {
+ /*
+ * XXX There's only one range. If the deleted range
+ * is in the middle, pick the largest of the
+ * contiguous ranges that it leaves.
+ */
+		if ((lo - np->n_pushedlo) > (np->n_pushedhi - hi))
+			np->n_pushedhi = lo;
+		else
+			np->n_pushedlo = hi;
+ }
+#ifdef fvdl_debug
+ printf("del: committed: %u - %u\n", (unsigned)np->n_pushedlo,
+ (unsigned)np->n_pushedhi);
+#endif
+}
+
+void
+nfs_add_tobecommitted_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ lo = off;
+ hi = lo + len;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) {
+ np->n_pushlo = lo;
+ np->n_pushhi = hi;
+ np->n_commitflags |= NFS_COMMIT_PUSH_VALID;
+ } else {
+ if (lo < np->n_pushlo)
+ np->n_pushlo = lo;
+ if (hi > np->n_pushhi)
+ np->n_pushhi = hi;
+ }
+#ifdef fvdl_debug
+ printf("add: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo,
+ (unsigned)np->n_pushhi);
+#endif
+}
+
+void
+nfs_del_tobecommitted_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID))
+ return;
+
+ lo = off;
+ hi = lo + len;
+
+ if (lo > np->n_pushhi || hi < np->n_pushlo)
+ return;
+
+ if (lo <= np->n_pushlo)
+ np->n_pushlo = hi;
+ else if (hi >= np->n_pushhi)
+ np->n_pushhi = lo;
+ else {
+ /*
+ * XXX There's only one range. If the deleted range
+ * is in the middle, pick the largest of the
+ * contiguous ranges that it leaves.
+ */
+		if ((lo - np->n_pushlo) > (np->n_pushhi - hi))
+			np->n_pushhi = lo;
+		else
+			np->n_pushlo = hi;
+ }
+#ifdef fvdl_debug
+ printf("del: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo,
+ (unsigned)np->n_pushhi);
+#endif
+}
+
/*
* Map errnos to NFS error numbers. For Version 3 also filter out error
* numbers not specified for the associated procedure.
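
The new helpers above track at most one committed and one to-be-committed
interval per nfsnode, so deleting a span from the middle of an interval has
to discard one of the two leftover fragments, keeping the larger one as the
comment says. A standalone model of nfs_del_committed_range()'s
middle-deletion case:

    #include <stdio.h>

    /*
     * Standalone model of the single-interval bookkeeping in
     * nfs_del_committed_range() above: deleting [lo, hi) from the middle
     * of [pushedlo, pushedhi) leaves two fragments, and only the larger
     * one can be kept in the single range the nfsnode records.
     */
    static long pushedlo = 0, pushedhi = 100;

    static void
    del_range(long lo, long hi)
    {
    	if (lo > pushedhi || hi < pushedlo)
    		return;				/* disjoint: nothing to do */
    	if (lo <= pushedlo)
    		pushedlo = hi;			/* clip from the left */
    	else if (hi >= pushedhi)
    		pushedhi = lo;			/* clip from the right */
    	else if ((lo - pushedlo) > (pushedhi - hi))
    		pushedhi = lo;			/* keep the left fragment */
    	else
    		pushedlo = hi;			/* keep the right fragment */
    }

    int
    main(void)
    {
    	del_range(10, 30);	/* fragments: [0,10) and [30,100) */
    	printf("kept [%ld, %ld)\n", pushedlo, pushedhi);  /* [30, 100) */
    	return 0;
    }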
diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c
index c71a662ccb2..5a189ba344d 100644
--- a/sys/nfs/nfs_syscalls.c
+++ b/sys/nfs/nfs_syscalls.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_syscalls.c,v 1.20 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_syscalls.c,v 1.21 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_syscalls.c,v 1.19 1996/02/18 11:53:52 fvdl Exp $ */
/*
@@ -913,10 +913,9 @@ int
nfssvc_iod(p)
struct proc *p;
{
- register struct buf *bp, *nbp;
- register int i, myiod;
- struct vnode *vp;
- int error = 0, s;
+ struct buf *bp;
+ int i, myiod;
+ int error = 0;
/*
* Assign my position or return error if too many already running
@@ -944,39 +943,7 @@ nfssvc_iod(p)
while ((bp = nfs_bufq.tqh_first) != NULL) {
/* Take one off the front of the list */
TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
- if (bp->b_flags & B_READ)
- (void) nfs_doio(bp, NULL);
- else do {
- /*
- * Look for a delayed write for the same vnode, so I can do
- * it now. We must grab it before calling nfs_doio() to
- * avoid any risk of the vnode getting vclean()'d while
- * we are doing the write rpc.
- */
- vp = bp->b_vp;
- s = splbio();
- for (nbp = vp->v_dirtyblkhd.lh_first; nbp;
- nbp = nbp->b_vnbufs.le_next) {
- if ((nbp->b_flags &
- (B_BUSY|B_DELWRI|B_NEEDCOMMIT|B_NOCACHE))!=B_DELWRI)
- continue;
- bremfree(nbp);
- nbp->b_flags |= (B_BUSY|B_ASYNC);
- break;
- }
- /*
- * For the delayed write, do the first part of nfs_bwrite()
- * up to, but not including nfs_strategy().
- */
- if (nbp) {
- nbp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
- buf_undirty(bp);
- nbp->b_vp->v_numoutput++;
- }
- splx(s);
-
- (void) nfs_doio(bp, NULL);
- } while ((bp = nbp) != NULL);
+ (void) nfs_doio(bp, NULL);
}
if (error) {
PRELE(p);
diff --git a/sys/nfs/nfs_var.h b/sys/nfs/nfs_var.h
index 861eaf3059e..71985e581a8 100644
--- a/sys/nfs/nfs_var.h
+++ b/sys/nfs/nfs_var.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_var.h,v 1.15 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_var.h,v 1.16 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_var.h,v 1.3 1996/02/18 11:53:54 fvdl Exp $ */
/*
@@ -119,7 +119,7 @@ int nfs_sillyrename __P((struct vnode *, struct vnode *,
struct componentname *));
int nfs_lookitup __P((struct vnode *, char *, int, struct ucred *,
struct proc *, struct nfsnode **));
-int nfs_commit __P((struct vnode *, u_quad_t, int, struct proc *));
+int nfs_commit __P((struct vnode *, u_quad_t, unsigned, struct proc *));
int nfs_bmap __P((void *));
int nfs_strategy __P((void *));
int nfs_mmap __P((void *));
@@ -134,7 +134,6 @@ int nfs_vfree __P((void *));
int nfs_truncate __P((void *));
int nfs_update __P((void *));
int nfs_bwrite __P((void *));
-int nfs_writebp __P((struct buf *, int));
int nfsspec_access __P((void *));
int nfsspec_read __P((void *));
int nfsspec_write __P((void *));
@@ -258,7 +257,16 @@ void nfsm_srvfattr __P((struct nfsrv_descript *, struct vattr *,
int nfsrv_fhtovp __P((fhandle_t *, int, struct vnode **, struct ucred *,
struct nfssvc_sock *, struct mbuf *, int *, int));
int netaddr_match __P((int, union nethostaddr *, struct mbuf *));
+
void nfs_clearcommit __P((struct mount *));
+void nfs_merge_commit_ranges __P((struct vnode *));
+int nfs_in_committed_range __P((struct vnode *, off_t, off_t));
+int nfs_in_tobecommitted_range __P((struct vnode *, off_t, off_t));
+void nfs_add_committed_range __P((struct vnode *, off_t, off_t));
+void nfs_del_committed_range __P((struct vnode *, off_t, off_t));
+void nfs_add_tobecommitted_range __P((struct vnode *, off_t, off_t));
+void nfs_del_tobecommitted_range __P((struct vnode *, off_t, off_t));
+
int nfsrv_errmap __P((struct nfsrv_descript *, int));
void nfsrvw_sort __P((gid_t *, int));
void nfsrv_setcred __P((struct ucred *, struct ucred *));
diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c
index 13420530fc3..91f84da52b6 100644
--- a/sys/nfs/nfs_vfsops.c
+++ b/sys/nfs/nfs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_vfsops.c,v 1.38 2001/11/14 23:37:33 mickey Exp $ */
+/* $OpenBSD: nfs_vfsops.c,v 1.39 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_vfsops.c,v 1.46.4.1 1996/05/25 22:40:35 fvdl Exp $ */
/*
@@ -748,6 +748,8 @@ mountnfs(argp, mp, nam, pth, hst)
* point.
*/
mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA;
+ mp->mnt_fs_bshift = DEV_BSHIFT;
+ mp->mnt_dev_bshift = -1;
return (0);
bad:
@@ -856,8 +858,9 @@ loop:
*/
if (vp->v_mount != mp)
goto loop;
- if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL ||
- waitfor == MNT_LAZY)
+ if (waitfor == MNT_LAZY || VOP_ISLOCKED(vp) ||
+ (LIST_EMPTY(&vp->v_dirtyblkhd) &&
+ vp->v_uvm.u_obj.uo_npages == 0))
continue;
if (vget(vp, LK_EXCLUSIVE, p))
goto loop;
diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c
index 0813b439cb2..4c176c1c1ec 100644
--- a/sys/nfs/nfs_vnops.c
+++ b/sys/nfs/nfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_vnops.c,v 1.39 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_vnops.c,v 1.40 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_vnops.c,v 1.62.4.1 1996/07/08 20:26:52 jtc Exp $ */
/*
@@ -126,7 +126,9 @@ struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
{ &vop_advlock_desc, nfs_advlock }, /* advlock */
{ &vop_reallocblks_desc, nfs_reallocblks }, /* reallocblks */
{ &vop_bwrite_desc, nfs_bwrite },
- { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
+ { &vop_getpages_desc, nfs_getpages }, /* getpages */
+ { &vop_putpages_desc, nfs_putpages }, /* putpages */
+ { NULL, NULL }
};
struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
@@ -151,7 +153,7 @@ struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
{ &vop_ioctl_desc, spec_ioctl }, /* ioctl */
{ &vop_select_desc, spec_select }, /* select */
{ &vop_revoke_desc, spec_revoke }, /* revoke */
- { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
{ &vop_remove_desc, spec_remove }, /* remove */
{ &vop_link_desc, spec_link }, /* link */
{ &vop_rename_desc, spec_rename }, /* rename */
@@ -373,11 +375,30 @@ nfs_open(v)
return (EACCES);
}
+ /*
+ * Initialize read and write creds here, for swapfiles
+ * and other paths that don't set the creds themselves.
+ */
+
+ if (ap->a_mode & FREAD) {
+ if (np->n_rcred) {
+ crfree(np->n_rcred);
+ }
+ np->n_rcred = ap->a_cred;
+ crhold(np->n_rcred);
+ }
+ if (ap->a_mode & FWRITE) {
+ if (np->n_wcred) {
+ crfree(np->n_wcred);
+ }
+ np->n_wcred = ap->a_cred;
+ crhold(np->n_wcred);
+ }
+
if (np->n_flag & NMODIFIED) {
if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
ap->a_p, 1)) == EINTR)
return (error);
- uvm_vnp_uncache(vp);
np->n_attrstamp = 0;
if (vp->v_type == VDIR)
np->n_direofoffset = 0;
@@ -395,7 +416,6 @@ nfs_open(v)
if ((error = nfs_vinvalbuf(vp, V_SAVE,
ap->a_cred, ap->a_p, 1)) == EINTR)
return (error);
- uvm_vnp_uncache(vp);
np->n_mtime = vattr.va_mtime.tv_sec;
}
}
@@ -2511,7 +2531,7 @@ int
nfs_commit(vp, offset, cnt, procp)
struct vnode *vp;
u_quad_t offset;
- int cnt;
+ unsigned cnt;
struct proc *procp;
{
caddr_t cp;
@@ -2626,9 +2646,7 @@ nfs_fsync(v)
}
/*
- * Flush all the blocks associated with a vnode.
- * Walk through the buffer pool and push any dirty pages
- * associated with the vnode.
+ * Flush all the data associated with a vnode.
*/
int
nfs_flush(vp, cred, waitfor, p, commit)
@@ -2638,154 +2656,19 @@ nfs_flush(vp, cred, waitfor, p, commit)
struct proc *p;
int commit;
{
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
struct nfsnode *np = VTONFS(vp);
- struct buf *bp;
- int i;
- struct buf *nbp;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
- int passone = 1;
- u_quad_t off = (u_quad_t)-1, endoff = 0, toff;
-#ifndef NFS_COMMITBVECSIZ
-#define NFS_COMMITBVECSIZ 20
-#endif
- struct buf *bvec[NFS_COMMITBVECSIZ];
+ int error;
+ int flushflags = PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO;
+ int rv;
- if (nmp->nm_flag & NFSMNT_INT)
- slpflag = PCATCH;
- if (!commit)
- passone = 0;
- /*
- * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
-	 * server, but has not been committed to stable storage on the server
- * yet. On the first pass, the byte range is worked out and the commit
- * rpc is done. On the second pass, nfs_writebp() is called to do the
- * job.
- */
-again:
- bvecpos = 0;
- if (NFS_ISV3(vp) && commit) {
- s = splbio();
- for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
- nbp = bp->b_vnbufs.le_next;
- if (bvecpos >= NFS_COMMITBVECSIZ)
- break;
- if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
- != (B_DELWRI | B_NEEDCOMMIT))
- continue;
- bremfree(bp);
- bp->b_flags |= (B_BUSY | B_WRITEINPROG);
- /*
- * A list of these buffers is kept so that the
- * second loop knows which buffers have actually
- * been committed. This is necessary, since there
- * may be a race between the commit rpc and new
- * uncommitted writes on the file.
- */
- bvec[bvecpos++] = bp;
- toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
- bp->b_dirtyoff;
- if (toff < off)
- off = toff;
- toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
- if (toff > endoff)
- endoff = toff;
- }
- splx(s);
- }
- if (bvecpos > 0) {
- /*
- * Commit data on the server, as required.
- */
- retv = nfs_commit(vp, off, (int)(endoff - off), p);
- if (retv == NFSERR_STALEWRITEVERF)
- nfs_clearcommit(vp->v_mount);
- /*
- * Now, either mark the blocks I/O done or mark the
- * blocks dirty, depending on whether the commit
- * succeeded.
- */
- for (i = 0; i < bvecpos; i++) {
- bp = bvec[i];
- bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG);
- if (retv)
- brelse(bp);
- else {
- s = splbio();
- buf_undirty(bp);
- vp->v_numoutput++;
- bp->b_flags |= B_ASYNC;
- bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- splx(s);
- biodone(bp);
- }
- }
- }
+ error = 0;
- /*
- * Start/do any write(s) that are required.
- */
-loop:
- s = splbio();
- for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
- nbp = bp->b_vnbufs.le_next;
- if (bp->b_flags & B_BUSY) {
- if (waitfor != MNT_WAIT || passone)
- continue;
- bp->b_flags |= B_WANTED;
- error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
- "nfsfsync", slptimeo);
- splx(s);
- if (error) {
- if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
- return (EINTR);
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- }
- goto loop;
- }
- if ((bp->b_flags & B_DELWRI) == 0)
- panic("nfs_fsync: not dirty");
- if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT))
- continue;
- bremfree(bp);
- if (passone || !commit)
- bp->b_flags |= (B_BUSY|B_ASYNC);
- else
- bp->b_flags |= (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT);
- splx(s);
- VOP_BWRITE(bp);
- goto loop;
- }
- splx(s);
- if (passone) {
- passone = 0;
- goto again;
- }
- if (waitfor == MNT_WAIT) {
- loop2:
- s = splbio();
- error = vwaitforio(vp, slpflag, "nfs_fsync", slptimeo);
- splx(s);
- if (error) {
- if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
- return (EINTR);
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- goto loop2;
- }
-
- if (vp->v_dirtyblkhd.lh_first && commit) {
-#if 0
- vprint("nfs_fsync: dirty", vp);
-#endif
- goto loop;
- }
+ simple_lock(&uobj->vmobjlock);
+ rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags);
+ simple_unlock(&uobj->vmobjlock);
+ if (!rv) {
+ error = EIO;
}
if (np->n_flag & NWRITEERR) {
error = np->n_error;
@@ -2860,7 +2743,7 @@ nfs_print(v)
}
/*
- * Just call nfs_writebp() with the force argument set to 1.
+ * Just call bwrite().
*/
int
nfs_bwrite(v)
@@ -2870,76 +2753,7 @@ nfs_bwrite(v)
struct buf *a_bp;
} */ *ap = v;
- return (nfs_writebp(ap->a_bp, 1));
-}
-
-/*
- * This is a clone of vop_generic_bwrite(), except that B_WRITEINPROG isn't set unless
- * the force flag is set, and it also handles the B_NEEDCOMMIT flag.
- */
-int
-nfs_writebp(bp, force)
- register struct buf *bp;
- int force;
-{
- register int oldflags = bp->b_flags, retv = 1;
- register struct proc *p = curproc; /* XXX */
- off_t off;
- int s;
-
- if(!(bp->b_flags & B_BUSY))
- panic("bwrite: buffer is not busy???");
-
-#ifdef fvdl_debug
- printf("nfs_writebp(%x): vp %x voff %d vend %d doff %d dend %d\n",
- bp, bp->b_vp, bp->b_validoff, bp->b_validend, bp->b_dirtyoff,
- bp->b_dirtyend);
-#endif
- bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
-
- s = splbio();
- buf_undirty(bp);
-
- if ((oldflags & B_ASYNC) && !(oldflags & B_DELWRI) && p)
- ++p->p_stats->p_ru.ru_oublock;
-
- bp->b_vp->v_numoutput++;
- splx(s);
-
- /*
- * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not
-	 * an actual write will have to be scheduled via VOP_STRATEGY().
- * If B_WRITEINPROG is already set, then push it with a write anyhow.
- */
- if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) {
- off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
- bp->b_flags |= B_WRITEINPROG;
- retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff,
- bp->b_proc);
- bp->b_flags &= ~B_WRITEINPROG;
- if (!retv) {
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- bp->b_flags &= ~B_NEEDCOMMIT;
- biodone(bp);
- } else if (retv == NFSERR_STALEWRITEVERF)
- nfs_clearcommit(bp->b_vp->v_mount);
- }
- if (retv) {
- if (force)
- bp->b_flags |= B_WRITEINPROG;
- VOP_STRATEGY(bp);
- }
-
- if( (oldflags & B_ASYNC) == 0) {
- int rtval = biowait(bp);
- if (!(oldflags & B_DELWRI) && p) {
- ++p->p_stats->p_ru.ru_oublock;
- }
- brelse(bp);
- return (rtval);
- }
-
- return (0);
+ return (bwrite(ap->a_bp));
}
/*
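
nfs_flush() above collapses two passes over v_dirtyblkhd (a commit rpc pass,
then per-buffer writes) into one pgo_flush() call over the whole uvm_object.
A toy model of what that single call does, with a flat dirty-page bitmap
standing in for the object's page list; illustration only:

    #include <stdio.h>

    /*
     * Toy model of the single pgo_flush() call nfs_flush() above now
     * makes: walk every page of the object and clean the dirty ones,
     * the pager (not the caller) deciding how to write them back.
     */
    #define NPAGES 8

    static int page_dirty[NPAGES] = { 0, 1, 0, 1, 1, 0, 0, 1 };

    static int
    flush_object(void)	/* models pgo_flush(uobj, 0, 0, PGO_ALLPAGES|...) */
    {
    	int i, cleaned = 0;

    	for (i = 0; i < NPAGES; i++)
    		if (page_dirty[i]) {
    			page_dirty[i] = 0;	/* write the page back */
    			cleaned++;
    		}
    	return cleaned;
    }

    int
    main(void)
    {
    	printf("cleaned %d of %d pages\n", flush_object(), NPAGES);
    	return 0;
    }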
diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h
index e1e0fd64327..42aaddfa637 100644
--- a/sys/nfs/nfsnode.h
+++ b/sys/nfs/nfsnode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfsnode.h,v 1.11 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfsnode.h,v 1.12 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfsnode.h,v 1.16 1996/02/18 11:54:04 fvdl Exp $ */
/*
@@ -119,8 +119,20 @@ struct nfsnode {
nfsfh_t n_fh; /* Small File Handle */
struct ucred *n_rcred;
struct ucred *n_wcred;
+	off_t n_pushedlo;	/* 1st blk in committed range */
+	off_t n_pushedhi;	/* last blk in committed range */
+	off_t n_pushlo;		/* 1st blk in range to commit */
+	off_t n_pushhi;		/* last blk in range to commit */
+ struct lock n_commitlock; /* Serialize commits XXX */
+ int n_commitflags;
};
+/*
+ * Values for n_commitflags
+ */
+#define NFS_COMMIT_PUSH_VALID 0x0001 /* push range valid */
+#define NFS_COMMIT_PUSHED_VALID 0x0002 /* pushed range valid */
+
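For illustration, a minimal sketch of how these fields are intended to be used: grow the to-be-committed range as unstable writes complete. The helper name is hypothetical, and the caller is assumed to hold n_commitlock.

	/*
	 * Sketch only: extend [n_pushlo, n_pushhi) to cover a new
	 * unstable write.  Caller is assumed to hold np->n_commitlock.
	 */
	static void
	nfs_grow_push_range(struct nfsnode *np, off_t off, off_t len)
	{
		if ((np->n_commitflags & NFS_COMMIT_PUSH_VALID) == 0) {
			np->n_pushlo = off;
			np->n_pushhi = off + len;
			np->n_commitflags |= NFS_COMMIT_PUSH_VALID;
		} else {
			np->n_pushlo = MIN(off, np->n_pushlo);
			np->n_pushhi = MAX(off + len, np->n_pushhi);
		}
	}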
#define n_atim n_un1.nf_atim
#define n_mtim n_un2.nf_mtim
#define n_sillyrename n_un3.nf_silly
@@ -199,6 +211,8 @@ int nfs_bwrite __P((void *));
int nfs_vget __P((struct mount *, ino_t, struct vnode **));
#define nfs_reallocblks \
((int (*) __P((void *)))eopnotsupp)
+int nfs_getpages __P((void *));
+int nfs_putpages __P((void *));
/* other stuff */
int nfs_removeit __P((struct sillyrename *));
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index cabdcbbe084..054a07c24d5 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: buf.h,v 1.33 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: buf.h,v 1.34 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */
/*
@@ -68,6 +68,7 @@ extern struct bio_ops {
void (*io_deallocate) __P((struct buf *));
void (*io_movedeps) __P((struct buf *, struct buf *));
int (*io_countdeps) __P((struct buf *, int, int));
+ void (*io_pageiodone) __P((struct buf *));
} bioops;
/*
@@ -96,10 +97,7 @@ struct buf {
/* Function to call upon completion. */
void (*b_iodone) __P((struct buf *));
struct vnode *b_vp; /* Device vnode. */
- int b_dirtyoff; /* Offset in buffer of dirty region. */
- int b_dirtyend; /* Offset of end of dirty region. */
- int b_validoff; /* Offset in buffer of valid region. */
- int b_validend; /* Offset of end of valid region. */
+ void *b_private;
struct workhead b_dep; /* List of filesystem dependencies. */
};
@@ -120,7 +118,6 @@ struct buf {
* These flags are kept in b_flags.
*/
#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
-#define B_NEEDCOMMIT 0x00000002 /* Needs committing to stable storage */
#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
#define B_BAD 0x00000008 /* Bad block revectoring in progress. */
#define B_BUSY 0x00000010 /* I/O in progress. */
@@ -144,7 +141,6 @@ struct buf {
#define B_UAREA 0x00400000 /* Buffer describes Uarea I/O. */
#define B_WANTED 0x00800000 /* Process wants this buffer. */
#define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */
-#define B_WRITEINPROG 0x01000000 /* Write in progress. */
#define B_XXX 0x02000000 /* Debugging flag. */
#define B_DEFERRED 0x04000000 /* Skipped over for cleaning */
#define B_SCANNED 0x08000000 /* Block already pushed during sync */
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index f398a301c69..50f59e4a532 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: mount.h,v 1.40 2001/11/21 21:13:34 csapuntz Exp $ */
+/* $OpenBSD: mount.h,v 1.41 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $ */
/*
@@ -336,6 +336,8 @@ struct mount {
struct lock mnt_lock; /* mount structure lock */
int mnt_flag; /* flags */
int mnt_maxsymlinklen; /* max size of short symlink */
+ int mnt_fs_bshift; /* offset shift for lblkno */
+ int mnt_dev_bshift; /* shift for device sectors */
struct statfs mnt_stat; /* cache of filesystem stats */
qaddr_t mnt_data; /* private data */
};
diff --git a/sys/sys/param.h b/sys/sys/param.h
index a950b196cc3..59fe3a01548 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: param.h,v 1.41 2001/09/11 13:11:18 deraadt Exp $ */
+/* $OpenBSD: param.h,v 1.42 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: param.h,v 1.23 1996/03/17 01:02:29 thorpej Exp $ */
/*-
@@ -227,3 +227,16 @@
#define RFCNAMEG (1<<10) /* UNIMPL zero plan9 `name space' */
#define RFCENVG (1<<11) /* UNIMPL zero plan9 `env space' */
#define RFCFDG (1<<12) /* zero fd table */
+
+#ifdef _KERNEL
+/*
+ * Defaults for Unified Buffer Cache parameters.
+ */
+
+#ifndef UBC_WINSIZE
+#define UBC_WINSIZE 8192
+#endif
+#ifndef UBC_NWINS
+#define UBC_NWINS 1024
+#endif
+#endif /* _KERNEL */
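Taken together, these defaults bound the kernel virtual space reserved for UBC file-mapping windows. A quick check of the arithmetic, using the values above:

	/* 1024 windows of 8192 bytes: 8MB of KVA for file mappings. */
	vsize_t ubc_kva = UBC_NWINS * UBC_WINSIZE;	/* 8388608 */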
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 64a90990d0e..9eaf484201f 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vnode.h,v 1.41 2001/11/15 06:22:30 art Exp $ */
+/* $OpenBSD: vnode.h,v 1.42 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $ */
/*
@@ -90,8 +90,10 @@ struct vnode {
struct uvm_vnode v_uvm; /* uvm data */
int (**v_op) __P((void *)); /* vnode operations vector */
enum vtype v_type; /* vnode type */
- u_int v_flag; /* vnode flags (see below) */
- u_int v_usecount; /* reference count of users */
+#define v_flag v_uvm.u_flags
+#define v_usecount v_uvm.u_obj.uo_refs
+#define v_interlock v_uvm.u_obj.vmobjlock
+#define v_numoutput v_uvm.u_nio
/* reference count of writers */
u_int v_writecount;
/* Flags that can be read/written in interrupts */
@@ -103,7 +105,6 @@ struct vnode {
LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */
struct buflists v_cleanblkhd; /* clean blocklist head */
struct buflists v_dirtyblkhd; /* dirty blocklist head */
- u_int v_numoutput; /* num of writes in progress */
LIST_ENTRY(vnode) v_synclist; /* vnode with dirty buffers */
union {
struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */
@@ -112,8 +113,8 @@ struct vnode {
struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */
} v_un;
- struct simplelock v_interlock; /* lock on usecount and flag */
struct lock *v_vnlock; /* used for non-locking fs's */
+ struct lock v_glock; /* getpage lock */
enum vtagtype v_tag; /* type of underlying data */
void *v_data; /* private data for fs */
struct {
@@ -137,6 +138,9 @@ struct vnode {
#define VXWANT 0x0200 /* process is waiting for vnode */
#define VALIASED 0x0800 /* vnode has an alias */
#define VLOCKSWORK 0x4000 /* FS supports locking discipline */
+#define VDIRTY 0x8000 /* vnode possibly has dirty pages */
+
+#define VSIZENOTSET ((voff_t)-1)
/*
* (v_bioflag) Flags that may be manipulated by interrupt handlers
@@ -446,6 +450,12 @@ int vop_generic_unlock __P((void *));
int vop_generic_revoke __P((void *));
int vop_generic_kqfilter __P((void *));
+/* XXXUBC - doesn't really belong here. */
+int genfs_getpages __P((void *));
+int genfs_putpages __P((void *));
+int genfs_size __P((void *));
+
int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p));
int vn_writechk __P((struct vnode *vp));
diff --git a/sys/sys/vnode_if.h b/sys/sys/vnode_if.h
index 00cdadabe25..57aff6f4c97 100644
--- a/sys/sys/vnode_if.h
+++ b/sys/sys/vnode_if.h
@@ -3,7 +3,7 @@
* (Modifications made here may easily be lost!)
*
* Created from the file:
- * OpenBSD: vnode_if.src,v 1.11 2001/06/23 02:21:05 csapuntz Exp
+ * OpenBSD: vnode_if.src,v 1.13 2001/07/26 20:24:47 millert Exp
* by the script:
* OpenBSD: vnode_if.sh,v 1.8 2001/02/26 17:34:18 art Exp
*/
@@ -397,6 +397,52 @@ struct vop_whiteout_args {
extern struct vnodeop_desc vop_whiteout_desc;
int VOP_WHITEOUT __P((struct vnode *, struct componentname *, int));
+struct vop_ballocn_args {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ off_t a_offset;
+ off_t a_length;
+ struct ucred *a_cred;
+ int a_flags;
+};
+extern struct vnodeop_desc vop_ballocn_desc;
+int VOP_BALLOCN __P((struct vnode *, off_t, off_t, struct ucred *, int));
+
+struct vop_getpages_args {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ voff_t a_offset;
+ vm_page_t *a_m;
+ int *a_count;
+ int a_centeridx;
+ vm_prot_t a_access_type;
+ int a_advice;
+ int a_flags;
+};
+extern struct vnodeop_desc vop_getpages_desc;
+int VOP_GETPAGES __P((struct vnode *, voff_t, vm_page_t *, int *, int,
+ vm_prot_t, int, int));
+
+struct vop_putpages_args {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ vm_page_t *a_m;
+ int a_count;
+ int a_flags;
+ int *a_rtvals;
+};
+extern struct vnodeop_desc vop_putpages_desc;
+int VOP_PUTPAGES __P((struct vnode *, vm_page_t *, int, int, int *));
+
+struct vop_size_args {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ off_t a_size;
+ off_t *a_eobp;
+};
+extern struct vnodeop_desc vop_size_desc;
+int VOP_SIZE __P((struct vnode *, off_t, off_t *));
+
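A hedged sketch of the calling convention for the new page operations; this is not code from the diff, but it follows the pattern of the ufs_balloc_range() hunk later in this commit, where the caller locks the uvm object and VOP_GETPAGES returns with it unlocked. vp and off are assumed to be in scope.

	struct vm_page *pgs[1];
	int npages = 1;
	int error;

	pgs[0] = NULL;
	simple_lock(&vp->v_uvm.u_obj.vmobjlock);
	error = VOP_GETPAGES(vp, trunc_page(off), pgs, &npages,
	    0,			/* centeridx */
	    VM_PROT_READ,	/* access_type */
	    0,			/* advice */
	    PGO_SYNCIO);	/* flags */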
/* Special cases: */
#include <sys/buf.h>
diff --git a/sys/ufs/ext2fs/ext2fs_balloc.c b/sys/ufs/ext2fs/ext2fs_balloc.c
index 849a8864b2a..78fb0a8371c 100644
--- a/sys/ufs/ext2fs/ext2fs_balloc.c
+++ b/sys/ufs/ext2fs/ext2fs_balloc.c
@@ -1,5 +1,4 @@
-/* $OpenBSD: ext2fs_balloc.c,v 1.7 2001/11/06 19:53:21 miod Exp $ */
-/* $NetBSD: ext2fs_balloc.c,v 1.10 2001/07/04 21:16:01 chs Exp $ */
+/* $NetBSD: ext2fs_balloc.c,v 1.8 2000/12/10 06:38:31 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -44,8 +43,9 @@
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/vnode.h>
+#include <sys/mount.h>
-#include <uvm/uvm_extern.h>
+#include <uvm/uvm.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
@@ -73,8 +73,13 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
u_int deallocated;
ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
int unwindidx = -1;
+ UVMHIST_FUNC("ext2fs_buf_alloc"); UVMHIST_CALLED(ubchist);
- *bpp = NULL;
+ UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0);
+
+ if (bpp != NULL) {
+ *bpp = NULL;
+ }
if (bn < 0)
return (EFBIG);
fs = ip->i_e2fs;
@@ -86,20 +91,29 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
if (bn < NDADDR) {
nb = fs2h32(ip->i_e2fs_blocks[bn]);
if (nb != 0) {
- error = bread(vp, bn, fs->e2fs_bsize, NOCRED, &bp);
- if (error) {
- brelse(bp);
- return (error);
+
+ /*
+ * the block is already allocated, just read it.
+ */
+
+ if (bpp != NULL) {
+ error = bread(vp, bn, fs->e2fs_bsize, NOCRED,
+ &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ *bpp = bp;
}
- *bpp = bp;
return (0);
}
/*
* allocate a new direct block.
*/
+
error = ext2fs_alloc(ip, bn,
- ext2fs_blkpref(ip, bn, (int)bn, &ip->i_e2fs_blocks[0]),
+ ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]),
cred, &newb);
if (error)
return (error);
@@ -107,11 +121,13 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
ip->i_e2fs_last_blk = newb;
ip->i_e2fs_blocks[bn] = h2fs32(newb);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0);
- bp->b_blkno = fsbtodb(fs, newb);
- if (flags & B_CLRBUF)
- clrbuf(bp);
- *bpp = bp;
+ if (bpp != NULL) {
+ bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0);
+ bp->b_blkno = fsbtodb(fs, newb);
+ if (flags & B_CLRBUF)
+ clrbuf(bp);
+ *bpp = bp;
+ }
return (0);
}
/*
@@ -229,26 +245,30 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
} else {
bdwrite(bp);
}
- nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
- nbp->b_blkno = fsbtodb(fs, nb);
- if (flags & B_CLRBUF)
- clrbuf(nbp);
- *bpp = nbp;
+ if (bpp != NULL) {
+ nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
+ nbp->b_blkno = fsbtodb(fs, nb);
+ if (flags & B_CLRBUF)
+ clrbuf(nbp);
+ *bpp = nbp;
+ }
return (0);
}
brelse(bp);
- if (flags & B_CLRBUF) {
- error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp);
- if (error) {
- brelse(nbp);
- goto fail;
+ if (bpp != NULL) {
+ if (flags & B_CLRBUF) {
+ error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED,
+ &nbp);
+ if (error) {
+ brelse(nbp);
+ goto fail;
+ }
+ } else {
+ nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
+ nbp->b_blkno = fsbtodb(fs, nb);
}
- } else {
- nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
- nbp->b_blkno = fsbtodb(fs, nb);
+ *bpp = nbp;
}
-
- *bpp = nbp;
return (0);
fail:
/*
@@ -292,3 +312,153 @@ fail:
}
return error;
}
+
+int
+ext2fs_ballocn(v)
+ void *v;
+{
+ struct vop_ballocn_args /* {
+ struct vnode *a_vp;
+ off_t a_offset;
+ off_t a_length;
+ struct ucred *a_cred;
+ int a_flags;
+ } */ *ap = v;
+ off_t off, len;
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
+ struct m_ext2fs *fs = ip->i_e2fs;
+ int error, delta, bshift, bsize;
+ UVMHIST_FUNC("ext2fs_ballocn"); UVMHIST_CALLED(ubchist);
+
+ bshift = fs->e2fs_bshift;
+ bsize = 1 << bshift;
+
+ off = ap->a_offset;
+ len = ap->a_length;
+
+ delta = off & (bsize - 1);
+ off -= delta;
+ len += delta;
+
+ while (len > 0) {
+ bsize = min(bsize, len);
+ UVMHIST_LOG(ubchist, "off 0x%x len 0x%x bsize 0x%x",
+ off, len, bsize, 0);
+
+ error = ext2fs_buf_alloc(ip, lblkno(fs, off), bsize, ap->a_cred,
+ NULL, ap->a_flags);
+ if (error) {
+ UVMHIST_LOG(ubchist, "error %d", error, 0,0,0);
+ return error;
+ }
+
+ /*
+		 * increase file size now, VOP_BALLOCN() requires that
+		 * EOF be up-to-date before each call.
+ */
+
+ if (ip->i_e2fs_size < off + bsize) {
+ UVMHIST_LOG(ubchist, "old 0x%x new 0x%x",
+ ip->i_e2fs_size, off + bsize,0,0);
+ ip->i_e2fs_size = off + bsize;
+ if (vp->v_uvm.u_size < ip->i_e2fs_size) {
+ uvm_vnp_setsize(vp, ip->i_e2fs_size);
+ }
+ }
+
+ off += bsize;
+ len -= bsize;
+ }
+ return 0;
+}
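The delta arithmetic at the top of ext2fs_ballocn() rounds the request down to a block boundary before the loop runs. A worked instance, assuming bsize = 4096:

	off_t off = 5000, len = 100;
	int delta = off & (4096 - 1);	/* 5000 & 4095 = 904 */

	off -= delta;			/* off = 4096, block-aligned */
	len += delta;			/* len = 1004, still covers byte 5099 */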
+
+/*
+ * allocate a range of blocks in a file.
+ * after this function returns, any page entirely contained within the range
+ * will map to invalid data and thus must be overwritten before it is made
+ * accessible to others.
+ */
+
+int
+ext2fs_balloc_range(vp, off, len, cred, flags)
+ struct vnode *vp;
+ off_t off, len;
+ struct ucred *cred;
+ int flags;
+{
+ off_t oldeof, eof, pagestart;
+ struct uvm_object *uobj;
+ int i, delta, error, npages;
+ int bshift = vp->v_mount->mnt_fs_bshift;
+ int bsize = 1 << bshift;
+ int ppb = max(bsize >> PAGE_SHIFT, 1);
+ struct vm_page *pgs[ppb];
+ UVMHIST_FUNC("ext2fs_balloc_range"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
+ vp, off, len, vp->v_uvm.u_size);
+
+ error = 0;
+ uobj = &vp->v_uvm.u_obj;
+ oldeof = vp->v_uvm.u_size;
+ eof = max(oldeof, off + len);
+ UVMHIST_LOG(ubchist, "new eof 0x%x", eof,0,0,0);
+ pgs[0] = NULL;
+
+ /*
+ * cache the new range of the file. this will create zeroed pages
+ * where the new block will be and keep them locked until the
+ * new block is allocated, so there will be no window where
+ * the old contents of the new block are visible to racing threads.
+ */
+
+ pagestart = trunc_page(off) & ~(bsize - 1);
+ npages = min(ppb, (round_page(eof) - pagestart) >> PAGE_SHIFT);
+	memset(pgs, 0, npages * sizeof(struct vm_page *));
+ simple_lock(&uobj->vmobjlock);
+ error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0,
+ VM_PROT_READ, 0, PGO_SYNCIO | PGO_PASTEOF);
+ if (error) {
+ UVMHIST_LOG(ubchist, "getpages %d", error,0,0,0);
+ goto errout;
+ }
+ for (i = 0; i < npages; i++) {
+ UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0);
+ KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
+ pgs[i]->flags &= ~PG_CLEAN;
+ uvm_pageactivate(pgs[i]);
+ }
+
+ /*
+ * adjust off to be block-aligned.
+ */
+
+ delta = off & (bsize - 1);
+ off -= delta;
+ len += delta;
+
+ /*
+ * now allocate the range.
+ */
+
+ lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL, curproc);
+ error = VOP_BALLOCN(vp, off, len, cred, flags);
+ UVMHIST_LOG(ubchist, "ballocn %d", error,0,0,0);
+ lockmgr(&vp->v_glock, LK_RELEASE, NULL, curproc);
+
+ /*
+ * unbusy any pages we are holding.
+ */
+
+errout:
+ simple_lock(&uobj->vmobjlock);
+ if (error) {
+ (void) (uobj->pgops->pgo_flush)(uobj, oldeof, pagestart + ppb,
+ PGO_FREE);
+ }
+ if (pgs[0] != NULL) {
+ uvm_page_unbusy(pgs, npages);
+ }
+ simple_unlock(&uobj->vmobjlock);
+ return (error);
+}
diff --git a/sys/ufs/ext2fs/ext2fs_extern.h b/sys/ufs/ext2fs/ext2fs_extern.h
index b7a3f96df38..af23fb6ef2d 100644
--- a/sys/ufs/ext2fs/ext2fs_extern.h
+++ b/sys/ufs/ext2fs/ext2fs_extern.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: ext2fs_extern.h,v 1.10 2001/09/18 00:39:15 art Exp $ */
-/* $NetBSD: ext2fs_extern.h,v 1.1 1997/06/11 09:33:55 bouyer Exp $ */
+/* $OpenBSD: ext2fs_extern.h,v 1.11 2001/11/27 05:27:12 art Exp $ */
+/* $NetBSD: ext2fs_extern.h,v 1.9 2000/11/27 08:39:53 chs Exp $ */
/*-
* Copyright (c) 1997 Manuel Bouyer.
@@ -74,6 +74,9 @@ int ext2fs_inode_free(struct inode *pip, ino_t ino, int mode);
/* ext2fs_balloc.c */
int ext2fs_buf_alloc(struct inode *, daddr_t, int, struct ucred *,
struct buf **, int);
+int ext2fs_ballocn __P((void *));
+int ext2fs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *,
+ int));
/* ext2fs_bmap.c */
int ext2fs_bmap __P((void *));
diff --git a/sys/ufs/ext2fs/ext2fs_inode.c b/sys/ufs/ext2fs/ext2fs_inode.c
index 4af28d9bf0e..f77c99c47b5 100644
--- a/sys/ufs/ext2fs/ext2fs_inode.c
+++ b/sys/ufs/ext2fs/ext2fs_inode.c
@@ -1,5 +1,4 @@
-/* $OpenBSD: ext2fs_inode.c,v 1.17 2001/11/06 19:53:21 miod Exp $ */
-/* $NetBSD: ext2fs_inode.c,v 1.24 2001/06/19 12:59:18 wiz Exp $ */
+/* $NetBSD: ext2fs_inode.c,v 1.23 2001/02/18 20:17:04 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -59,8 +58,10 @@
#include <ufs/ext2fs/ext2fs.h>
#include <ufs/ext2fs/ext2fs_extern.h>
+extern int prtactive;
+
static int ext2fs_indirtrunc __P((struct inode *, ufs_daddr_t, ufs_daddr_t,
- ufs_daddr_t, int, long *));
+ ufs_daddr_t, int, long *));
/*
* Last reference to an inode. If necessary, write or delete it.
@@ -78,7 +79,6 @@ ext2fs_inactive(v)
struct proc *p = ap->a_p;
struct timespec ts;
int error = 0;
- extern int prtactive;
if (prtactive && vp->v_usecount != 0)
vprint("ext2fs_inactive: pushing active", vp);
@@ -171,14 +171,13 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
{
struct vnode *ovp = ITOV(oip);
ufs_daddr_t lastblock;
- ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
+ ufs_daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR];
ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
struct m_ext2fs *fs;
- struct buf *bp;
int offset, size, level;
long count, nblocks, vflags, blocksreleased = 0;
int i;
- int aflags, error, allerror;
+ int error, allerror;
off_t osize;
if (length < 0)
@@ -219,22 +218,8 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
if (length > fs->fs_maxfilesize)
return (EFBIG);
#endif
- offset = blkoff(fs, length - 1);
- lbn = lblkno(fs, length - 1);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = ext2fs_buf_alloc(oip, lbn, offset + 1, cred, &bp,
- aflags);
- if (error)
- return (error);
- oip->i_e2fs_size = length;
- uvm_vnp_setsize(ovp, length);
- uvm_vnp_uncache(ovp);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
+ ext2fs_balloc_range(ovp, length - 1, 1, cred,
+ flags & IO_SYNC ? B_SYNC : 0);
oip->i_flag |= IN_CHANGE | IN_UPDATE;
return (ext2fs_update(oip, NULL, NULL, 1));
}
@@ -246,28 +231,15 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
* of subsequent file growth.
*/
offset = blkoff(fs, length);
- if (offset == 0) {
- oip->i_e2fs_size = length;
- } else {
- lbn = lblkno(fs, length);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = ext2fs_buf_alloc(oip, lbn, offset, cred, &bp,
- aflags);
- if (error)
- return (error);
- oip->i_e2fs_size = length;
+ if (offset != 0) {
size = fs->e2fs_bsize;
- uvm_vnp_setsize(ovp, length);
- uvm_vnp_uncache(ovp);
- bzero((char *)bp->b_data + offset, (u_int)(size - offset));
- allocbuf(bp, size);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
+
+ /* XXXUBC we should handle more than just VREG */
+ uvm_vnp_zerorange(ovp, length, size - offset);
}
+ oip->i_e2fs_size = length;
+ uvm_vnp_setsize(ovp, length);
+
/*
* Calculate index into inode's block list of
* last direct and indirect blocks (if any)
diff --git a/sys/ufs/ext2fs/ext2fs_readwrite.c b/sys/ufs/ext2fs/ext2fs_readwrite.c
index 9ae4322756f..94424055733 100644
--- a/sys/ufs/ext2fs/ext2fs_readwrite.c
+++ b/sys/ufs/ext2fs/ext2fs_readwrite.c
@@ -79,6 +79,8 @@ ext2fs_read(v)
struct uio *uio;
struct m_ext2fs *fs;
struct buf *bp;
+ void *win;
+ vsize_t bytelen;
ufs_daddr_t lbn, nextlbn;
off_t bytesinfile;
long size, xfersize, blkoffset;
@@ -107,6 +109,27 @@ ext2fs_read(v)
if (uio->uio_resid == 0)
return (0);
+ if (vp->v_type == VREG) {
+ error = 0;
+ while (uio->uio_resid > 0) {
+
+ bytelen = MIN(ip->i_e2fs_size - uio->uio_offset,
+ uio->uio_resid);
+
+ if (bytelen == 0) {
+ break;
+ }
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
+ }
+ }
+ goto out;
+ }
+
for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
if ((bytesinfile = ip->i_e2fs_size - uio->uio_offset) <= 0)
break;
@@ -156,8 +179,11 @@ ext2fs_read(v)
if (bp != NULL)
brelse(bp);
+out:
if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
ip->i_flag |= IN_ACCESS;
+ if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
+ error = ext2fs_update(ip, NULL, NULL, 1);
}
return (error);
}
@@ -183,12 +209,17 @@ ext2fs_write(v)
struct proc *p;
ufs_daddr_t lbn;
off_t osize;
- int blkoffset, error, flags, ioflag, resid, size, xfersize;
+ int blkoffset, error, flags, ioflag, resid, xfersize;
+ vsize_t bytelen;
+ void *win;
+ off_t oldoff;
+ boolean_t rv;
ioflag = ap->a_ioflag;
uio = ap->a_uio;
vp = ap->a_vp;
ip = VTOI(vp);
+ error = 0;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
@@ -232,35 +263,65 @@ ext2fs_write(v)
resid = uio->uio_resid;
osize = ip->i_e2fs_size;
- flags = ioflag & IO_SYNC ? B_SYNC : 0;
+ if (vp->v_type == VREG) {
+ while (uio->uio_resid > 0) {
+ oldoff = uio->uio_offset;
+ blkoffset = blkoff(fs, uio->uio_offset);
+ bytelen = MIN(fs->e2fs_bsize - blkoffset,
+ uio->uio_resid);
+
+ /*
+ * XXXUBC if file is mapped and this is the last block,
+ * process one page at a time.
+ */
+
+ error = ext2fs_balloc_range(vp, uio->uio_offset,
+ bytelen, ap->a_cred, 0);
+ if (error) {
+ break;
+ }
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_WRITE);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
+ }
+
+ /*
+ * flush what we just wrote if necessary.
+ * XXXUBC simplistic async flushing.
+ */
+
+ if (oldoff >> 16 != uio->uio_offset >> 16) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, (oldoff >> 16) << 16,
+ (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ }
+ }
+ goto out;
+ }
+
+ flags = ioflag & IO_SYNC ? B_SYNC : 0;
for (error = 0; uio->uio_resid > 0;) {
lbn = lblkno(fs, uio->uio_offset);
blkoffset = blkoff(fs, uio->uio_offset);
- xfersize = fs->e2fs_bsize - blkoffset;
- if (uio->uio_resid < xfersize)
- xfersize = uio->uio_resid;
- if (fs->e2fs_bsize > xfersize)
+ xfersize = MIN(fs->e2fs_bsize - blkoffset, uio->uio_resid);
+ if (xfersize < fs->e2fs_bsize)
flags |= B_CLRBUF;
else
flags &= ~B_CLRBUF;
-
error = ext2fs_buf_alloc(ip,
- lbn, blkoffset + xfersize, ap->a_cred, &bp, flags);
+ lbn, blkoffset + xfersize, ap->a_cred, &bp, flags);
if (error)
break;
- if (uio->uio_offset + xfersize > ip->i_e2fs_size) {
+ if (ip->i_e2fs_size < uio->uio_offset + xfersize) {
ip->i_e2fs_size = uio->uio_offset + xfersize;
- uvm_vnp_setsize(vp, ip->i_e2fs_size);
}
- uvm_vnp_uncache(vp);
-
- size = fs->e2fs_bsize - bp->b_resid;
- if (size < xfersize)
- xfersize = size;
-
- error =
- uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
+ error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
if (ioflag & IO_SYNC)
(void)bwrite(bp);
else if (xfersize + blkoffset == fs->e2fs_bsize) {
@@ -272,13 +333,14 @@ ext2fs_write(v)
bdwrite(bp);
if (error || xfersize == 0)
break;
- ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
/*
* If we successfully wrote any data, and we are not the superuser
* we clear the setuid and setgid bits as a precaution against
* tampering.
*/
+out:
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
ip->i_e2fs_mode &= ~(ISUID | ISGID);
if (error) {
@@ -288,8 +350,7 @@ ext2fs_write(v)
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
}
- } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
+ } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
error = ext2fs_update(ip, NULL, NULL, 1);
- }
return (error);
}
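One note on the ext2fs_write() path above: the simplistic async flush fires each time a write crosses a 64KB boundary, since the >> 16 shifts truncate offsets to 64KB windows; the window just filled is cleaned while the next one fills. In isolation, with illustrative offsets:

	off_t oldoff = 0xfff0;		/* inside window 0 */
	off_t newoff = 0x10010;		/* inside window 1 */

	if ((oldoff >> 16) != (newoff >> 16)) {
		off_t lo = (oldoff >> 16) << 16;	/* 0x00000 */
		off_t hi = (newoff >> 16) << 16;	/* 0x10000 */
		/* pgo_flush(uobj, lo, hi, PGO_CLEANIT); */
	}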
diff --git a/sys/ufs/ext2fs/ext2fs_subr.c b/sys/ufs/ext2fs/ext2fs_subr.c
index 82165b8f242..3263f7e5391 100644
--- a/sys/ufs/ext2fs/ext2fs_subr.c
+++ b/sys/ufs/ext2fs/ext2fs_subr.c
@@ -1,5 +1,4 @@
-/* $OpenBSD: ext2fs_subr.c,v 1.6 2001/09/18 01:39:13 art Exp $ */
-/* $NetBSD: ext2fs_subr.c,v 1.1 1997/06/11 09:34:03 bouyer Exp $ */
+/* $NetBSD: ext2fs_subr.c,v 1.4 2000/03/30 12:41:11 augustss Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -96,7 +95,7 @@ ext2fs_checkoverlap(bp, ip)
if (ep == bp || (ep->b_flags & B_INVAL) ||
ep->b_vp == NULLVP)
continue;
- if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL))
+ if (VOP_BMAP(ep->b_vp, (ufs_daddr_t)0, &vp, (ufs_daddr_t)0, NULL))
continue;
if (vp != ip->i_devvp)
continue;
diff --git a/sys/ufs/ext2fs/ext2fs_vfsops.c b/sys/ufs/ext2fs/ext2fs_vfsops.c
index 6991cf9d650..e438268acbc 100644
--- a/sys/ufs/ext2fs/ext2fs_vfsops.c
+++ b/sys/ufs/ext2fs/ext2fs_vfsops.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: ext2fs_vfsops.c,v 1.16 2001/11/21 22:21:48 csapuntz Exp $ */
-/* $NetBSD: ext2fs_vfsops.c,v 1.1 1997/06/11 09:34:07 bouyer Exp $ */
+/* $OpenBSD: ext2fs_vfsops.c,v 1.17 2001/11/27 05:27:12 art Exp $ */
+/* $NetBSD: ext2fs_vfsops.c,v 1.40 2000/11/27 08:39:53 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -402,9 +402,11 @@ ext2fs_reload(mountp, cred, p)
* Step 1: invalidate all cached meta-data.
*/
devvp = VFSTOUFS(mountp)->um_devvp;
- if (vinvalbuf(devvp, 0, cred, p, 0, 0))
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ error = vinvalbuf(devvp, 0, cred, p, 0, 0);
+ VOP_UNLOCK(devvp, 0, p);
+ if (error)
panic("ext2fs_reload: dirty1");
-
/*
* Step 2: re-read superblock from disk.
*/
@@ -583,14 +585,18 @@ ext2fs_mountfs(devvp, mp, p)
mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */
+ mp->mnt_fs_bshift = m_fs->e2fs_bshift;
ump->um_mountp = mp;
ump->um_dev = dev;
ump->um_devvp = devvp;
ump->um_nindir = NINDIR(m_fs);
+ ump->um_lognindir = ffs(NINDIR(m_fs)) - 1;
ump->um_bptrtodb = m_fs->e2fs_fsbtodb;
ump->um_seqinc = 1; /* no frags */
devvp->v_specmountpoint = mp;
return (0);
+
out:
if (bp)
brelse(bp);
@@ -924,6 +930,7 @@ ext2fs_vget(mp, ino, vpp)
ip->i_flag |= IN_MODIFIED;
}
+ vp->v_uvm.u_size = ip->i_e2fs_size;
*vpp = vp;
return (0);
}
diff --git a/sys/ufs/ext2fs/ext2fs_vnops.c b/sys/ufs/ext2fs/ext2fs_vnops.c
index 0faba75ffd2..fffdd494d5a 100644
--- a/sys/ufs/ext2fs/ext2fs_vnops.c
+++ b/sys/ufs/ext2fs/ext2fs_vnops.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: ext2fs_vnops.c,v 1.17 2001/11/06 19:53:21 miod Exp $ */
-/* $NetBSD: ext2fs_vnops.c,v 1.1 1997/06/11 09:34:09 bouyer Exp $ */
+/* $OpenBSD: ext2fs_vnops.c,v 1.18 2001/11/27 05:27:12 art Exp $ */
+/* $NetBSD: ext2fs_vnops.c,v 1.30 2000/11/27 08:39:53 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -402,8 +402,6 @@ ext2fs_chmod(vp, mode, cred, p)
ip->i_e2fs_mode &= ~ALLPERMS;
ip->i_e2fs_mode |= (mode & ALLPERMS);
ip->i_flag |= IN_CHANGE;
- if ((vp->v_flag & VTEXT) && (ip->i_e2fs_mode & S_ISTXT) == 0)
- (void) uvm_vnp_uncache(vp);
return (0);
}
@@ -1469,7 +1467,11 @@ struct vnodeopv_entry_desc ext2fs_vnodeop_entries[] = {
{ &vop_pathconf_desc, ufs_pathconf }, /* pathconf */
{ &vop_advlock_desc, ext2fs_advlock }, /* advlock */
{ &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */
- { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL }
+ { &vop_ballocn_desc, ext2fs_ballocn },
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_putpages_desc, genfs_putpages },
+ { &vop_size_desc, genfs_size },
+ { NULL, NULL }
};
struct vnodeopv_desc ext2fs_vnodeop_opv_desc =
{ &ext2fs_vnodeop_p, ext2fs_vnodeop_entries };
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 8ddf99405fc..a53d87828c3 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_alloc.c,v 1.35 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ffs_alloc.c,v 1.36 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $ */
/*
@@ -169,14 +169,15 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop)
struct buf **bpp;
ufs_daddr_t *blknop;
{
- register struct fs *fs;
- struct buf *bp = NULL;
+ struct fs *fs;
+ struct buf *bp;
ufs_daddr_t quota_updated = 0;
int cg, request, error;
daddr_t bprev, bno;
if (bpp != NULL)
*bpp = NULL;
+
fs = ip->i_fs;
#ifdef DIAGNOSTIC
if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
@@ -282,7 +283,6 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop)
if (bno <= 0)
goto nospace;
- (void) uvm_vnp_uncache(ITOV(ip));
if (!DOINGSOFTDEP(ITOV(ip)))
ffs_blkfree(ip, bprev, (long)osize);
if (nsize < request)
@@ -362,7 +362,8 @@ ffs_reallocblks(v)
struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
int i, len, start_lvl, end_lvl, pref, ssize;
- if (doreallocblks == 0)
+ /* XXXUBC - don't reallocblks for now */
+ if (1 || doreallocblks == 0)
return (ENOSPC);
vp = ap->a_vp;
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 009adc91ff9..5f6ddc3d94e 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_balloc.c,v 1.18 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ffs_balloc.c,v 1.19 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $ */
/*
@@ -402,3 +402,61 @@ fail:
return (error);
}
+
+int
+ffs_ballocn(v)
+ void *v;
+{
+ struct vop_ballocn_args /* {
+ struct vnode *a_vp;
+ off_t a_offset;
+ off_t a_length;
+ struct ucred *a_cred;
+ int a_flags;
+ } */ *ap = v;
+
+ off_t off, len;
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
+ struct fs *fs = ip->i_fs;
+ int error, delta, bshift, bsize;
+
+ error = 0;
+ bshift = fs->fs_bshift;
+ bsize = 1 << bshift;
+
+ off = ap->a_offset;
+ len = ap->a_length;
+
+ delta = off & (bsize - 1);
+ off -= delta;
+ len += delta;
+
+ while (len > 0) {
+ bsize = min(bsize, len);
+
+ error = ffs_balloc(ip, off, bsize, ap->a_cred, ap->a_flags,
+ NULL);
+ if (error) {
+ goto out;
+ }
+
+ /*
+	 * increase file size now, VOP_BALLOCN() requires that
+ * EOF be up-to-date before each call.
+ */
+
+ if (ip->i_ffs_size < off + bsize) {
+ ip->i_ffs_size = off + bsize;
+ if (vp->v_uvm.u_size < ip->i_ffs_size) {
+ uvm_vnp_setsize(vp, ip->i_ffs_size);
+ }
+ }
+
+ off += bsize;
+ len -= bsize;
+ }
+
+out:
+ return error;
+}
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index eeeba209c69..2875a332a57 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_extern.h,v 1.14 2001/11/13 00:10:56 art Exp $ */
+/* $OpenBSD: ffs_extern.h,v 1.15 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */
/*-
@@ -87,6 +87,7 @@ void ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int));
/* ffs_balloc.c */
int ffs_balloc(struct inode *, off_t, int, struct ucred *, int, struct buf **);
+int ffs_ballocn(void *);
/* ffs_inode.c */
int ffs_init __P((struct vfsconf *));
@@ -128,7 +129,7 @@ int ffs_read __P((void *));
int ffs_write __P((void *));
int ffs_fsync __P((void *));
int ffs_reclaim __P((void *));
-
+int ffs_size __P((void *));
/*
* Soft dependency function prototypes.
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index c81c795b2ac..cddf6a368ca 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_inode.c,v 1.25 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ffs_inode.c,v 1.26 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */
/*
@@ -150,14 +150,14 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
{
struct vnode *ovp;
daddr_t lastblock;
- daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
+ daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR];
daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
struct fs *fs;
- struct buf *bp;
+ struct proc *p = curproc;
int offset, size, level;
long count, nblocks, vflags, blocksreleased = 0;
register int i;
- int aflags, error, allerror;
+ int error, allerror;
off_t osize;
if (length < 0)
@@ -188,10 +188,55 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
if ((error = getinoquota(oip)) != 0)
return (error);
- uvm_vnp_setsize(ovp, length);
+ fs = oip->i_fs;
+ if (length > fs->fs_maxfilesize)
+ return (EFBIG);
+ osize = oip->i_ffs_size;
oip->i_ci.ci_lasta = oip->i_ci.ci_clen
= oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0;
+ /*
+ * Lengthen the size of the file. We must ensure that the
+ * last byte of the file is allocated. Since the smallest
+ * value of osize is 0, length will be at least 1.
+ */
+
+ if (osize < length) {
+ ufs_balloc_range(ovp, length - 1, 1, cred,
+ flags & IO_SYNC ? B_SYNC : 0);
+ oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ return (UFS_UPDATE(oip, 1));
+ }
+
+ /*
+ * When truncating a regular file down to a non-block-aligned size,
+ * we must zero the part of last block which is past the new EOF.
+ * We must synchronously flush the zeroed pages to disk
+ * since the new pages will be invalidated as soon as we
+ * inform the VM system of the new, smaller size.
+	 * We must do this before acquiring the GLOCK, since fetching
+ * the pages will acquire the GLOCK internally.
+ * So there is a window where another thread could see a whole
+ * zeroed page past EOF, but that's life.
+ */
+
+ offset = blkoff(fs, length);
+ if (ovp->v_type == VREG && length < osize && offset != 0) {
+ struct uvm_object *uobj;
+ voff_t eoz;
+
+ size = blksize(fs, oip, lblkno(fs, length));
+ eoz = min(lblktosize(fs, lblkno(fs, length)) + size, osize);
+ uvm_vnp_zerorange(ovp, length, eoz - length);
+ uobj = &ovp->v_uvm.u_obj;
+ simple_lock(&uobj->vmobjlock);
+ uobj->pgops->pgo_flush(uobj, length, eoz,
+ PGO_CLEANIT|PGO_DEACTIVATE|PGO_SYNCIO);
+ simple_unlock(&ovp->v_uvm.u_obj.vmobjlock);
+ }
+
+ lockmgr(&ovp->v_glock, LK_EXCLUSIVE, NULL, p);
+
if (DOINGSOFTDEP(ovp)) {
if (length > 0 || softdep_slowdown(ovp)) {
/*
@@ -204,80 +249,29 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
* so that it will have no data structures left.
*/
if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT,
- curproc)) != 0)
+ curproc)) != 0) {
+ lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p);
return (error);
+ }
} else {
+ uvm_vnp_setsize(ovp, length);
(void)ufs_quota_free_blocks(oip, oip->i_ffs_blocks,
NOCRED);
softdep_setup_freeblocks(oip, length);
(void) vinvalbuf(ovp, 0, cred, curproc, 0, 0);
+ lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p);
oip->i_flag |= IN_CHANGE | IN_UPDATE;
return (UFS_UPDATE(oip, 0));
}
}
- fs = oip->i_fs;
- osize = oip->i_ffs_size;
/*
- * Lengthen the size of the file. We must ensure that the
- * last byte of the file is allocated. Since the smallest
- * value of osize is 0, length will be at least 1.
+ * Reduce the size of the file.
*/
- if (osize < length) {
- if (length > fs->fs_maxfilesize)
- return (EFBIG);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = UFS_BUF_ALLOC(oip, length - 1, 1,
- cred, aflags, &bp);
- if (error)
- return (error);
- oip->i_ffs_size = length;
- uvm_vnp_setsize(ovp, length);
- (void) uvm_vnp_uncache(ovp);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
- oip->i_flag |= IN_CHANGE | IN_UPDATE;
- return (UFS_UPDATE(oip, MNT_WAIT));
- }
+ oip->i_ffs_size = length;
uvm_vnp_setsize(ovp, length);
/*
- * Shorten the size of the file. If the file is not being
- * truncated to a block boundary, the contents of the
- * partial block following the end of the file must be
- * zero'ed in case it ever becomes accessible again because
- * of subsequent file growth. Directories however are not
- * zero'ed as they should grow back initialized to empty.
- */
- offset = blkoff(fs, length);
- if (offset == 0) {
- oip->i_ffs_size = length;
- } else {
- lbn = lblkno(fs, length);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = UFS_BUF_ALLOC(oip, length - 1, 1,
- cred, aflags, &bp);
- if (error)
- return (error);
- oip->i_ffs_size = length;
- size = blksize(fs, oip, lbn);
- (void) uvm_vnp_uncache(ovp);
- if (ovp->v_type != VDIR)
- bzero((char *)bp->b_data + offset,
- (u_int)(size - offset));
- allocbuf(bp, size);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
- }
- /*
* Calculate index into inode's block list of
* last direct and indirect blocks (if any)
* which we want to keep. Lastblock is -1 when
@@ -402,6 +396,7 @@ done:
oip->i_ffs_blocks -= blocksreleased;
if (oip->i_ffs_blocks < 0) /* sanity */
oip->i_ffs_blocks = 0;
+ lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p);
oip->i_flag |= IN_CHANGE;
(void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED);
return (allerror);
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 1d66094cc06..7a66eed4d8b 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_softdep.c,v 1.25 2001/11/13 14:19:24 art Exp $ */
+/* $OpenBSD: ffs_softdep.c,v 1.26 2001/11/27 05:27:12 art Exp $ */
/*
* Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
*
@@ -56,6 +56,7 @@
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
+#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/vnode.h>
@@ -69,6 +70,10 @@
#include <ufs/ffs/ffs_extern.h>
#include <ufs/ufs/ufs_extern.h>
+#include <uvm/uvm.h>
+struct pool sdpcpool;
+int softdep_lockedbufs;
+
#define STATIC
/*
@@ -109,6 +114,13 @@ extern char *memname[];
*/
/*
+ * Definitions for page cache info hashtable.
+ */
+#define PCBPHASHSIZE 1024
+LIST_HEAD(, buf) pcbphashhead[PCBPHASHSIZE];
+#define PCBPHASH(vp, lbn) ((((vaddr_t)(vp) >> 8) ^ (lbn)) & (PCBPHASHSIZE - 1))
+
+/*
* Internal function prototypes.
*/
STATIC void softdep_error __P((char *, int));
@@ -160,6 +172,13 @@ STATIC void pause_timer __P((void *));
STATIC int request_cleanup __P((int, int));
STATIC int process_worklist_item __P((struct mount *, int));
STATIC void add_to_worklist __P((struct worklist *));
+STATIC struct buf *softdep_setup_pagecache __P((struct inode *, ufs_lbn_t,
+ long));
+STATIC void softdep_collect_pagecache __P((struct inode *));
+STATIC void softdep_free_pagecache __P((struct inode *));
+STATIC struct vnode *softdep_lookupvp(struct fs *, ino_t);
+STATIC struct buf *softdep_lookup_pcbp __P((struct vnode *, ufs_lbn_t));
+void softdep_pageiodone __P((struct buf *));
/*
* Exported softdep operations.
@@ -176,6 +195,7 @@ struct bio_ops bioops = {
softdep_deallocate_dependencies, /* io_deallocate */
softdep_move_dependencies, /* io_movedeps */
softdep_count_dependencies, /* io_countdeps */
+	softdep_pageiodone,		/* io_pageiodone */
};
/*
@@ -1055,6 +1075,7 @@ top:
void
softdep_initialize()
{
+ int i;
LIST_INIT(&mkdirlisthd);
LIST_INIT(&softdep_workitem_pending);
@@ -1073,6 +1094,11 @@ softdep_initialize()
newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash);
sema_init(&newblk_in_progress, "newblk", PRIBIO, 0);
timeout_set(&proc_waiting_timeout, pause_timer, 0);
+ pool_init(&sdpcpool, sizeof(struct buf), 0, 0, 0, "sdpcpool",
+ 0, pool_page_alloc_nointr, pool_page_free_nointr, M_TEMP);
+ for (i = 0; i < PCBPHASHSIZE; i++) {
+ LIST_INIT(&pcbphashhead[i]);
+ }
}
/*
@@ -1325,11 +1351,16 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
LIST_REMOVE(newblk, nb_hash);
FREE(newblk, M_NEWBLK);
+ /*
+ * If we were not passed a bp to attach the dep to,
+ * then this must be for a regular file.
+ * Allocate a buffer to represent the page cache pages
+ * that are the real dependency. The pages themselves
+ * cannot refer to the dependency since we don't want to
+ * add a field to struct vm_page for this.
+ */
if (bp == NULL) {
- /*
- * XXXUBC - Yes, I know how to fix this, but not right now.
- */
- panic("softdep_setup_allocdirect: Bonk art in the head\n");
+ bp = softdep_setup_pagecache(ip, lbn, newsize);
}
WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
if (lbn >= NDADDR) {
@@ -1563,10 +1594,7 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
if (nbp == NULL) {
- /*
- * XXXUBC - Yes, I know how to fix this, but not right now.
- */
- panic("softdep_setup_allocindir_page: Bonk art in the head\n");
+ nbp = softdep_setup_pagecache(ip, lbn, ip->i_fs->fs_bsize);
}
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
FREE_LOCK(&lk);
@@ -1745,6 +1773,7 @@ softdep_setup_freeblocks(ip, length)
int i, delay, error;
fs = ip->i_fs;
+ vp = ITOV(ip);
if (length != 0)
panic("softdep_setup_freeblocks: non-zero length");
MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks),
@@ -1804,9 +1833,15 @@ softdep_setup_freeblocks(ip, length)
* with this inode are obsolete and can simply be de-allocated.
* We must first merge the two dependency lists to get rid of
* any duplicate freefrag structures, then purge the merged list.
+ * We must remove any pagecache markers from the pagecache
+ * hashtable first because any I/Os in flight will want to see
+	 * dependencies attached to their pagecache markers.  We cannot
+	 * free the pagecache markers themselves until later, after all
+	 * the dependencies that reference them have been freed.
* If we still have a bitmap dependency, then the inode has never
* been written to disk, so we can free any fragments without delay.
*/
+ softdep_collect_pagecache(ip);
merge_inode_lists(inodedep);
while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
free_allocdirect(&inodedep->id_inoupdt, adp, delay);
@@ -1818,7 +1853,6 @@ softdep_setup_freeblocks(ip, length)
* Once they are all there, walk the list and get rid of
* any dependencies.
*/
- vp = ITOV(ip);
ACQUIRE_LOCK(&lk);
drain_output(vp, 1);
while (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) {
@@ -1830,6 +1864,7 @@ softdep_setup_freeblocks(ip, length)
brelse(bp);
ACQUIRE_LOCK(&lk);
}
+ softdep_free_pagecache(ip);
if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
(void) free_inodedep(inodedep);
FREE_LOCK(&lk);
@@ -2898,7 +2933,6 @@ handle_workitem_freefile(freefile)
struct freefile *freefile;
{
struct fs *fs;
- struct vnode vp;
struct inode tip;
struct inodedep *idp;
int error;
@@ -2914,8 +2948,7 @@ handle_workitem_freefile(freefile)
tip.i_devvp = freefile->fx_devvp;
tip.i_dev = freefile->fx_devvp->v_rdev;
tip.i_fs = fs;
- tip.i_vnode = &vp;
- vp.v_data = &tip;
+ tip.i_vnode = NULL;
if ((error = ffs_freefile(&tip, freefile->fx_oldinum,
freefile->fx_mode)) != 0) {
@@ -4313,6 +4346,7 @@ flush_inodedep_deps(fs, ino)
struct allocdirect *adp;
int error, waitfor;
struct buf *bp;
+ struct vnode *vp;
/*
* This work is done in two passes. The first pass grabs most
@@ -4332,6 +4366,27 @@ flush_inodedep_deps(fs, ino)
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
return (0);
+
+ /*
+ * When file data was in the buffer cache,
+ * softdep_sync_metadata() would start i/o on
+ * file data buffers itself. But now that
+ * we're using the page cache to hold file data,
+ * we need something else to trigger those flushes.
+	 * Let's just do it here.
+ */
+
+ vp = softdep_lookupvp(fs, ino);
+ if (vp) {
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+
+ simple_lock(&uobj->vmobjlock);
+ (uobj->pgops->pgo_flush)(uobj, 0, 0,
+ PGO_ALLPAGES|PGO_CLEANIT|
+ (waitfor == MNT_NOWAIT ? 0: PGO_SYNCIO));
+ simple_unlock(&uobj->vmobjlock);
+ }
+
TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
if (adp->ad_state & DEPCOMPLETE)
continue;
@@ -4944,3 +4999,196 @@ softdep_error(func, error)
/* XXX should do something better! */
printf("%s: got error %d while accessing filesystem\n", func, error);
}
+
+/*
+ * Allocate a buffer on which to attach a dependency.
+ */
+STATIC struct buf *
+softdep_setup_pagecache(ip, lbn, size)
+ struct inode *ip;
+ ufs_lbn_t lbn;
+ long size;
+{
+ struct vnode *vp = ITOV(ip);
+ struct buf *bp;
+ int s;
+
+ /*
+ * Enter pagecache dependency buf in hash.
+ */
+
+ bp = softdep_lookup_pcbp(vp, lbn);
+ if (bp == NULL) {
+ s = splbio();
+ bp = pool_get(&sdpcpool, PR_WAITOK);
+ splx(s);
+
+ bp->b_vp = vp;
+ bp->b_lblkno = lbn;
+ bp->b_bcount = bp->b_resid = size;
+ LIST_INIT(&bp->b_dep);
+ LIST_INSERT_HEAD(&pcbphashhead[PCBPHASH(vp, lbn)], bp, b_hash);
+ LIST_INSERT_HEAD(&ip->i_pcbufhd, bp, b_vnbufs);
+ } else {
+ KASSERT(size >= bp->b_bcount);
+ bp->b_resid += size - bp->b_bcount;
+ bp->b_bcount = size;
+ }
+ return bp;
+}
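The marker buf returned here stands in for page cache pages so dependencies have something to attach to; the callers changed earlier in this diff use it exactly as they would a metadata buf:

	/* Condensed from the softdep_setup_allocdirect() hunk above. */
	if (bp == NULL) {
		/* regular-file data is in the page cache, not a buf */
		bp = softdep_setup_pagecache(ip, lbn, newsize);
	}
	WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);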
+
+/*
+ * softdep_collect_pagecache() and softdep_free_pagecache()
+ * are used to remove page cache dependency buffers when
+ * a file is being truncated to 0.
+ */
+
+STATIC void
+softdep_collect_pagecache(ip)
+ struct inode *ip;
+{
+ struct buf *bp;
+
+ LIST_FOREACH(bp, &ip->i_pcbufhd, b_vnbufs) {
+ LIST_REMOVE(bp, b_hash);
+ }
+}
+
+STATIC void
+softdep_free_pagecache(ip)
+ struct inode *ip;
+{
+ struct buf *bp, *nextbp;
+
+ for (bp = LIST_FIRST(&ip->i_pcbufhd); bp != NULL; bp = nextbp) {
+ nextbp = LIST_NEXT(bp, b_vnbufs);
+ LIST_REMOVE(bp, b_vnbufs);
+ KASSERT(LIST_FIRST(&bp->b_dep) == NULL);
+ pool_put(&sdpcpool, bp);
+ }
+}
+
+STATIC struct vnode *
+softdep_lookupvp(fs, ino)
+ struct fs *fs;
+ ino_t ino;
+{
+ struct mount *mp;
+ extern struct vfsops ffs_vfsops;
+
+ CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
+ if (mp->mnt_op == &ffs_vfsops &&
+ VFSTOUFS(mp)->um_fs == fs) {
+ break;
+ }
+ }
+ if (mp == NULL) {
+ return NULL;
+ }
+ return ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino);
+}
+
+STATIC struct buf *
+softdep_lookup_pcbp(vp, lbn)
+ struct vnode *vp;
+ ufs_lbn_t lbn;
+{
+ struct buf *bp;
+
+ LIST_FOREACH(bp, &pcbphashhead[PCBPHASH(vp, lbn)], b_hash) {
+ if (bp->b_vp == vp && bp->b_lblkno == lbn) {
+ break;
+ }
+ }
+ return bp;
+}
+
+/*
+ * Do softdep i/o completion processing for page cache writes.
+ */
+
+void
+softdep_pageiodone(bp)
+ struct buf *bp;
+{
+ int npages = bp->b_bufsize >> PAGE_SHIFT;
+ struct vnode *vp = bp->b_vp;
+ struct vm_page *pg;
+ struct buf *pcbp = NULL;
+ struct allocdirect *adp;
+ struct allocindir *aip;
+ struct worklist *wk;
+ ufs_lbn_t lbn;
+ voff_t off;
+ long iosize = bp->b_bcount;
+ int size, asize, bshift, bsize;
+ int i;
+
+ KASSERT(!(bp->b_flags & B_READ));
+ bshift = vp->v_mount->mnt_fs_bshift;
+ bsize = 1 << bshift;
+ asize = min(PAGE_SIZE, bsize);
+ ACQUIRE_LOCK(&lk);
+ for (i = 0; i < npages; i++) {
+ pg = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
+ if (pg == NULL) {
+ continue;
+ }
+
+ for (off = pg->offset;
+ off < pg->offset + PAGE_SIZE;
+ off += bsize) {
+ size = min(asize, iosize);
+ iosize -= size;
+ lbn = off >> bshift;
+ if (pcbp == NULL || pcbp->b_lblkno != lbn) {
+ pcbp = softdep_lookup_pcbp(vp, lbn);
+ }
+ if (pcbp == NULL) {
+ continue;
+ }
+ pcbp->b_resid -= size;
+ if (pcbp->b_resid < 0) {
+ panic("softdep_pageiodone: "
+ "resid < 0, vp %p lbn 0x%lx pcbp %p",
+ vp, lbn, pcbp);
+ }
+ if (pcbp->b_resid > 0) {
+ continue;
+ }
+
+ /*
+ * We've completed all the i/o for this block.
+ * mark the dep complete.
+ */
+
+ KASSERT(LIST_FIRST(&pcbp->b_dep) != NULL);
+ while ((wk = LIST_FIRST(&pcbp->b_dep))) {
+ WORKLIST_REMOVE(wk);
+ switch (wk->wk_type) {
+ case D_ALLOCDIRECT:
+ adp = WK_ALLOCDIRECT(wk);
+ adp->ad_state |= COMPLETE;
+ handle_allocdirect_partdone(adp);
+ break;
+
+ case D_ALLOCINDIR:
+ aip = WK_ALLOCINDIR(wk);
+ aip->ai_state |= COMPLETE;
+ handle_allocindir_partdone(aip);
+ break;
+
+ default:
+ panic("softdep_pageiodone: "
+ "bad type %d, pcbp %p wk %p",
+ wk->wk_type, pcbp, wk);
+ }
+ }
+ LIST_REMOVE(pcbp, b_hash);
+ LIST_REMOVE(pcbp, b_vnbufs);
+ pool_put(&sdpcpool, pcbp);
+ pcbp = NULL;
+ }
+ }
+ FREE_LOCK(&lk);
+}
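The b_resid accounting above retires one marker per block once every page-sized chunk of it has completed. A worked trace, assuming 8KB blocks and 4KB pages (so asize = 4096):

	/*
	 * pcbp->b_resid is seeded with the block size (8192) in
	 * softdep_setup_pagecache().
	 * First page done:  8192 - 4096 = 4096, deps stay attached.
	 * Second page done: 4096 - 4096 = 0, walk b_dep, mark each
	 * allocdirect/allocindir COMPLETE, unhash and free the marker.
	 */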
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index b1dee123893..19c77726fa8 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_vfsops.c,v 1.45 2001/11/21 22:21:48 csapuntz Exp $ */
+/* $OpenBSD: ffs_vfsops.c,v 1.46 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $ */
/*
@@ -737,11 +737,14 @@ ffs_mountfs(devvp, mp, p)
else
mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
+ mp->mnt_fs_bshift = fs->fs_bshift;
+ mp->mnt_dev_bshift = DEV_BSHIFT;
mp->mnt_flag |= MNT_LOCAL;
ump->um_mountp = mp;
ump->um_dev = dev;
ump->um_devvp = devvp;
ump->um_nindir = fs->fs_nindir;
+ ump->um_lognindir = ffs(fs->fs_nindir) - 1;
ump->um_bptrtodb = fs->fs_fsbtodb;
ump->um_seqinc = fs->fs_frag;
for (i = 0; i < MAXQUOTAS; i++)
@@ -1119,6 +1122,7 @@ retry:
ip->i_fs = fs = ump->um_fs;
ip->i_dev = dev;
ip->i_number = ino;
+ LIST_INIT(&ip->i_pcbufhd);
ip->i_vtbl = &ffs_vtbl;
/*
@@ -1199,6 +1203,7 @@ retry:
ip->i_ffs_uid = ip->i_din.ffs_din.di_ouid; /* XXX */
ip->i_ffs_gid = ip->i_din.ffs_din.di_ogid; /* XXX */
} /* XXX */
+ uvm_vnp_setsize(vp, ip->i_ffs_size);
*vpp = vp;
return (0);
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 26e9bbaf9da..8190ef82eb3 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_vnops.c,v 1.20 2001/11/06 19:53:21 miod Exp $ */
+/* $OpenBSD: ffs_vnops.c,v 1.21 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */
/*
@@ -107,8 +107,13 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
{ &vop_advlock_desc, ufs_advlock }, /* advlock */
{ &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */
{ &vop_bwrite_desc, vop_generic_bwrite },
- { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL }
+ { &vop_ballocn_desc, ffs_ballocn },
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_putpages_desc, genfs_putpages },
+ { &vop_size_desc, ffs_size },
+ { NULL, NULL }
};
+
struct vnodeopv_desc ffs_vnodeop_opv_desc =
{ &ffs_vnodeop_p, ffs_vnodeop_entries };
@@ -229,6 +234,7 @@ ffs_fsync(v)
struct vnode *vp = ap->a_vp;
struct buf *bp, *nbp;
int s, error, passes, skipmeta;
+ struct uvm_object *uobj;
if (vp->v_type == VBLK &&
vp->v_specmountpoint != NULL &&
@@ -236,13 +242,22 @@ ffs_fsync(v)
softdep_fsync_mountdev(vp);
/*
- * Flush all dirty buffers associated with a vnode.
+ * Flush all dirty data associated with a vnode.
*/
passes = NIADDR + 1;
skipmeta = 0;
if (ap->a_waitfor == MNT_WAIT)
skipmeta = 1;
s = splbio();
+
+ if (vp->v_type == VREG) {
+ uobj = &vp->v_uvm.u_obj;
+ simple_lock(&uobj->vmobjlock);
+ (uobj->pgops->pgo_flush)(uobj, 0, 0, PGO_ALLPAGES|PGO_CLEANIT|
+ ((ap->a_waitfor == MNT_WAIT) ? PGO_SYNCIO : 0));
+ simple_unlock(&uobj->vmobjlock);
+ }
+
loop:
for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp;
bp = LIST_NEXT(bp, b_vnbufs))
@@ -281,8 +296,10 @@ loop:
*/
if (passes > 0 || ap->a_waitfor != MNT_WAIT)
(void) bawrite(bp);
- else if ((error = bwrite(bp)) != 0)
+ else if ((error = bwrite(bp)) != 0) {
+ printf("ffs_fsync: bwrite failed %d\n", error);
return (error);
+ }
s = splbio();
/*
* Since we may have slept during the I/O, we need
@@ -325,7 +342,11 @@ loop:
}
}
splx(s);
- return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
+
+ error = (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
+ if (error)
+ printf("ffs_fsync: UFS_UPDATE failed. %d\n", error);
+ return (error);
}
/*
@@ -349,3 +370,31 @@ ffs_reclaim(v)
vp->v_data = NULL;
return (0);
}
+
+/*
+ * Return the last logical file offset that should be written for this file
+ * if we're doing a write that ends at "size".
+ */
+int
+ffs_size(v)
+ void *v;
+{
+ struct vop_size_args /* {
+ struct vnode *a_vp;
+ off_t a_size;
+ off_t *a_eobp;
+ } */ *ap = v;
+ struct inode *ip = VTOI(ap->a_vp);
+ struct fs *fs = ip->i_fs;
+ ufs_lbn_t olbn, nlbn;
+
+ olbn = lblkno(fs, ip->i_ffs_size);
+ nlbn = lblkno(fs, ap->a_size);
+
+ if (nlbn < NDADDR && olbn <= nlbn) {
+ *ap->a_eobp = fragroundup(fs, ap->a_size);
+ } else {
+ *ap->a_eobp = blkroundup(fs, ap->a_size);
+ }
+ return 0;
+}
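The rounding choice reflects FFS fragments: only the last block of a file, and only while it is still addressed by a direct pointer, may be smaller than a full block. A worked instance, assuming fs_bsize = 8192 and fs_fsize = 1024:

	/*
	 * a_size = 5000, nlbn < NDADDR and olbn <= nlbn:
	 *	the last block may remain a fragment, so
	 *	eob = fragroundup(fs, 5000) = 5120	(5 frags)
	 * a_size = 5000 but nlbn >= NDADDR:
	 *	indirect blocks are always full-sized, so
	 *	eob = blkroundup(fs, 5000) = 8192
	 */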
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 5665b276a0f..98c73de5579 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: inode.h,v 1.16 2001/07/04 06:10:50 angelos Exp $ */
+/* $OpenBSD: inode.h,v 1.17 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $ */
/*
@@ -84,6 +84,7 @@ struct inode {
#define i_e2fs inode_u.e2fs
struct cluster_info i_ci;
+ LIST_HEAD(,buf) i_pcbufhd;
struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
u_quad_t i_modrev; /* Revision level for NFS lease. */
struct lockf *i_lockf;/* Head of byte-level lock list. */
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index add641e15ce..fdf5c1be055 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_bmap.c,v 1.10 2001/11/21 22:24:24 csapuntz Exp $ */
+/* $OpenBSD: ufs_bmap.c,v 1.11 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_bmap.c,v 1.3 1996/02/09 22:36:00 christos Exp $ */
/*
@@ -233,6 +233,7 @@ ufs_getlbns(vp, bn, ap, nump)
long metalbn, realbn;
struct ufsmount *ump;
int64_t blockcnt;
+ int lbc;
int i, numlevels, off;
ump = VFSTOUFS(vp->v_mount);
@@ -260,10 +261,14 @@ ufs_getlbns(vp, bn, ap, nump)
* at the given level of indirection, and NIADDR - i is the number
* of levels of indirection needed to locate the requested block.
*/
- for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+ bn -= NDADDR;
+ for (lbc = 0, i = NIADDR;; i--, bn -= blockcnt) {
if (i == 0)
return (EFBIG);
- blockcnt *= MNINDIR(ump);
+
+ lbc += ump->um_lognindir;
+ blockcnt = (int64_t)1 << lbc;
+
if (bn < blockcnt)
break;
}
@@ -289,8 +294,9 @@ ufs_getlbns(vp, bn, ap, nump)
if (metalbn == realbn)
break;
- blockcnt /= MNINDIR(ump);
- off = (bn / blockcnt) % MNINDIR(ump);
+ lbc -= ump->um_lognindir;
+ blockcnt = (int64_t)1 << lbc;
+ off = (bn >> lbc) & (MNINDIR(ump) - 1);
++numlevels;
ap->in_lbn = metalbn;
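The precomputed um_lognindir (set via ffs(NINDIR) - 1 in the ffs_mountfs() and ext2fs_mountfs() hunks elsewhere in this commit) lets the indirection walk use shifts and masks in place of 64-bit multiply, divide, and modulus. For example, assuming MNINDIR(ump) = 2048:

	int lbc = 11;					/* ffs(2048) - 1 */
	int64_t one_level = (int64_t)1 << lbc;		/* 2048 blocks */
	int64_t two_levels = (int64_t)1 << (2 * lbc);	/* 2048 * 2048 */
	/* (bn >> lbc) & (2048 - 1) equals (bn / one_level) % 2048 */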
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index 50175a0ec86..fc39e16b45e 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_extern.h,v 1.12 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ufs_extern.h,v 1.13 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $ */
/*-
@@ -121,6 +121,7 @@ void ufs_ihashrem __P((struct inode *));
/* ufs_inode.c */
int ufs_init __P((struct vfsconf *));
int ufs_reclaim __P((struct vnode *, struct proc *));
+int ufs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *, int));
/* ufs_lookup.c */
void ufs_dirbad __P((struct inode *, doff_t, char *));
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index 8a3935632fb..3865342fde0 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_inode.c,v 1.10 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ufs_inode.c,v 1.11 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_inode.c,v 1.7 1996/05/11 18:27:52 mycroft Exp $ */
/*
@@ -151,3 +151,150 @@ ufs_reclaim(vp, p)
ufs_quota_delete(ip);
return (0);
}
+
+/*
+ * allocate a range of blocks in a file.
+ * after this function returns, any page entirely contained within the range
+ * will map to invalid data and thus must be overwritten before it is made
+ * accessible to others.
+ */
+
+int
+ufs_balloc_range(vp, off, len, cred, flags)
+ struct vnode *vp;
+ off_t off, len;
+ struct ucred *cred;
+ int flags;
+{
+ off_t oldeof, neweof, oldeob, neweob, oldpagestart, pagestart;
+ struct uvm_object *uobj;
+ int i, delta, error, npages1, npages2;
+ int bshift = vp->v_mount->mnt_fs_bshift;
+ int bsize = 1 << bshift;
+ int ppb = MAX(bsize >> PAGE_SHIFT, 1);
+ struct vm_page *pgs1[ppb], *pgs2[ppb];
+ UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
+ vp, off, len, vp->v_uvm.u_size);
+
+ oldeof = vp->v_uvm.u_size;
+ error = VOP_SIZE(vp, oldeof, &oldeob);
+ if (error) {
+ return error;
+ }
+
+ neweof = MAX(vp->v_uvm.u_size, off + len);
+ error = VOP_SIZE(vp, neweof, &neweob);
+ if (error) {
+ return error;
+ }
+
+ error = 0;
+ uobj = &vp->v_uvm.u_obj;
+ pgs1[0] = pgs2[0] = NULL;
+
+ /*
+ * if the last block in the file is not a full block (ie. it is a
+ * fragment), and this allocation is causing the fragment to change
+ * size (either to expand the fragment or promote it to a full block),
+ * cache the old last block (at its new size).
+ */
+
+ oldpagestart = trunc_page(oldeof) & ~(bsize - 1);
+ if ((oldeob & (bsize - 1)) != 0 && oldeob != neweob) {
+ npages1 = MIN(ppb, (round_page(neweob) - oldpagestart) >>
+ PAGE_SHIFT);
+ memset(pgs1, 0, npages1 * sizeof(struct vm_page *));
+ simple_lock(&uobj->vmobjlock);
+ error = VOP_GETPAGES(vp, oldpagestart, pgs1, &npages1,
+ 0, VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF);
+ if (error) {
+ goto out;
+ }
+ simple_lock(&uobj->vmobjlock);
+ uvm_lock_pageq();
+ for (i = 0; i < npages1; i++) {
+ UVMHIST_LOG(ubchist, "got pgs1[%d] %p", i, pgs1[i],0,0);
+ KASSERT((pgs1[i]->flags & PG_RELEASED) == 0);
+ pgs1[i]->flags &= ~PG_CLEAN;
+ uvm_pageactivate(pgs1[i]);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ }
+
+ /*
+ * cache the new range as well. this will create zeroed pages
+ * where the new block will be and keep them locked until the
+ * new block is allocated, so there will be no window where
+ * the old contents of the new block are visible to racing threads.
+ */
+
+ pagestart = trunc_page(off) & ~(bsize - 1);
+ if (pagestart != oldpagestart || pgs1[0] == NULL) {
+ npages2 = MIN(ppb, (round_page(neweob) - pagestart) >>
+ PAGE_SHIFT);
+ memset(pgs2, 0, npages2 * sizeof(struct vm_page *));
+ simple_lock(&uobj->vmobjlock);
+ error = VOP_GETPAGES(vp, pagestart, pgs2, &npages2, 0,
+ VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF);
+ if (error) {
+ goto out;
+ }
+ simple_lock(&uobj->vmobjlock);
+ uvm_lock_pageq();
+ for (i = 0; i < npages2; i++) {
+ UVMHIST_LOG(ubchist, "got pgs2[%d] %p", i, pgs2[i],0,0);
+ KASSERT((pgs2[i]->flags & PG_RELEASED) == 0);
+ pgs2[i]->flags &= ~PG_CLEAN;
+ uvm_pageactivate(pgs2[i]);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ }
+
+ /*
+ * adjust off to be block-aligned.
+ */
+
+ delta = off & (bsize - 1);
+ off -= delta;
+ len += delta;
+
+ /*
+ * now allocate the range.
+ */
+
+ lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL, curproc);
+ error = VOP_BALLOCN(vp, off, len, cred, flags);
+ lockmgr(&vp->v_glock, LK_RELEASE, NULL, curproc);
+
+ /*
+ * unbusy any pages we are holding.
+ * if we got an error, free any pages we created past the old eob.
+ */
+
+out:
+ simple_lock(&uobj->vmobjlock);
+ if (error) {
+ (void) (uobj->pgops->pgo_flush)(uobj, round_page(oldeob), 0,
+ PGO_FREE);
+ }
+ if (pgs1[0] != NULL) {
+ uvm_page_unbusy(pgs1, npages1);
+
+ /*
+ * The data in the frag might be moving to a new disk location.
+ * We need to flush pages to the new disk locations.
+ */
+
+ (uobj->pgops->pgo_flush)(uobj, oldeof & ~(bsize - 1),
+ MIN((oldeof + bsize) & ~(bsize - 1), neweof),
+ PGO_CLEANIT | ((flags & B_SYNC) ? PGO_SYNCIO : 0));
+ }
+ if (pgs2[0] != NULL) {
+ uvm_page_unbusy(pgs2, npages2);
+ }
+ simple_unlock(&uobj->vmobjlock);
+ return error;
+}
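
ufs_balloc_range() rounds the caller's request down to a block boundary before taking v_glock and calling VOP_BALLOCN(); the length is widened by the same amount so the allocated range still covers the original request. A standalone sketch of that adjustment with made-up numbers:

#include <stdio.h>

int
main(void)
{
	long long off = 70000, len = 300;	/* illustrative request */
	long long bsize = 16384;		/* fs block size, power of two */
	long long delta;

	delta = off & (bsize - 1);	/* distance into the block */
	off -= delta;			/* block-align the start ... */
	len += delta;			/* ... and widen the length */
	printf("VOP_BALLOCN(off=%lld, len=%lld)\n", off, len);
	return 0;
}
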
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
index bbf1391dfe5..e0777e4b55f 100644
--- a/sys/ufs/ufs/ufs_readwrite.c
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_readwrite.c,v 1.19 2001/06/27 04:58:49 art Exp $ */
+/* $OpenBSD: ufs_readwrite.c,v 1.20 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_readwrite.c,v 1.9 1996/05/11 18:27:57 mycroft Exp $ */
/*-
@@ -76,21 +76,22 @@ READ(v)
int a_ioflag;
struct ucred *a_cred;
} */ *ap = v;
- register struct vnode *vp;
- register struct inode *ip;
- register struct uio *uio;
- register FS *fs;
+ struct vnode *vp;
+ struct inode *ip;
+ struct uio *uio;
+ FS *fs;
+ void *win;
+ vsize_t bytelen;
struct buf *bp;
daddr_t lbn, nextlbn;
off_t bytesinfile;
long size, xfersize, blkoffset;
int error;
- u_short mode;
vp = ap->a_vp;
ip = VTOI(vp);
- mode = ip->i_ffs_mode;
uio = ap->a_uio;
+ error = 0;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_READ)
@@ -110,6 +111,24 @@ READ(v)
if (uio->uio_resid == 0)
return (0);
+ if (uio->uio_offset >= ip->i_ffs_size)
+ goto out;
+
+ if (vp->v_type == VREG) {
+ while (uio->uio_resid > 0) {
+ bytelen = min(ip->i_ffs_size - uio->uio_offset,
+ uio->uio_resid);
+ if (bytelen == 0)
+ break;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error)
+ break;
+ }
+ goto out;
+ }
for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
if ((bytesinfile = ip->i_ffs_size - uio->uio_offset) <= 0)
@@ -131,9 +150,6 @@ READ(v)
#else
if (lblktosize(fs, nextlbn) >= ip->i_ffs_size)
error = bread(vp, lbn, size, NOCRED, &bp);
- else if (doclusterread)
- error = cluster_read(vp, &ip->i_ci,
- ip->i_ffs_size, lbn, size, NOCRED, &bp);
else if (lbn - 1 == ip->i_ci.ci_lastr) {
int nextsize = BLKSIZE(fs, ip, nextlbn);
error = breadn(vp, lbn,
@@ -158,7 +174,7 @@ READ(v)
break;
xfersize = size;
}
- error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize,
+ error = uiomove((char *)bp->b_data + blkoffset, xfersize,
uio);
if (error)
break;
@@ -166,6 +182,7 @@ READ(v)
}
if (bp != NULL)
brelse(bp);
+out:
ip->i_flag |= IN_ACCESS;
return (error);
}
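
The new VREG read path above no longer copies through buffer-cache blocks; it repeatedly maps a window of the vnode's pages with ubc_alloc(), uiomoves out of the mapping, and releases the window. A rough userland analogue of that windowed loop using mmap (a sketch; WINSIZE is illustrative and assumed to be a multiple of the page size):

#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

#define WINSIZE (64 * 1024)	/* stand-in for a ubc window */

/* copy a file to stdout one mapped window at a time */
int
main(int argc, char **argv)
{
	struct stat st;
	off_t off;
	size_t len;
	char *win;
	int fd;

	if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;
	if (fstat(fd, &st) < 0)
		return 1;
	for (off = 0; off < st.st_size; off += len) {
		len = st.st_size - off;		/* bytes left */
		if (len > WINSIZE)
			len = WINSIZE;		/* clamp to the window */
		/* "ubc_alloc": map a window at the current offset */
		win = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, off);
		if (win == MAP_FAILED)
			return 1;
		write(STDOUT_FILENO, win, len);	/* "uiomove" out of it */
		/* "ubc_release": drop the window again */
		munmap(win, len);
	}
	close(fd);
	return 0;
}
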
@@ -183,15 +200,19 @@ WRITE(v)
int a_ioflag;
struct ucred *a_cred;
} */ *ap = v;
- register struct vnode *vp;
- register struct uio *uio;
- register struct inode *ip;
- register FS *fs;
+ struct vnode *vp;
+ struct uio *uio;
+ struct inode *ip;
+ FS *fs;
struct buf *bp;
struct proc *p;
daddr_t lbn;
off_t osize;
int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
+ void *win;
+ vsize_t bytelen;
+ off_t oldoff;
+ boolean_t rv;
extended = 0;
ioflag = ap->a_ioflag;
@@ -239,9 +260,77 @@ WRITE(v)
resid = uio->uio_resid;
osize = ip->i_ffs_size;
- flags = ioflag & IO_SYNC ? B_SYNC : 0;
+ error = 0;
+
+ if (vp->v_type != VREG)
+ goto bcache;
+
+ while (uio->uio_resid > 0) {
+ oldoff = uio->uio_offset;
+ blkoffset = blkoff(fs, uio->uio_offset);
+ bytelen = min(fs->fs_bsize - blkoffset, uio->uio_resid);
+
+ /*
+ * XXXUBC if file is mapped and this is the last block,
+ * process one page at a time.
+ */
+
+ error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
+ ap->a_cred, ioflag & IO_SYNC ? B_SYNC : 0);
+ if (error) {
+ return error;
+ }
- for (error = 0; uio->uio_resid > 0;) {
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, &bytelen,
+ UBC_WRITE);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+
+ /*
+ * flush what we just wrote if necessary.
+ * XXXUBC simplistic async flushing.
+ */
+
+ if (ioflag & IO_SYNC) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+#if 1
+ /*
+ * XXX
+ * flush whole blocks in case there are deps.
+ * otherwise we can dirty and flush part of
+ * a block multiple times and the softdep code
+ * will get confused. fixing this the right way
+ * is complicated so we'll work around it for now.
+ */
+
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj,
+ oldoff & ~(fs->fs_bsize - 1),
+ (oldoff + bytelen + fs->fs_bsize - 1) &
+ ~(fs->fs_bsize - 1),
+ PGO_CLEANIT|PGO_SYNCIO);
+#else
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, oldoff, oldoff + bytelen,
+ PGO_CLEANIT|PGO_SYNCIO);
+#endif
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ } else if (oldoff >> 16 != uio->uio_offset >> 16) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, (oldoff >> 16) << 16,
+ (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ }
+ if (error) {
+ break;
+ }
+ }
+ goto out;
+
+bcache:
+ flags = ioflag & IO_SYNC ? B_SYNC : 0;
+ while (uio->uio_resid > 0) {
lbn = lblkno(fs, uio->uio_offset);
blkoffset = blkoff(fs, uio->uio_offset);
xfersize = fs->fs_bsize - blkoffset;
@@ -260,14 +349,12 @@ WRITE(v)
uvm_vnp_setsize(vp, ip->i_ffs_size);
extended = 1;
}
- (void)uvm_vnp_uncache(vp);
size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
if (size < xfersize)
xfersize = size;
- error =
- uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
+ error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
if (error != 0)
bzero((char *)bp->b_data + blkoffset, xfersize);
@@ -287,13 +374,14 @@ WRITE(v)
#endif
if (error || xfersize == 0)
break;
- ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
/*
* If we successfully wrote any data, and we are not the superuser
* we clear the setuid and setgid bits as a precaution against
* tampering.
*/
+out:
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
ip->i_ffs_mode &= ~(ISUID | ISGID);
if (resid > uio->uio_resid)
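
For IO_SYNC writes the loop above flushes whole filesystem blocks so the soft-dependency code never sees a partially cleaned block; the async path instead flushes a chunk whenever the offset crosses a 64kB boundary (the >> 16 comparison). A standalone sketch of the block rounding used:

#include <stdio.h>

int
main(void)
{
	long long oldoff = 70000, bytelen = 300;	/* illustrative */
	long long bsize = 16384;			/* fs_bsize */
	long long lo, hi;

	lo = oldoff & ~(bsize - 1);			    /* round start down */
	hi = (oldoff + bytelen + bsize - 1) & ~(bsize - 1); /* round end up */
	printf("pgo_flush range [%lld, %lld)\n", lo, hi);
	return 0;
}
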
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 4caf0ef78c7..e926ee7aff6 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_vnops.c,v 1.39 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ufs_vnops.c,v 1.40 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $ */
/*
@@ -469,8 +469,6 @@ ufs_chmod(vp, mode, cred, p)
ip->i_ffs_mode &= ~ALLPERMS;
ip->i_ffs_mode |= (mode & ALLPERMS);
ip->i_flag |= IN_CHANGE;
- if ((vp->v_flag & VTEXT) && (ip->i_ffs_mode & S_ISTXT) == 0)
- (void) uvm_vnp_uncache(vp);
return (0);
}
diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h
index e9dc71f9855..981eb21474b 100644
--- a/sys/ufs/ufs/ufsmount.h
+++ b/sys/ufs/ufs/ufsmount.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufsmount.h,v 1.5 1999/06/01 01:48:52 millert Exp $ */
+/* $OpenBSD: ufsmount.h,v 1.6 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufsmount.h,v 1.4 1994/12/21 20:00:23 mycroft Exp $ */
/*
@@ -64,6 +64,7 @@ struct ufsmount {
struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */
struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */
u_long um_nindir; /* indirect ptrs per block */
+ u_long um_lognindir; /* log2 of um_nindir */
u_long um_bptrtodb; /* indir ptr to disk block */
u_long um_seqinc; /* inc between seq blocks */
time_t um_btime[MAXQUOTAS]; /* block quota time limit */
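
The initialization of the new um_lognindir field happens at mount time, outside this excerpt; presumably it is derived from um_nindir with an integer log2 along these lines (a sketch, names and values illustrative):

#include <stdio.h>

/* integer log2 of a power of two, as mount code might derive um_lognindir */
static unsigned long
ilog2(unsigned long v)
{
	unsigned long r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int
main(void)
{
	unsigned long um_nindir = 2048;		/* illustrative */

	printf("um_lognindir = %lu\n", ilog2(um_nindir));	/* 11 */
	return 0;
}
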
diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c
index 347867e47b8..8478141a72c 100644
--- a/sys/uvm/uvm_anon.c
+++ b/sys/uvm/uvm_anon.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_anon.c,v 1.15 2001/11/11 01:16:56 art Exp $ */
+/* $OpenBSD: uvm_anon.c,v 1.16 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_anon.c,v 1.15 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -518,9 +518,6 @@ anon_pagein(anon)
*/
pmap_clear_reference(pg);
-#ifndef UBC
- pmap_page_protect(pg, VM_PROT_NONE);
-#endif
uvm_lock_pageq();
uvm_pagedeactivate(pg);
uvm_unlock_pageq();
diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c
index 85ce0a495f6..0ebf53c3502 100644
--- a/sys/uvm/uvm_aobj.c
+++ b/sys/uvm/uvm_aobj.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_aobj.c,v 1.20 2001/11/11 01:16:56 art Exp $ */
+/* $OpenBSD: uvm_aobj.c,v 1.21 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -878,15 +878,8 @@ uao_flush(uobj, start, stop, flags)
pp->wire_count != 0)
continue;
-#ifdef UBC
/* ...and deactivate the page. */
pmap_clear_reference(pp);
-#else
- /* zap all mappings for the page. */
- pmap_page_protect(pp, VM_PROT_NONE);
-
- /* ...and deactivate the page. */
-#endif
uvm_pagedeactivate(pp);
continue;
@@ -1523,9 +1516,6 @@ uao_pagein_page(aobj, pageidx)
* deactivate the page (to put it on a page queue).
*/
pmap_clear_reference(pg);
-#ifndef UBC
- pmap_page_protect(pg, VM_PROT_NONE);
-#endif
uvm_lock_pageq();
uvm_pagedeactivate(pg);
uvm_unlock_pageq();
diff --git a/sys/uvm/uvm_bio.c b/sys/uvm/uvm_bio.c
new file mode 100644
index 00000000000..fccf51b8ece
--- /dev/null
+++ b/sys/uvm/uvm_bio.c
@@ -0,0 +1,547 @@
+/* $NetBSD: uvm_bio.c,v 1.7 2001/02/02 01:55:52 enami Exp $ */
+
+/*
+ * Copyright (c) 1998 Chuck Silvers.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * uvm_bio.c: buffered i/o vnode mapping cache
+ */
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_page.h>
+
+/*
+ * global data structures
+ */
+
+/*
+ * local functions
+ */
+
+static int ubc_fault __P((struct uvm_faultinfo *, vaddr_t,
+ vm_page_t *, int, int, vm_fault_t, vm_prot_t,
+ int));
+static struct ubc_map *ubc_find_mapping __P((struct uvm_object *, voff_t));
+
+/*
+ * local data structures
+ */
+
+#define UBC_HASH(uobj, offset) (((((u_long)(uobj)) >> 8) + \
+ (((u_long)(offset)) >> PAGE_SHIFT)) & \
+ ubc_object.hashmask)
+
+#define UBC_QUEUE(offset) (&ubc_object.inactive[((offset) / ubc_winsize) & \
+ (UBC_NQUEUES - 1)])
+
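
UBC_HASH spreads mappings across hash buckets by mixing the object pointer with the page-granular offset, and UBC_QUEUE picks an inactive queue from the window index. A standalone sketch of both computations (window size, queue count and hash mask are illustrative; hashinit() supplies the real mask):

#include <stdio.h>

#define PAGE_SHIFT 12
#define WINSIZE (64 * 1024)	/* illustrative ubc_winsize */
#define NQUEUES 4		/* power of two; 1 without PMAP_PREFER */
#define HASHMASK 127		/* illustrative; hashinit() supplies this */

int
main(void)
{
	int obj;				/* stands in for a uvm_object */
	unsigned long long offset = 0x30000;

	/* UBC_HASH: mix object identity with the page-granular offset */
	printf("hash bucket %llu\n",
	    ((((unsigned long)&obj) >> 8) + (offset >> PAGE_SHIFT)) &
	    HASHMASK);

	/* UBC_QUEUE: window index modulo the number of inactive queues */
	printf("inactive queue %llu\n", (offset / WINSIZE) & (NQUEUES - 1));
	return 0;
}
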
+struct ubc_map
+{
+ struct uvm_object * uobj; /* mapped object */
+ voff_t offset; /* offset into uobj */
+ int refcount; /* refcount on mapping */
+ voff_t writeoff; /* overwrite offset */
+ vsize_t writelen; /* overwrite len */
+
+ LIST_ENTRY(ubc_map) hash; /* hash table */
+ TAILQ_ENTRY(ubc_map) inactive; /* inactive queue */
+};
+
+static struct ubc_object
+{
+ struct uvm_object uobj; /* glue for uvm_map() */
+ char *kva; /* where ubc_object is mapped */
+ struct ubc_map *umap; /* array of ubc_map's */
+
+ LIST_HEAD(, ubc_map) *hash; /* hashtable for cached ubc_map's */
+ u_long hashmask; /* mask for hashtable */
+
+ TAILQ_HEAD(ubc_inactive_head, ubc_map) *inactive;
+ /* inactive queues for ubc_map's */
+
+} ubc_object;
+
+struct uvm_pagerops ubc_pager =
+{
+ NULL, /* init */
+ NULL, /* reference */
+ NULL, /* detach */
+ ubc_fault, /* fault */
+ /* ... rest are NULL */
+};
+
+int ubc_nwins = UBC_NWINS;
+int ubc_winsize = UBC_WINSIZE;
+#ifdef PMAP_PREFER
+int ubc_nqueues;
+boolean_t ubc_release_unmap = FALSE;
+#define UBC_NQUEUES ubc_nqueues
+#define UBC_RELEASE_UNMAP ubc_release_unmap
+#else
+#define UBC_NQUEUES 1
+#define UBC_RELEASE_UNMAP FALSE
+#endif
+
+/*
+ * ubc_init
+ *
+ * init pager private data structures.
+ */
+
+void
+ubc_init(void)
+{
+ struct ubc_map *umap;
+ vaddr_t va;
+ int i;
+
+ /*
+ * init ubc_object.
+ * alloc and init ubc_map's.
+ * init inactive queues.
+ * alloc and init hashtable.
+ * map in ubc_object.
+ */
+
+ simple_lock_init(&ubc_object.uobj.vmobjlock);
+ ubc_object.uobj.pgops = &ubc_pager;
+ TAILQ_INIT(&ubc_object.uobj.memq);
+ ubc_object.uobj.uo_npages = 0;
+ ubc_object.uobj.uo_refs = UVM_OBJ_KERN;
+
+ ubc_object.umap = malloc(ubc_nwins * sizeof(struct ubc_map),
+ M_TEMP, M_NOWAIT);
+ if (ubc_object.umap == NULL)
+ panic("ubc_init: failed to allocate ubc_map");
+ bzero(ubc_object.umap, ubc_nwins * sizeof(struct ubc_map));
+
+ va = (vaddr_t)1L;
+#ifdef PMAP_PREFER
+ PMAP_PREFER(0, &va);
+ if (va < ubc_winsize) {
+ va = ubc_winsize;
+ }
+ ubc_nqueues = va / ubc_winsize;
+ if (ubc_nqueues != 1) {
+ ubc_release_unmap = TRUE;
+ }
+#endif
+ ubc_object.inactive = malloc(UBC_NQUEUES *
+ sizeof(struct ubc_inactive_head),
+ M_TEMP, M_NOWAIT);
+ if (ubc_object.inactive == NULL)
+ panic("ubc_init: failed to allocate inactive queue heads");
+ for (i = 0; i < UBC_NQUEUES; i++) {
+ TAILQ_INIT(&ubc_object.inactive[i]);
+ }
+ for (i = 0; i < ubc_nwins; i++) {
+ umap = &ubc_object.umap[i];
+ TAILQ_INSERT_TAIL(&ubc_object.inactive[i & (UBC_NQUEUES - 1)],
+ umap, inactive);
+ }
+
+ ubc_object.hash = hashinit(ubc_nwins, M_TEMP, M_NOWAIT,
+ &ubc_object.hashmask);
+ for (i = 0; i <= ubc_object.hashmask; i++) {
+ LIST_INIT(&ubc_object.hash[i]);
+ }
+
+ if (uvm_map(kernel_map, (vaddr_t *)&ubc_object.kva,
+ ubc_nwins * ubc_winsize, &ubc_object.uobj, 0, (vsize_t)va,
+ UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
+ UVM_ADV_RANDOM, UVM_FLAG_NOMERGE))
+ != KERN_SUCCESS) {
+ panic("ubc_init: failed to map ubc_object\n");
+ }
+ UVMHIST_INIT(ubchist, 300);
+}
+
+
+/*
+ * ubc_fault: fault routine for ubc mapping
+ */
+static int
+ubc_fault(ufi, ign1, ign2, ign3, ign4, fault_type, access_type, flags)
+ struct uvm_faultinfo *ufi;
+ vaddr_t ign1;
+ vm_page_t *ign2;
+ int ign3, ign4;
+ vm_fault_t fault_type;
+ vm_prot_t access_type;
+ int flags;
+{
+ struct uvm_object *uobj;
+ struct vnode *vp;
+ struct ubc_map *umap;
+ vaddr_t va, eva, ubc_offset, slot_offset;
+ int i, error, rv, npages;
+ struct vm_page *pgs[ubc_winsize >> PAGE_SHIFT], *pg;
+ UVMHIST_FUNC("ubc_fault"); UVMHIST_CALLED(ubchist);
+
+ /*
+ * no need to try with PGO_LOCKED...
+ * we don't need to have the map locked since we know that
+ * no one will mess with it until our reference is released.
+ */
+ if (flags & PGO_LOCKED) {
+#if 0
+ return VM_PAGER_UNLOCK;
+#else
+ uvmfault_unlockall(ufi, NULL, &ubc_object.uobj, NULL);
+ flags &= ~PGO_LOCKED;
+#endif
+ }
+
+ va = ufi->orig_rvaddr;
+ ubc_offset = va - (vaddr_t)ubc_object.kva;
+
+ UVMHIST_LOG(ubchist, "va 0x%lx ubc_offset 0x%lx at %d",
+ va, ubc_offset, access_type,0);
+
+ umap = &ubc_object.umap[ubc_offset / ubc_winsize];
+ KASSERT(umap->refcount != 0);
+ slot_offset = trunc_page(ubc_offset & (ubc_winsize - 1));
+
+ /* no umap locking needed since we have a ref on the umap */
+ uobj = umap->uobj;
+ vp = (struct vnode *)uobj;
+ KASSERT(uobj != NULL);
+
+ npages = (ubc_winsize - slot_offset) >> PAGE_SHIFT;
+
+ /*
+ * XXXUBC
+ * if npages is more than 1 we have to be sure that
+ * we set PGO_OVERWRITE correctly.
+ */
+ if (access_type == VM_PROT_WRITE) {
+ npages = 1;
+ }
+
+again:
+ memset(pgs, 0, sizeof (pgs));
+ simple_lock(&uobj->vmobjlock);
+
+ UVMHIST_LOG(ubchist, "slot_offset 0x%x writeoff 0x%x writelen 0x%x "
+ "u_size 0x%x", slot_offset, umap->writeoff, umap->writelen,
+ vp->v_uvm.u_size);
+
+ if (access_type & VM_PROT_WRITE &&
+ slot_offset >= umap->writeoff &&
+ (slot_offset + PAGE_SIZE <= umap->writeoff + umap->writelen ||
+ slot_offset + PAGE_SIZE >= vp->v_uvm.u_size - umap->offset)) {
+ UVMHIST_LOG(ubchist, "setting PGO_OVERWRITE", 0,0,0,0);
+ flags |= PGO_OVERWRITE;
+ }
+ else { UVMHIST_LOG(ubchist, "NOT setting PGO_OVERWRITE", 0,0,0,0); }
+ /* XXX be sure to zero any part of the page past EOF */
+
+ /*
+ * XXX
+ * ideally we'd like to pre-fault all of the pages we're overwriting.
+ * so for PGO_OVERWRITE, we should call VOP_GETPAGES() with all of the
+ * pages in [writeoff, writeoff+writelen] instead of just the one.
+ */
+
+ UVMHIST_LOG(ubchist, "getpages vp %p offset 0x%x npages %d",
+ uobj, umap->offset + slot_offset, npages, 0);
+
+ error = VOP_GETPAGES(vp, umap->offset + slot_offset, pgs, &npages, 0,
+ access_type, 0, flags);
+ UVMHIST_LOG(ubchist, "getpages error %d npages %d", error, npages,0,0);
+
+ if (error == EAGAIN) {
+ tsleep(&lbolt, PVM, "ubc_fault", 0);
+ goto again;
+ }
+ if (error) {
+ return VM_PAGER_ERROR;
+ }
+ if (npages == 0) {
+ return VM_PAGER_OK;
+ }
+
+ va = ufi->orig_rvaddr;
+ eva = ufi->orig_rvaddr + (npages << PAGE_SHIFT);
+
+ UVMHIST_LOG(ubchist, "va 0x%lx eva 0x%lx", va, eva, 0,0);
+ simple_lock(&uobj->vmobjlock);
+ for (i = 0; va < eva; i++, va += PAGE_SIZE) {
+ UVMHIST_LOG(ubchist, "pgs[%d] = %p", i, pgs[i],0,0);
+ pg = pgs[i];
+
+ if (pg == NULL || pg == PGO_DONTCARE) {
+ continue;
+ }
+ if (pg->flags & PG_WANTED) {
+ wakeup(pg);
+ }
+ KASSERT((pg->flags & PG_FAKE) == 0);
+ if (pg->flags & PG_RELEASED) {
+ rv = uobj->pgops->pgo_releasepg(pg, NULL);
+ KASSERT(rv);
+ continue;
+ }
+ KASSERT(access_type == VM_PROT_READ ||
+ (pg->flags & PG_RDONLY) == 0);
+
+ uvm_lock_pageq();
+ uvm_pageactivate(pg);
+ uvm_unlock_pageq();
+
+ pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg),
+ VM_PROT_READ | VM_PROT_WRITE, access_type);
+
+ pg->flags &= ~(PG_BUSY);
+ UVM_PAGE_OWN(pg, NULL);
+ }
+ simple_unlock(&uobj->vmobjlock);
+ return VM_PAGER_OK;
+}
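
Since every window has a fixed size and a fixed position in the ubc submap, the fault handler can recover the ubc_map index and the page-aligned slot offset from the faulting address with plain arithmetic. A sketch with made-up addresses:

#include <stdio.h>

#define PAGE_MASK 0xFFFUL	/* 4k pages, illustrative */
#define WINSIZE (64 * 1024UL)	/* illustrative ubc_winsize */

int
main(void)
{
	unsigned long kva = 0xd0000000UL;	/* base of the ubc map */
	unsigned long fault_va = 0xd0012345UL;	/* faulting address */
	unsigned long ubc_offset = fault_va - kva;
	unsigned long slot_offset;

	/* which window did the fault land in, and where inside it? */
	slot_offset = (ubc_offset & (WINSIZE - 1)) & ~PAGE_MASK;
	printf("window %lu slot 0x%lx\n", ubc_offset / WINSIZE, slot_offset);
	return 0;
}
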
+
+/*
+ * local functions
+ */
+
+static struct ubc_map *
+ubc_find_mapping(uobj, offset)
+ struct uvm_object *uobj;
+ voff_t offset;
+{
+ struct ubc_map *umap;
+
+ LIST_FOREACH(umap, &ubc_object.hash[UBC_HASH(uobj, offset)], hash) {
+ if (umap->uobj == uobj && umap->offset == offset) {
+ return umap;
+ }
+ }
+ return NULL;
+}
+
+
+/*
+ * ubc interface functions
+ */
+
+/*
+ * ubc_alloc: allocate a buffer mapping
+ */
+void *
+ubc_alloc(uobj, offset, lenp, flags)
+ struct uvm_object *uobj;
+ voff_t offset;
+ vsize_t *lenp;
+ int flags;
+{
+ int s;
+ vaddr_t slot_offset, va;
+ struct ubc_map *umap;
+ voff_t umap_offset;
+ UVMHIST_FUNC("ubc_alloc"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "uobj %p offset 0x%lx len 0x%lx filesize 0x%x",
+ uobj, offset, *lenp, ((struct uvm_vnode *)uobj)->u_size);
+
+ umap_offset = (offset & ~((voff_t)ubc_winsize - 1));
+ slot_offset = (vaddr_t)(offset & ((voff_t)ubc_winsize - 1));
+ *lenp = min(*lenp, ubc_winsize - slot_offset);
+
+ /*
+ * the vnode is always locked here, so we don't need to add a ref.
+ */
+
+ s = splbio();
+
+again:
+ simple_lock(&ubc_object.uobj.vmobjlock);
+ umap = ubc_find_mapping(uobj, umap_offset);
+ if (umap == NULL) {
+ umap = TAILQ_FIRST(UBC_QUEUE(offset));
+ if (umap == NULL) {
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+ tsleep(&lbolt, PVM, "ubc_alloc", 0);
+ goto again;
+ }
+
+ /*
+ * remove from old hash (if any),
+ * add to new hash.
+ */
+
+ if (umap->uobj != NULL) {
+ LIST_REMOVE(umap, hash);
+ }
+
+ umap->uobj = uobj;
+ umap->offset = umap_offset;
+
+ LIST_INSERT_HEAD(&ubc_object.hash[UBC_HASH(uobj, umap_offset)],
+ umap, hash);
+
+ va = (vaddr_t)(ubc_object.kva +
+ (umap - ubc_object.umap) * ubc_winsize);
+ pmap_remove(pmap_kernel(), va, va + ubc_winsize);
+ }
+
+ if (umap->refcount == 0) {
+ TAILQ_REMOVE(UBC_QUEUE(offset), umap, inactive);
+ }
+
+#ifdef DIAGNOSTIC
+ if ((flags & UBC_WRITE) &&
+ (umap->writeoff || umap->writelen)) {
+ panic("ubc_fault: concurrent writes vp %p", uobj);
+ }
+#endif
+ if (flags & UBC_WRITE) {
+ umap->writeoff = slot_offset;
+ umap->writelen = *lenp;
+ }
+
+ umap->refcount++;
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+ splx(s);
+ UVMHIST_LOG(ubchist, "umap %p refs %d va %p",
+ umap, umap->refcount,
+ ubc_object.kva + (umap - ubc_object.umap) * ubc_winsize,0);
+
+ return ubc_object.kva +
+ (umap - ubc_object.umap) * ubc_winsize + slot_offset;
+}
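
ubc_alloc() splits the caller's file offset into a window-aligned umap offset and a slot offset inside the window, then clamps *lenp to the end of the window; callers such as the READ and WRITE loops above simply iterate until the residual is consumed. The geometry in isolation:

#include <stdio.h>

#define WINSIZE (64 * 1024)	/* illustrative ubc_winsize */

int
main(void)
{
	long long offset = 70000, len = 100000;	/* caller's request */
	long long umap_offset, slot_offset;

	umap_offset = offset & ~((long long)WINSIZE - 1);  /* window base */
	slot_offset = offset & (WINSIZE - 1);	/* offset inside window */
	if (len > WINSIZE - slot_offset)	/* clamp, as *lenp is */
		len = WINSIZE - slot_offset;
	printf("window 0x%llx slot 0x%llx len %lld\n",
	    umap_offset, slot_offset, len);
	return 0;
}
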
+
+
+void
+ubc_release(va, wlen)
+ void *va;
+ vsize_t wlen;
+{
+ struct ubc_map *umap;
+ struct uvm_object *uobj;
+ int s;
+ UVMHIST_FUNC("ubc_release"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "va %p", va,0,0,0);
+
+ s = splbio();
+ simple_lock(&ubc_object.uobj.vmobjlock);
+
+ umap = &ubc_object.umap[((char *)va - ubc_object.kva) / ubc_winsize];
+ uobj = umap->uobj;
+ KASSERT(uobj != NULL);
+
+ umap->writeoff = 0;
+ umap->writelen = 0;
+ umap->refcount--;
+ if (umap->refcount == 0) {
+ if (UBC_RELEASE_UNMAP &&
+ (((struct vnode *)uobj)->v_flag & VTEXT)) {
+ vaddr_t va;
+
+ /*
+ * if this file is the executable image of
+ * some process, that process will likely have
+ * the file mapped at an alignment other than
+ * what PMAP_PREFER() would like. we'd like
+ * to have process text be able to use the
+ * cache even if someone is also reading the
+ * file, so invalidate mappings of such files
+ * as soon as possible.
+ */
+
+ va = (vaddr_t)(ubc_object.kva +
+ (umap - ubc_object.umap) * ubc_winsize);
+ pmap_remove(pmap_kernel(), va, va + ubc_winsize);
+ LIST_REMOVE(umap, hash);
+ umap->uobj = NULL;
+ TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap,
+ inactive);
+ } else {
+ TAILQ_INSERT_TAIL(UBC_QUEUE(umap->offset), umap,
+ inactive);
+ }
+ }
+ UVMHIST_LOG(ubchist, "umap %p refs %d", umap, umap->refcount,0,0);
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+ splx(s);
+}
+
+
+/*
+ * remove a range of mappings from the ubc mapping cache.
+ */
+
+void
+ubc_flush(uobj, start, end)
+ struct uvm_object *uobj;
+ voff_t start, end;
+{
+ struct ubc_map *umap;
+ vaddr_t va;
+ int s;
+ UVMHIST_FUNC("ubc_flush"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "uobj %p start 0x%lx end 0x%lx",
+ uobj, start, end,0);
+
+ s = splbio();
+ simple_lock(&ubc_object.uobj.vmobjlock);
+ for (umap = ubc_object.umap;
+ umap < &ubc_object.umap[ubc_nwins];
+ umap++) {
+
+ if (umap->uobj != uobj ||
+ umap->offset < start ||
+ (umap->offset >= end && end != 0) ||
+ umap->refcount > 0) {
+ continue;
+ }
+
+ /*
+ * remove from hash,
+ * move to head of inactive queue.
+ */
+
+ va = (vaddr_t)(ubc_object.kva +
+ (umap - ubc_object.umap) * ubc_winsize);
+ pmap_remove(pmap_kernel(), va, va + ubc_winsize);
+
+ LIST_REMOVE(umap, hash);
+ umap->uobj = NULL;
+ TAILQ_REMOVE(UBC_QUEUE(umap->offset), umap, inactive);
+ TAILQ_INSERT_HEAD(UBC_QUEUE(umap->offset), umap, inactive);
+ }
+ simple_unlock(&ubc_object.uobj.vmobjlock);
+ splx(s);
+}
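
Windows with a zero refcount live on the inactive queues: ubc_alloc() recycles from the head when it misses in the hash, and ubc_release() reinserts at the tail (or at the head for VTEXT vnodes it has just unmapped), giving LRU reuse. A minimal sys/queue.h sketch of that discipline:

#include <sys/queue.h>
#include <stdio.h>

struct win {
	int id;
	TAILQ_ENTRY(win) inactive;
};

static TAILQ_HEAD(, win) inactive = TAILQ_HEAD_INITIALIZER(inactive);

int
main(void)
{
	struct win w[3] = { { 0 }, { 1 }, { 2 } };
	struct win *p;
	int i;

	for (i = 0; i < 3; i++)
		TAILQ_INSERT_TAIL(&inactive, &w[i], inactive);

	/* ubc_alloc miss: recycle the least recently used window */
	p = TAILQ_FIRST(&inactive);
	TAILQ_REMOVE(&inactive, p, inactive);
	printf("recycled window %d\n", p->id);

	/* ubc_release, refcount now zero: reinsert at the tail (MRU) */
	TAILQ_INSERT_TAIL(&inactive, p, inactive);
	return 0;
}
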
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
index 5575021ad6f..bb6b841f0ca 100644
--- a/sys/uvm/uvm_extern.h
+++ b/sys/uvm/uvm_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_extern.h,v 1.33 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: uvm_extern.h,v 1.34 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_extern.h,v 1.57 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -223,6 +223,21 @@ typedef int vm_prot_t;
#define UVM_PGA_ZERO 0x0002 /* returned page must be zero'd */
/*
+ * the following defines are for ubc_alloc's flags
+ */
+#define UBC_READ 0
+#define UBC_WRITE 1
+
+/*
+ * flags for uvn_findpages().
+ */
+#define UFP_ALL 0x0
+#define UFP_NOWAIT 0x1
+#define UFP_NOALLOC 0x2
+#define UFP_NOCACHE 0x4
+#define UFP_NORDONLY 0x8
+
+/*
* lockflags that control the locking behavior of various functions.
*/
#define UVM_LK_ENTER 0x00000001 /* map locked on entry */
@@ -464,9 +479,16 @@ void uao_detach_locked __P((struct uvm_object *));
void uao_reference __P((struct uvm_object *));
void uao_reference_locked __P((struct uvm_object *));
+/* uvm_bio.c */
+void ubc_init __P((void));
+void * ubc_alloc __P((struct uvm_object *, voff_t, vsize_t *,
+ int));
+void ubc_release __P((void *, vsize_t));
+void ubc_flush __P((struct uvm_object *, voff_t, voff_t));
+
/* uvm_fault.c */
-int uvm_fault __P((vm_map_t, vaddr_t,
- vm_fault_t, vm_prot_t));
+int uvm_fault __P((vm_map_t, vaddr_t, vm_fault_t,
+ vm_prot_t));
/* handle a page fault */
/* uvm_glue.c */
@@ -593,10 +615,11 @@ int uvm_deallocate __P((vm_map_t, vaddr_t, vsize_t));
/* uvm_vnode.c */
void uvm_vnp_setsize __P((struct vnode *, voff_t));
void uvm_vnp_sync __P((struct mount *));
-void uvm_vnp_terminate __P((struct vnode *));
- /* terminate a uvm/uvn object */
-boolean_t uvm_vnp_uncache __P((struct vnode *));
struct uvm_object *uvn_attach __P((void *, vm_prot_t));
+void uvn_findpages __P((struct uvm_object *, voff_t,
+ int *, struct vm_page **, int));
+void uvm_vnp_zerorange __P((struct vnode *, off_t, size_t));
+void uvm_vnp_asyncget __P((struct vnode *, off_t, size_t));
/* kern_malloc.c */
void kmeminit_nkmempages __P((void));
diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c
index 662e2509321..0e4103fe49b 100644
--- a/sys/uvm/uvm_fault.c
+++ b/sys/uvm/uvm_fault.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_fault.c,v 1.24 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: uvm_fault.c,v 1.25 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_fault.c,v 1.56 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -204,11 +204,7 @@ uvmfault_anonflush(anons, n)
if (pg && (pg->flags & PG_BUSY) == 0 && pg->loan_count == 0) {
uvm_lock_pageq();
if (pg->wire_count == 0) {
-#ifdef UBC
pmap_clear_reference(pg);
-#else
- pmap_page_protect(pg, VM_PROT_NONE);
-#endif
uvm_pagedeactivate(pg);
}
uvm_unlock_pageq();
diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h
index bbc2afb9f19..2c95aff1607 100644
--- a/sys/uvm/uvm_map.h
+++ b/sys/uvm/uvm_map.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_map.h,v 1.15 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: uvm_map.h,v 1.16 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_map.h,v 1.24 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -427,7 +427,7 @@ vm_map_lock(map)
simple_lock(&map->flags_lock);
while (map->flags & VM_MAP_BUSY) {
map->flags |= VM_MAP_WANTLOCK;
- ltsleep(&map->flags, PVM, (char *)vmmapbsy, 0, &map->flags_lock);
+ ltsleep(&map->flags, PVM, vmmapbsy, 0, &map->flags_lock);
}
error = lockmgr(&map->lock, LK_EXCLUSIVE|LK_SLEEPFAIL|LK_INTERLOCK,
diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c
index 6bd7260b6a0..3c4c4bdf961 100644
--- a/sys/uvm/uvm_mmap.c
+++ b/sys/uvm/uvm_mmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_mmap.c,v 1.27 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: uvm_mmap.c,v 1.28 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -1126,40 +1126,8 @@ uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit)
uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
maxprot : (maxprot & ~VM_PROT_WRITE));
-#ifndef UBC
- /*
- * XXXCDC: hack from old code
- * don't allow vnodes which have been mapped
- * shared-writeable to persist [forces them to be
- * flushed out when last reference goes].
- * XXXCDC: interesting side effect: avoids a bug.
- * note that in WRITE [ufs_readwrite.c] that we
- * allocate buffer, uncache, and then do the write.
- * the problem with this is that if the uncache causes
- * VM data to be flushed to the same area of the file
- * we are writing to... in that case we've got the
- * buffer locked and our process goes to sleep forever.
- *
- * XXXCDC: checking maxprot protects us from the
- * "persistbug" program but this is not a long term
- * solution.
- *
- * XXXCDC: we don't bother calling uncache with the vp
- * VOP_LOCKed since we know that we are already
- * holding a valid reference to the uvn (from the
- * uvn_attach above), and thus it is impossible for
- * the uncache to kill the uvn and trigger I/O.
- */
- if (flags & MAP_SHARED) {
- if ((prot & VM_PROT_WRITE) ||
- (maxprot & VM_PROT_WRITE)) {
- uvm_vnp_uncache(vp);
- }
- }
-#else
/* XXX for now, attach doesn't gain a ref */
VREF(vp);
-#endif
} else {
uobj = udv_attach((void *) &vp->v_rdev,
(flags & MAP_SHARED) ? maxprot :
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index 4ea890c8c3b..f7ebbd77f80 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page.c,v 1.31 2001/11/12 01:26:09 art Exp $ */
+/* $OpenBSD: uvm_page.c,v 1.32 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_page.c,v 1.51 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -906,17 +906,11 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
* the pagedaemon.
*/
-#ifdef UBC
if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
(uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
uvmexp.inactive < uvmexp.inactarg)) {
wakeup(&uvm.pagedaemon);
}
-#else
- if (uvmexp.free < uvmexp.freemin || (uvmexp.free < uvmexp.freetarg &&
- uvmexp.inactive < uvmexp.inactarg))
- wakeup(&uvm.pagedaemon);
-#endif
/*
* fail if any of these conditions is true:
diff --git a/sys/uvm/uvm_page_i.h b/sys/uvm/uvm_page_i.h
index e0547d8414b..3ea680714c6 100644
--- a/sys/uvm/uvm_page_i.h
+++ b/sys/uvm/uvm_page_i.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_page_i.h,v 1.10 2001/11/12 01:26:10 art Exp $ */
+/* $OpenBSD: uvm_page_i.h,v 1.11 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_page_i.h,v 1.16 2001/01/28 23:30:45 thorpej Exp $ */
/*
@@ -219,9 +219,6 @@ uvm_pagedeactivate(pg)
TAILQ_INSERT_TAIL(&uvm.page_inactive_obj, pg, pageq);
pg->pqflags |= PQ_INACTIVE;
uvmexp.inactive++;
-#ifndef UBC
- pmap_clear_reference(pg);
-#endif
/*
* update the "clean" bit. this isn't 100%
* accurate, and doesn't have to be. we'll
diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c
index 69400e5f010..2fded9caf08 100644
--- a/sys/uvm/uvm_pager.c
+++ b/sys/uvm/uvm_pager.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_pager.c,v 1.22 2001/11/12 01:26:10 art Exp $ */
+/* $OpenBSD: uvm_pager.c,v 1.23 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_pager.c,v 1.41 2001/02/18 19:26:50 chs Exp $ */
/*
@@ -58,17 +58,13 @@ struct pool *uvm_aiobuf_pool;
extern struct uvm_pagerops uvm_deviceops;
extern struct uvm_pagerops uvm_vnodeops;
-#ifdef UBC
extern struct uvm_pagerops ubc_pager;
-#endif
struct uvm_pagerops *uvmpagerops[] = {
&aobj_pager,
&uvm_deviceops,
&uvm_vnodeops,
-#ifdef UBC
&ubc_pager,
-#endif
};
/*
@@ -153,7 +149,7 @@ ReStart:
kva = 0; /* let system choose VA */
if (uvm_map(pager_map, &kva, size, NULL,
- UVM_UNKNOWN_OFFSET, 0, UVM_FLAG_NOMERGE) != KERN_SUCCESS) {
+ UVM_UNKNOWN_OFFSET, 0, UVM_FLAG_NOMERGE) != KERN_SUCCESS) {
if (curproc == uvm.pagedaemon_proc) {
simple_lock(&pager_map_wanted_lock);
if (emerginuse) {
@@ -733,7 +729,6 @@ uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags)
}
}
-#ifdef UBC
/*
* interrupt-context iodone handler for nested i/o bufs.
*
@@ -757,7 +752,6 @@ uvm_aio_biodone1(bp)
biodone(mbp);
}
}
-#endif
/*
* interrupt-context iodone handler for single-buf i/os
@@ -798,12 +792,10 @@ uvm_aio_aiodone(bp)
error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0;
write = (bp->b_flags & B_READ) == 0;
-#ifdef UBC
/* XXXUBC B_NOCACHE is for swap pager, should be done differently */
if (write && !(bp->b_flags & B_NOCACHE) && bioops.io_pageiodone) {
(*bioops.io_pageiodone)(bp);
}
-#endif
uobj = NULL;
for (i = 0; i < npages; i++) {
diff --git a/sys/uvm/uvm_param.h b/sys/uvm/uvm_param.h
index d7cdccc28a4..78b3f1bc5ba 100644
--- a/sys/uvm/uvm_param.h
+++ b/sys/uvm/uvm_param.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_param.h,v 1.2 2001/11/12 01:26:10 art Exp $ */
+/* $OpenBSD: uvm_param.h,v 1.3 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_param.h,v 1.5 2001/03/09 01:02:12 chs Exp $ */
/*
@@ -114,7 +114,7 @@ typedef int boolean_t;
#define VM_ANONMIN 7
#define VM_VTEXTMIN 8
#define VM_VNODEMIN 9
-#define VM_MAXID 9 /* number of valid vm ids */
+#define VM_MAXID 10 /* number of valid vm ids */
#define CTL_VM_NAMES { \
{ 0, 0 }, \
@@ -166,10 +166,8 @@ struct _ps_strings {
#define trunc_page(x) ((x) & ~PAGE_MASK)
extern psize_t mem_size; /* size of physical memory (bytes) */
-#ifdef UBC
extern int ubc_nwins; /* number of UBC mapping windows */
extern int ubc_winsize; /* size of a UBC mapping window */
-#endif
#else
/* out-of-kernel versions of round_page and trunc_page */
diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c
index 4697d8a23f6..c4298200688 100644
--- a/sys/uvm/uvm_swap.c
+++ b/sys/uvm/uvm_swap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_swap.c,v 1.41 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: uvm_swap.c,v 1.42 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_swap.c,v 1.46 2001/02/18 21:19:08 chs Exp $ */
/*
@@ -1393,32 +1393,6 @@ sw_reg_strategy(sdp, bp, bn)
nbp->vb_buf.b_vnbufs.le_next = NOLIST;
LIST_INIT(&nbp->vb_buf.b_dep);
- /*
- * set b_dirtyoff/end and b_validoff/end. this is
- * required by the NFS client code (otherwise it will
- * just discard our I/O request).
- */
- if (bp->b_dirtyend == 0) {
- nbp->vb_buf.b_dirtyoff = 0;
- nbp->vb_buf.b_dirtyend = sz;
- } else {
- nbp->vb_buf.b_dirtyoff =
- max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
- nbp->vb_buf.b_dirtyend =
- min(sz,
- max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
- }
- if (bp->b_validend == 0) {
- nbp->vb_buf.b_validoff = 0;
- nbp->vb_buf.b_validend = sz;
- } else {
- nbp->vb_buf.b_validoff =
- max(0, bp->b_validoff - (bp->b_bcount-resid));
- nbp->vb_buf.b_validend =
- min(sz,
- max(0, bp->b_validend - (bp->b_bcount-resid)));
- }
-
nbp->vb_xfer = vnx; /* patch it back in to vnx */
/*
@@ -1990,8 +1964,6 @@ uvm_swap_io(pps, startslot, npages, flags)
* and we bump v_numoutput (counter of number of active outputs).
*/
if (write) {
- bp->b_dirtyoff = 0;
- bp->b_dirtyend = npages << PAGE_SHIFT;
#ifdef UVM_SWAP_ENCRYPT
/* mark the pages in the drum for decryption */
if (swap_encrypt_initalized)
diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c
index e921e4fb846..667cbc5b458 100644
--- a/sys/uvm/uvm_vnode.c
+++ b/sys/uvm/uvm_vnode.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: uvm_vnode.c,v 1.24 2001/11/10 18:42:32 art Exp $ */
-/* $NetBSD: uvm_vnode.c,v 1.36 2000/11/24 20:34:01 chs Exp $ */
+/* $OpenBSD: uvm_vnode.c,v 1.25 2001/11/27 05:27:12 art Exp $ */
+/* $NetBSD: uvm_vnode.c,v 1.47 2001/03/09 01:02:13 chs Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -52,6 +52,7 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/vnode.h>
@@ -59,6 +60,8 @@
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/conf.h>
+#include <sys/pool.h>
+#include <sys/mount.h>
#include <miscfs/specfs/specdev.h>
@@ -66,55 +69,38 @@
#include <uvm/uvm_vnode.h>
/*
- * private global data structure
- *
- * we keep a list of writeable active vnode-backed VM objects for sync op.
- * we keep a simpleq of vnodes that are currently being sync'd.
- */
-
-LIST_HEAD(uvn_list_struct, uvm_vnode);
-static struct uvn_list_struct uvn_wlist; /* writeable uvns */
-static simple_lock_data_t uvn_wl_lock; /* locks uvn_wlist */
-
-SIMPLEQ_HEAD(uvn_sq_struct, uvm_vnode);
-static struct uvn_sq_struct uvn_sync_q; /* sync'ing uvns */
-lock_data_t uvn_sync_lock; /* locks sync operation */
-
-/*
* functions
*/
-static void uvn_cluster __P((struct uvm_object *, voff_t,
- voff_t *, voff_t *));
-static void uvn_detach __P((struct uvm_object *));
-static boolean_t uvn_flush __P((struct uvm_object *, voff_t,
- voff_t, int));
-static int uvn_get __P((struct uvm_object *, voff_t,
- vm_page_t *, int *, int,
- vm_prot_t, int, int));
-static void uvn_init __P((void));
-static int uvn_io __P((struct uvm_vnode *, vm_page_t *,
- int, int, int));
-static int uvn_put __P((struct uvm_object *, vm_page_t *,
- int, boolean_t));
-static void uvn_reference __P((struct uvm_object *));
-static boolean_t uvn_releasepg __P((struct vm_page *,
- struct vm_page **));
+static void uvn_cluster __P((struct uvm_object *, voff_t, voff_t *,
+ voff_t *));
+static void uvn_detach __P((struct uvm_object *));
+static int uvn_findpage __P((struct uvm_object *, voff_t,
+ struct vm_page **, int));
+boolean_t uvn_flush __P((struct uvm_object *, voff_t, voff_t,
+ int));
+static int uvn_get __P((struct uvm_object *, voff_t, vm_page_t *,
+ int *, int, vm_prot_t, int, int));
+static int uvn_put __P((struct uvm_object *, vm_page_t *, int,
+ boolean_t));
+static void uvn_reference __P((struct uvm_object *));
+static boolean_t uvn_releasepg __P((struct vm_page *,
+ struct vm_page **));
/*
* master pager structure
*/
struct uvm_pagerops uvm_vnodeops = {
- uvn_init,
+ NULL,
uvn_reference,
uvn_detach,
- NULL, /* no specialized fault routine required */
+ NULL,
uvn_flush,
uvn_get,
uvn_put,
uvn_cluster,
- uvm_mk_pcluster, /* use generic version of this: see uvm_pager.c */
+ uvm_mk_pcluster,
uvn_releasepg,
};
@@ -123,22 +109,6 @@ struct uvm_pagerops uvm_vnodeops = {
*/
/*
- * uvn_init
- *
- * init pager private data structures.
- */
-
-static void
-uvn_init()
-{
-
- LIST_INIT(&uvn_wlist);
- simple_lock_init(&uvn_wl_lock);
- /* note: uvn_sync_q init'd in uvm_vnp_sync() */
- lockinit(&uvn_sync_lock, PVM, "uvnsync", 0, 0);
-}
-
-/*
* uvn_attach
*
* attach a vnode structure to a VM object. if the vnode is already
@@ -161,23 +131,20 @@ uvn_attach(arg, accessprot)
struct vnode *vp = arg;
struct uvm_vnode *uvn = &vp->v_uvm;
struct vattr vattr;
- int oldflags, result;
+ int result;
struct partinfo pi;
- u_quad_t used_vnode_size;
+ voff_t used_vnode_size;
UVMHIST_FUNC("uvn_attach"); UVMHIST_CALLED(maphist);
UVMHIST_LOG(maphist, "(vn=0x%x)", arg,0,0,0);
-
- used_vnode_size = (u_quad_t)0; /* XXX gcc -Wuninitialized */
+ used_vnode_size = (voff_t)0;
/*
* first get a lock on the uvn.
*/
simple_lock(&uvn->u_obj.vmobjlock);
- while (uvn->u_flags & UVM_VNODE_BLOCKED) {
- printf("uvn_attach: blocked at 0x%p flags 0x%x\n",
- uvn, uvn->u_flags);
- uvn->u_flags |= UVM_VNODE_WANTED;
+ while (uvn->u_flags & VXLOCK) {
+ uvn->u_flags |= VXWANT;
UVMHIST_LOG(maphist, " SLEEPING on blocked vn",0,0,0,0);
UVM_UNLOCK_AND_WAIT(uvn, &uvn->u_obj.vmobjlock, FALSE,
"uvn_attach", 0);
@@ -189,56 +156,26 @@ uvn_attach(arg, accessprot)
* if we're mapping a BLK device, make sure it is a disk.
*/
if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) {
- simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */
+ simple_unlock(&uvn->u_obj.vmobjlock);
UVMHIST_LOG(maphist,"<- done (VBLK not D_DISK!)", 0,0,0,0);
return(NULL);
}
- /*
- * now we have lock and uvn must not be in a blocked state.
- * first check to see if it is already active, in which case
- * we can bump the reference count, check to see if we need to
- * add it to the writeable list, and then return.
- */
- if (uvn->u_flags & UVM_VNODE_VALID) { /* already active? */
-
- /* regain VREF if we were persisting */
- if (uvn->u_obj.uo_refs == 0) {
- VREF(vp);
- UVMHIST_LOG(maphist," VREF (reclaim persisting vnode)",
- 0,0,0,0);
- }
- uvn->u_obj.uo_refs++; /* bump uvn ref! */
-
- /* check for new writeable uvn */
- if ((accessprot & VM_PROT_WRITE) != 0 &&
- (uvn->u_flags & UVM_VNODE_WRITEABLE) == 0) {
- simple_lock(&uvn_wl_lock);
- LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
- simple_unlock(&uvn_wl_lock);
- /* we are now on wlist! */
- uvn->u_flags |= UVM_VNODE_WRITEABLE;
- }
-
- /* unlock and return */
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs,
- 0, 0, 0);
- return (&uvn->u_obj);
- }
+#ifdef DIAGNOSTIC
+ if (vp->v_type != VREG) {
+ panic("uvn_attach: vp %p not VREG", vp);
+ }
+#endif
/*
- * need to call VOP_GETATTR() to get the attributes, but that could
- * block (due to I/O), so we want to unlock the object before calling.
- * however, we want to keep anyone else from playing with the object
- * while it is unlocked. to do this we set UVM_VNODE_ALOCK which
- * prevents anyone from attaching to the vnode until we are done with
- * it.
+ * set up our idea of the size
+ * if this hasn't been done already.
*/
- uvn->u_flags = UVM_VNODE_ALOCK;
+ if (uvn->u_size == VSIZENOTSET) {
+
+ uvn->u_flags |= VXLOCK;
simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */
/* XXX: curproc? */
-
if (vp->v_type == VBLK) {
/*
* We could implement this as a specfs getattr call, but:
@@ -252,8 +189,8 @@ uvn_attach(arg, accessprot)
DIOCGPART, (caddr_t)&pi, FREAD, curproc);
if (result == 0) {
/* XXX should remember blocksize */
- used_vnode_size = (u_quad_t)pi.disklab->d_secsize *
- (u_quad_t)pi.part->p_size;
+ used_vnode_size = (voff_t)pi.disklab->d_secsize *
+ (voff_t)pi.part->p_size;
}
} else {
result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc);
@@ -262,58 +199,26 @@ uvn_attach(arg, accessprot)
}
/* relock object */
- simple_lock(&uvn->u_obj.vmobjlock);
+ simple_lock(&uvn->u_obj.vmobjlock);
+
+ if (uvn->u_flags & VXWANT)
+ wakeup(uvn);
+ uvn->u_flags &= ~(VXLOCK|VXWANT);
if (result != 0) {
- if (uvn->u_flags & UVM_VNODE_WANTED)
- wakeup(uvn);
- uvn->u_flags = 0;
simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */
UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0);
return(NULL);
}
-
- /*
- * make sure that the newsize fits within a vaddr_t
- * XXX: need to revise addressing data types
- */
-#ifdef DEBUG
- if (vp->v_type == VBLK)
- printf("used_vnode_size = %llu\n", (long long)used_vnode_size);
-#endif
-
- /*
- * now set up the uvn.
- */
- uvn->u_obj.pgops = &uvm_vnodeops;
- TAILQ_INIT(&uvn->u_obj.memq);
- uvn->u_obj.uo_npages = 0;
- uvn->u_obj.uo_refs = 1; /* just us... */
- oldflags = uvn->u_flags;
- uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST;
- uvn->u_nio = 0;
uvn->u_size = used_vnode_size;
- /* if write access, we need to add it to the wlist */
- if (accessprot & VM_PROT_WRITE) {
- simple_lock(&uvn_wl_lock);
- LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
- simple_unlock(&uvn_wl_lock);
- uvn->u_flags |= UVM_VNODE_WRITEABLE; /* we are on wlist! */
}
- /*
- * add a reference to the vnode. this reference will stay as long
- * as there is a valid mapping of the vnode. dropped when the
- * reference count goes to zero [and we either free or persist].
- */
- VREF(vp);
+ /* unlock and return */
simple_unlock(&uvn->u_obj.vmobjlock);
- if (oldflags & UVM_VNODE_WANTED)
- wakeup(uvn);
-
- UVMHIST_LOG(maphist,"<- done/VREF, ret 0x%x", &uvn->u_obj,0,0,0);
- return(&uvn->u_obj);
+ UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs,
+ 0, 0, 0);
+ return (&uvn->u_obj);
}
@@ -333,23 +238,7 @@ static void
uvn_reference(uobj)
struct uvm_object *uobj;
{
-#ifdef DEBUG
- struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
-#endif
- UVMHIST_FUNC("uvn_reference"); UVMHIST_CALLED(maphist);
-
- simple_lock(&uobj->vmobjlock);
-#ifdef DEBUG
- if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
- printf("uvn_reference: ref=%d, flags=0x%x\n", uvn->u_flags,
- uobj->uo_refs);
- panic("uvn_reference: invalid state");
- }
-#endif
- uobj->uo_refs++;
- UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
- uobj, uobj->uo_refs,0,0);
- simple_unlock(&uobj->vmobjlock);
+ VREF((struct vnode *)uobj);
}
/*
@@ -365,291 +254,7 @@ static void
uvn_detach(uobj)
struct uvm_object *uobj;
{
- struct uvm_vnode *uvn;
- struct vnode *vp;
- int oldflags;
- UVMHIST_FUNC("uvn_detach"); UVMHIST_CALLED(maphist);
-
- simple_lock(&uobj->vmobjlock);
-
- UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0);
- uobj->uo_refs--; /* drop ref! */
- if (uobj->uo_refs) { /* still more refs */
- simple_unlock(&uobj->vmobjlock);
- UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
- return;
- }
-
- /*
- * get other pointers ...
- */
-
- uvn = (struct uvm_vnode *) uobj;
- vp = (struct vnode *) uobj;
-
- /*
- * clear VTEXT flag now that there are no mappings left (VTEXT is used
- * to keep an active text file from being overwritten).
- */
- vp->v_flag &= ~VTEXT;
-
- /*
- * we just dropped the last reference to the uvn. see if we can
- * let it "stick around".
- */
-
- if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
- /* won't block */
- uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES);
- simple_unlock(&uobj->vmobjlock);
- vrele(vp); /* drop vnode reference */
- UVMHIST_LOG(maphist,"<- done/vrele! (persist)", 0,0,0,0);
- return;
- }
-
- /*
- * its a goner!
- */
-
- UVMHIST_LOG(maphist," its a goner (flushing)!", 0,0,0,0);
-
- uvn->u_flags |= UVM_VNODE_DYING;
-
- /*
- * even though we may unlock in flush, no one can gain a reference
- * to us until we clear the "dying" flag [because it blocks
- * attaches]. we will not do that until after we've disposed of all
- * the pages with uvn_flush(). note that before the flush the only
- * pages that could be marked PG_BUSY are ones that are in async
- * pageout by the daemon. (there can't be any pending "get"'s
- * because there are no references to the object).
- */
-
- (void) uvn_flush(uobj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES);
-
- UVMHIST_LOG(maphist," its a goner (done flush)!", 0,0,0,0);
-
- /*
- * given the structure of this pager, the above flush request will
- * create the following state: all the pages that were in the object
- * have either been free'd or they are marked PG_BUSY|PG_RELEASED.
- * the PG_BUSY bit was set either by us or the daemon for async I/O.
- * in either case, if we have pages left we can't kill the object
- * yet because i/o is pending. in this case we set the "relkill"
- * flag which will cause pgo_releasepg to kill the object once all
- * the I/O's are done [pgo_releasepg will be called from the aiodone
- * routine or from the page daemon].
- */
-
- if (uobj->uo_npages) { /* I/O pending. iodone will free */
-#ifdef DEBUG
- /*
- * XXXCDC: very unlikely to happen until we have async i/o
- * so print a little info message in case it does.
- */
- printf("uvn_detach: vn %p has pages left after flush - "
- "relkill mode\n", uobj);
-#endif
- uvn->u_flags |= UVM_VNODE_RELKILL;
- simple_unlock(&uobj->vmobjlock);
- UVMHIST_LOG(maphist,"<- done! (releasepg will kill obj)", 0, 0,
- 0, 0);
- return;
- }
-
- /*
- * kill object now. note that we can't be on the sync q because
- * all references are gone.
- */
- if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
- simple_lock(&uvn_wl_lock); /* protect uvn_wlist */
- LIST_REMOVE(uvn, u_wlist);
- simple_unlock(&uvn_wl_lock);
- }
-#ifdef DIAGNOSTIC
- if (uobj->memq.tqh_first != NULL)
- panic("uvn_deref: vnode VM object still has pages afer "
- "syncio/free flush");
-#endif
- oldflags = uvn->u_flags;
- uvn->u_flags = 0;
- simple_unlock(&uobj->vmobjlock);
-
- /* wake up any sleepers */
- if (oldflags & UVM_VNODE_WANTED)
- wakeup(uvn);
-
- /*
- * drop our reference to the vnode.
- */
- vrele(vp);
- UVMHIST_LOG(maphist,"<- done (vrele) final", 0,0,0,0);
-
- return;
-}
-
-/*
- * uvm_vnp_terminate: external hook to clear out a vnode's VM
- *
- * called in two cases:
- * [1] when a persisting vnode vm object (i.e. one with a zero reference
- * count) needs to be freed so that a vnode can be reused. this
- * happens under "getnewvnode" in vfs_subr.c. if the vnode from
- * the free list is still attached (i.e. not VBAD) then vgone is
- * called. as part of the vgone trace this should get called to
- * free the vm object. this is the common case.
- * [2] when a filesystem is being unmounted by force (MNT_FORCE,
- * "umount -f") the vgone() function is called on active vnodes
- * on the mounted file systems to kill their data (the vnodes become
- * "dead" ones [see src/sys/miscfs/deadfs/...]). that results in a
- * call here (even if the uvn is still in use -- i.e. has a non-zero
- * reference count). this case happens at "umount -f" and during a
- * "reboot/halt" operation.
- *
- * => the caller must XLOCK and VOP_LOCK the vnode before calling us
- * [protects us from getting a vnode that is already in the DYING
- * state...]
- * => unlike uvn_detach, this function must not return until all the
- * uvn's pages are disposed of.
- * => in case [2] the uvn is still alive after this call, but all I/O
- * ops will fail (due to the backing vnode now being "dead"). this
- * will prob. kill any process using the uvn due to pgo_get failing.
- */
-
-void
-uvm_vnp_terminate(vp)
- struct vnode *vp;
-{
- struct uvm_vnode *uvn = &vp->v_uvm;
- int oldflags;
- UVMHIST_FUNC("uvm_vnp_terminate"); UVMHIST_CALLED(maphist);
-
- /*
- * lock object and check if it is valid
- */
- simple_lock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist, " vp=0x%x, ref=%d, flag=0x%x", vp,
- uvn->u_obj.uo_refs, uvn->u_flags, 0);
- if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist, "<- done (not active)", 0, 0, 0, 0);
- return;
- }
-
- /*
- * must be a valid uvn that is not already dying (because XLOCK
- * protects us from that). the uvn can't in the ALOCK state
- * because it is valid, and uvn's that are in the ALOCK state haven't
- * been marked valid yet.
- */
-
-#ifdef DEBUG
- /*
- * debug check: are we yanking the vnode out from under our uvn?
- */
- if (uvn->u_obj.uo_refs) {
- printf("uvm_vnp_terminate(%p): terminating active vnode "
- "(refs=%d)\n", uvn, uvn->u_obj.uo_refs);
- }
-#endif
-
- /*
- * it is possible that the uvn was detached and is in the relkill
- * state [i.e. waiting for async i/o to finish so that releasepg can
- * kill object]. we take over the vnode now and cancel the relkill.
- * we want to know when the i/o is done so we can recycle right
- * away. note that a uvn can only be in the RELKILL state if it
- * has a zero reference count.
- */
-
- if (uvn->u_flags & UVM_VNODE_RELKILL)
- uvn->u_flags &= ~UVM_VNODE_RELKILL; /* cancel RELKILL */
-
- /*
- * block the uvn by setting the dying flag, and then flush the
- * pages. (note that flush may unlock object while doing I/O, but
- * it will re-lock it before it returns control here).
- *
- * also, note that we tell I/O that we are already VOP_LOCK'd so
- * that uvn_io doesn't attempt to VOP_LOCK again.
- *
- * XXXCDC: setting VNISLOCKED on an active uvn which is being terminated
- * due to a forceful unmount might not be a good idea. maybe we
- * need a way to pass in this info to uvn_flush through a
- * pager-defined PGO_ constant [currently there are none].
- */
- uvn->u_flags |= UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED;
-
- (void) uvn_flush(&uvn->u_obj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES);
-
- /*
- * as we just did a flush we expect all the pages to be gone or in
- * the process of going. sleep to wait for the rest to go [via iosync].
- */
-
- while (uvn->u_obj.uo_npages) {
-#ifdef DEBUG
- struct vm_page *pp;
- for (pp = uvn->u_obj.memq.tqh_first ; pp != NULL ;
- pp = pp->listq.tqe_next) {
- if ((pp->flags & PG_BUSY) == 0)
- panic("uvm_vnp_terminate: detected unbusy pg");
- }
- if (uvn->u_nio == 0)
- panic("uvm_vnp_terminate: no I/O to wait for?");
- printf("uvm_vnp_terminate: waiting for I/O to fin.\n");
- /*
- * XXXCDC: this is unlikely to happen without async i/o so we
- * put a printf in just to keep an eye on it.
- */
-#endif
- uvn->u_flags |= UVM_VNODE_IOSYNC;
- UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock, FALSE,
- "uvn_term",0);
- simple_lock(&uvn->u_obj.vmobjlock);
- }
-
- /*
- * done. now we free the uvn if its reference count is zero
- * (true if we are zapping a persisting uvn). however, if we are
- * terminating a uvn with active mappings we let it live ... future
- * calls down to the vnode layer will fail.
- */
-
- oldflags = uvn->u_flags;
- if (uvn->u_obj.uo_refs) {
-
- /*
- * uvn must live on it is dead-vnode state until all references
- * are gone. restore flags. clear CANPERSIST state.
- */
-
- uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED|
- UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST);
-
- } else {
-
- /*
- * free the uvn now. note that the VREF reference is already
- * gone [it is dropped when we enter the persist state].
- */
- if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
- panic("uvm_vnp_terminate: io sync wanted bit set");
-
- if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
- simple_lock(&uvn_wl_lock);
- LIST_REMOVE(uvn, u_wlist);
- simple_unlock(&uvn_wl_lock);
- }
- uvn->u_flags = 0; /* uvn is history, clear all bits */
- }
-
- if (oldflags & UVM_VNODE_WANTED)
- wakeup(uvn); /* object lock still held */
-
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
-
+ vrele((struct vnode *)uobj);
}
/*
@@ -662,7 +267,7 @@ uvm_vnp_terminate(vp)
* => returns TRUE if page's object is still alive, FALSE if we
* killed the page's object. if we return TRUE, then we
* return with the object locked.
- * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return
+ * => if (nextpgp != NULL) => we return the next page on the queue, and return
* with the page queues locked [for pagedaemon]
* => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
* => we kill the uvn if it is not referenced and we are supposed to
@@ -674,11 +279,7 @@ uvn_releasepg(pg, nextpgp)
struct vm_page *pg;
struct vm_page **nextpgp; /* OUT */
{
- struct uvm_vnode *uvn = (struct uvm_vnode *) pg->uobject;
-#ifdef DIAGNOSTIC
- if ((pg->flags & PG_RELEASED) == 0)
- panic("uvn_releasepg: page not released!");
-#endif
+ KASSERT(pg->flags & PG_RELEASED);
/*
* dispose of the page [caller handles PG_WANTED]
@@ -686,64 +287,25 @@ uvn_releasepg(pg, nextpgp)
pmap_page_protect(pg, VM_PROT_NONE);
uvm_lock_pageq();
if (nextpgp)
- *nextpgp = pg->pageq.tqe_next; /* next page for daemon */
+ *nextpgp = TAILQ_NEXT(pg, pageq);
uvm_pagefree(pg);
if (!nextpgp)
uvm_unlock_pageq();
- /*
- * now see if we need to kill the object
- */
- if (uvn->u_flags & UVM_VNODE_RELKILL) {
- if (uvn->u_obj.uo_refs)
- panic("uvn_releasepg: kill flag set on referenced "
- "object!");
- if (uvn->u_obj.uo_npages == 0) {
- if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
- simple_lock(&uvn_wl_lock);
- LIST_REMOVE(uvn, u_wlist);
- simple_unlock(&uvn_wl_lock);
- }
-#ifdef DIAGNOSTIC
- if (uvn->u_obj.memq.tqh_first)
- panic("uvn_releasepg: pages in object with npages == 0");
-#endif
- if (uvn->u_flags & UVM_VNODE_WANTED)
- /* still holding object lock */
- wakeup(uvn);
-
- uvn->u_flags = 0; /* DEAD! */
- simple_unlock(&uvn->u_obj.vmobjlock);
- return (FALSE);
- }
- }
return (TRUE);
}
/*
- * NOTE: currently we have to use VOP_READ/VOP_WRITE because they go
- * through the buffer cache and allow I/O in any size. These VOPs use
- * synchronous i/o. [vs. VOP_STRATEGY which can be async, but doesn't
- * go through the buffer cache or allow I/O sizes larger than a
- * block]. we will eventually want to change this.
- *
* issues to consider:
- * uvm provides the uvm_aiodesc structure for async i/o management.
* there are two tailq's in the uvm. structure... one for pending async
* i/o and one for "done" async i/o. to do an async i/o one puts
- * an aiodesc on the "pending" list (protected by splbio()), starts the
+ * a buf on the "pending" list (protected by splbio()), starts the
* i/o and returns VM_PAGER_PEND. when the i/o is done, we expect
* some sort of "i/o done" function to be called (at splbio(), interrupt
- * time). this function should remove the aiodesc from the pending list
+ * time). this function should remove the buf from the pending list
* and place it on the "done" list and wakeup the daemon. the daemon
* will run at normal spl() and will remove all items from the "done"
- * list and call the "aiodone" hook for each done request (see uvm_pager.c).
- * [in the old vm code, this was done by calling the "put" routine with
- * null arguments which made the code harder to read and understand because
- * you had one function ("put") doing two things.]
- *
- * so the current pager needs:
- * int uvn_aiodone(struct uvm_aiodesc *)
+ * list and call the iodone hook for each done request (see uvm_pager.c).
*
* => return KERN_SUCCESS (aio finished, free it). otherwise requeue for
* later collection.
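
A hedged sketch of the pending/done scheme described above; the queue heads and
the interrupt-side helper are illustrative names only, not part of this change
(the real hook is the iodone path in uvm_pager.c):

	TAILQ_HEAD(, buf) aio_pending;	/* protected by splbio() */
	TAILQ_HEAD(, buf) aio_done;

	void
	aio_done_intr(struct buf *bp)	/* at splbio(), interrupt time */
	{
		TAILQ_REMOVE(&aio_pending, bp, b_freelist);
		TAILQ_INSERT_TAIL(&aio_done, bp, b_freelist);
		wakeup(&aio_done);	/* wake the daemon */
	}

	/* daemon side, at normal spl: drain "done" and call the hook */
	s = splbio();
	while ((bp = TAILQ_FIRST(&aio_done)) != NULL) {
		TAILQ_REMOVE(&aio_done, bp, b_freelist);
		splx(s);
		uvm_aio_aiodone(bp);	/* iodone hook, see uvm_pager.c */
		s = splbio();
	}
	splx(s);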
@@ -764,15 +326,17 @@ uvn_releasepg(pg, nextpgp)
/*
* uvn_flush: flush pages out of a uvm object.
*
+ * => "stop == 0" means flush all pages at or after "start".
* => object should be locked by caller. we may _unlock_ the object
- * if (and only if) we need to clean a page (PGO_CLEANIT).
+ * if (and only if) we need to clean a page (PGO_CLEANIT), or
+ * if PGO_SYNCIO is set and there are pages busy.
* we return with the object locked.
- * => if PGO_CLEANIT is set, we may block (due to I/O). thus, a caller
- * might want to unlock higher level resources (e.g. vm_map)
- * before calling flush.
- * => if PGO_CLEANIT is not set, then we will neither unlock the object
- * or block.
- * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
+ * => if PGO_CLEANIT or PGO_SYNCIO is set, we may block (due to I/O).
+ * thus, a caller might want to unlock higher level resources
+ * (e.g. vm_map) before calling flush.
+ * => if neither PGO_CLEANIT nor PGO_SYNCIO is set, then we will neither
+ * unlock the object nor block.
+ * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
* for flushing.
* => NOTE: we rely on the fact that the object's memq is a TAILQ and
* that new pages are inserted on the tail end of the list. thus,
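
A hedged sketch of a caller honoring this contract, using the new "stop == 0"
convention from the list above (the caller itself is hypothetical):

	simple_lock(&uobj->vmobjlock);
	/* clean and free every page at or after "start" */
	(void) uvn_flush(uobj, trunc_page(start), 0,
	    PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
	/* uvn_flush() may drop the lock for I/O but returns locked */
	simple_unlock(&uobj->vmobjlock);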
@@ -814,39 +378,62 @@ uvn_releasepg(pg, nextpgp)
#define UVN_HASH_PENALTY 4 /* XXX: a guess */
-static boolean_t
+boolean_t
uvn_flush(uobj, start, stop, flags)
struct uvm_object *uobj;
voff_t start, stop;
int flags;
{
- struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
+ struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
+ struct vnode *vp = (struct vnode *)uobj;
struct vm_page *pp, *ppnext, *ptmp;
- struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
+ struct vm_page *pps[256], **ppsp;
+ int s;
int npages, result, lcv;
- boolean_t retval, need_iosync, by_list, needs_clean, all;
+ boolean_t retval, need_iosync, by_list, needs_clean, all, wasclean;
voff_t curoff;
u_short pp_version;
UVMHIST_FUNC("uvn_flush"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist, "uobj %p start 0x%x stop 0x%x flags 0x%x",
+ uobj, start, stop, flags);
+ KASSERT(flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE));
+
+ if (uobj->uo_npages == 0) {
+ s = splbio();
+ if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
+ (vp->v_bioflag & VBIOONSYNCLIST)) {
+ vp->v_bioflag &= ~VBIOONSYNCLIST;
+ LIST_REMOVE(vp, v_synclist);
+ }
+ splx(s);
+ return TRUE;
+ }
+
+#ifdef DIAGNOSTIC
+ if (uvn->u_size == VSIZENOTSET) {
+ printf("uvn_flush: size not set vp %p\n", uvn);
+ vprint("uvn_flush VSIZENOTSET", vp);
+ flags |= PGO_ALLPAGES;
+ }
+#endif
- curoff = 0; /* XXX: shut up gcc */
/*
* get init vals and determine how we are going to traverse object
*/
+ if (stop == 0) {
+ stop = trunc_page(LLONG_MAX);
+ }
+ curoff = 0;
need_iosync = FALSE;
- retval = TRUE; /* return value */
+ retval = TRUE;
+ wasclean = TRUE;
if (flags & PGO_ALLPAGES) {
all = TRUE;
- by_list = TRUE; /* always go by the list */
+ by_list = TRUE;
} else {
start = trunc_page(start);
stop = round_page(stop);
-#ifdef DEBUG
- if (stop > round_page(uvn->u_size))
- printf("uvn_flush: strange, got an out of range "
- "flush (fixed)\n");
-#endif
all = FALSE;
by_list = (uobj->uo_npages <=
((stop - start) >> PAGE_SHIFT) * UVN_HASH_PENALTY);
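
As a worked example of the traversal heuristic above (4 KB pages assumed):

	/*
	 * flushing a 1MB range covers (1MB >> PAGE_SHIFT) = 256 pages;
	 * with UVN_HASH_PENALTY == 4, by_list is TRUE only while
	 * uobj->uo_npages <= 256 * 4 = 1024, i.e. when walking memq is
	 * expected to be no worse than ~4x the cost of 256 hash lookups.
	 */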
@@ -870,8 +457,7 @@ uvn_flush(uobj, start, stop, flags)
if ((flags & PGO_CLEANIT) != 0 &&
uobj->pgops->pgo_mk_pcluster != NULL) {
if (by_list) {
- for (pp = uobj->memq.tqh_first ; pp != NULL ;
- pp = pp->listq.tqe_next) {
+ TAILQ_FOREACH(pp, &uobj->memq, listq) {
if (!all &&
(pp->offset < start || pp->offset >= stop))
continue;
@@ -895,45 +481,32 @@ uvn_flush(uobj, start, stop, flags)
*/
if (by_list) {
- pp = uobj->memq.tqh_first;
+ pp = TAILQ_FIRST(&uobj->memq);
} else {
curoff = start;
pp = uvm_pagelookup(uobj, curoff);
}
- ppnext = NULL; /* XXX: shut up gcc */
- ppsp = NULL; /* XXX: shut up gcc */
- uvm_lock_pageq(); /* page queues locked */
+ ppnext = NULL;
+ ppsp = NULL;
+ uvm_lock_pageq();
/* locked: both page queues and uobj */
for ( ; (by_list && pp != NULL) ||
- (!by_list && curoff < stop) ; pp = ppnext) {
-
+ (!by_list && curoff < stop) ; pp = ppnext) {
if (by_list) {
-
- /*
- * range check
- */
-
if (!all &&
(pp->offset < start || pp->offset >= stop)) {
- ppnext = pp->listq.tqe_next;
+ ppnext = TAILQ_NEXT(pp, listq);
continue;
}
-
} else {
-
- /*
- * null check
- */
-
curoff += PAGE_SIZE;
if (pp == NULL) {
if (curoff < stop)
ppnext = uvm_pagelookup(uobj, curoff);
continue;
}
-
}
/*
@@ -949,24 +522,23 @@ uvn_flush(uobj, start, stop, flags)
if ((flags & PGO_CLEANIT) == 0 || (pp->flags & PG_BUSY) != 0) {
needs_clean = FALSE;
- if ((pp->flags & PG_BUSY) != 0 &&
- (flags & (PGO_CLEANIT|PGO_SYNCIO)) ==
- (PGO_CLEANIT|PGO_SYNCIO))
+ if (flags & PGO_SYNCIO)
need_iosync = TRUE;
} else {
+
/*
* freeing: nuke all mappings so we can sync
* PG_CLEAN bit with no race
*/
if ((pp->flags & PG_CLEAN) != 0 &&
(flags & PGO_FREE) != 0 &&
- (pp->pqflags & PQ_ACTIVE) != 0)
+ /* XXX ACTIVE|INACTIVE test unnecessary? */
+ (pp->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) != 0)
pmap_page_protect(pp, VM_PROT_NONE);
if ((pp->flags & PG_CLEAN) != 0 &&
pmap_is_modified(pp))
pp->flags &= ~(PG_CLEAN);
- pp->flags |= PG_CLEANCHK; /* update "hint" */
-
+ pp->flags |= PG_CLEANCHK;
needs_clean = ((pp->flags & PG_CLEAN) == 0);
}
@@ -974,29 +546,26 @@ uvn_flush(uobj, start, stop, flags)
* if we don't need a clean... load ppnext and dispose of pp
*/
if (!needs_clean) {
- /* load ppnext */
if (by_list)
- ppnext = pp->listq.tqe_next;
+ ppnext = TAILQ_NEXT(pp, listq);
else {
if (curoff < stop)
ppnext = uvm_pagelookup(uobj, curoff);
}
- /* now dispose of pp */
if (flags & PGO_DEACTIVATE) {
if ((pp->pqflags & PQ_INACTIVE) == 0 &&
+ (pp->flags & PG_BUSY) == 0 &&
pp->wire_count == 0) {
- pmap_page_protect(pp, VM_PROT_NONE);
+ pmap_clear_reference(pp);
uvm_pagedeactivate(pp);
}
} else if (flags & PGO_FREE) {
if (pp->flags & PG_BUSY) {
- /* release busy pages */
pp->flags |= PG_RELEASED;
} else {
pmap_page_protect(pp, VM_PROT_NONE);
- /* removed page from object */
uvm_pagefree(pp);
}
}
@@ -1013,6 +582,7 @@ uvn_flush(uobj, start, stop, flags)
* note: locked: uobj and page queues.
*/
+ wasclean = FALSE;
pp->flags |= PG_BUSY; /* we 'own' page now */
UVM_PAGE_OWN(pp, "uvn_flush");
pmap_page_protect(pp, VM_PROT_READ);
@@ -1023,7 +593,7 @@ ReTry:
/* locked: page queues, uobj */
result = uvm_pager_put(uobj, pp, &ppsp, &npages,
- flags | PGO_DOACTCLUST, start, stop);
+ flags | PGO_DOACTCLUST, start, stop);
/* unlocked: page queues, uobj */
/*
@@ -1046,7 +616,8 @@ ReTry:
*/
if (result == VM_PAGER_AGAIN) {
- /*
+
+ /*
* it is unlikely, but page could have been released
* while we had the object lock dropped. we ignore
* this now and retry the I/O. we will detect and
@@ -1073,27 +644,22 @@ ReTry:
* we can move on to the next page.
*/
- if (result == VM_PAGER_PEND) {
+ if (result == VM_PAGER_PEND &&
+ (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
- if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
- /*
- * no per-page ops: refresh ppnext and continue
- */
- if (by_list) {
- if (pp->version == pp_version)
- ppnext = pp->listq.tqe_next;
- else
- /* reset */
- ppnext = uobj->memq.tqh_first;
- } else {
- if (curoff < stop)
- ppnext = uvm_pagelookup(uobj,
- curoff);
- }
- continue;
+ /*
+ * no per-page ops: refresh ppnext and continue
+ */
+ if (by_list) {
+ if (pp->version == pp_version)
+ ppnext = TAILQ_NEXT(pp, listq);
+ else
+ ppnext = TAILQ_FIRST(&uobj->memq);
+ } else {
+ if (curoff < stop)
+ ppnext = uvm_pagelookup(uobj, curoff);
}
-
- /* need to do anything here? */
+ continue;
}
/*
@@ -1120,18 +686,19 @@ ReTry:
/* set up next page for outer loop */
if (by_list) {
if (pp->version == pp_version)
- ppnext = pp->listq.tqe_next;
+ ppnext = TAILQ_NEXT(pp, listq);
else
- /* reset */
- ppnext = uobj->memq.tqh_first;
+ ppnext = TAILQ_FIRST(
+ &uobj->memq);
} else {
if (curoff < stop)
- ppnext = uvm_pagelookup(uobj, curoff);
+ ppnext = uvm_pagelookup(uobj,
+ curoff);
}
}
/*
- * verify the page didn't get moved while obj was
+ * verify the page wasn't moved while obj was
* unlocked
*/
if (result == VM_PAGER_PEND && ptmp->uobject != uobj)
@@ -1145,26 +712,32 @@ ReTry:
*/
if (result != VM_PAGER_PEND) {
- if (ptmp->flags & PG_WANTED)
+ if (ptmp->flags & PG_WANTED) {
/* still holding object lock */
wakeup(ptmp);
-
+ }
ptmp->flags &= ~(PG_WANTED|PG_BUSY);
UVM_PAGE_OWN(ptmp, NULL);
if (ptmp->flags & PG_RELEASED) {
-
- /* pgo_releasepg wants this */
uvm_unlock_pageq();
- if (!uvn_releasepg(ptmp, NULL))
+ if (!uvn_releasepg(ptmp, NULL)) {
+ UVMHIST_LOG(maphist,
+ "released %p",
+ ptmp, 0,0,0);
return (TRUE);
-
- uvm_lock_pageq(); /* relock */
- continue; /* next page */
-
+ }
+ uvm_lock_pageq();
+ continue;
} else {
- ptmp->flags |= (PG_CLEAN|PG_CLEANCHK);
- if ((flags & PGO_FREE) == 0)
- pmap_clear_modify(ptmp);
+ if ((flags & PGO_WEAK) == 0 &&
+ !(result == VM_PAGER_ERROR &&
+ curproc == uvm.pagedaemon_proc)) {
+ ptmp->flags |=
+ (PG_CLEAN|PG_CLEANCHK);
+ if ((flags & PGO_FREE) == 0) {
+ pmap_clear_modify(ptmp);
+ }
+ }
}
}
@@ -1174,11 +747,11 @@ ReTry:
if (flags & PGO_DEACTIVATE) {
if ((pp->pqflags & PQ_INACTIVE) == 0 &&
+ (pp->flags & PG_BUSY) == 0 &&
pp->wire_count == 0) {
- pmap_page_protect(ptmp, VM_PROT_NONE);
+ pmap_clear_reference(ptmp);
uvm_pagedeactivate(ptmp);
}
-
} else if (flags & PGO_FREE) {
if (result == VM_PAGER_PEND) {
if ((ptmp->flags & PG_BUSY) != 0)
@@ -1187,10 +760,10 @@ ReTry:
} else {
if (result != VM_PAGER_OK) {
printf("uvn_flush: obj=%p, "
- "offset=0x%llx. error "
- "during pageout.\n",
+ "offset=0x%llx. error %d\n",
pp->uobject,
- (long long)pp->offset);
+ (long long)pp->offset,
+ result);
printf("uvn_flush: WARNING: "
"changes to page may be "
"lost!\n");
@@ -1200,31 +773,38 @@ ReTry:
uvm_pagefree(ptmp);
}
}
-
} /* end of "lcv" for loop */
-
} /* end of "pp" for loop */
- /*
- * done with pagequeues: unlock
- */
uvm_unlock_pageq();
-
- /*
- * now wait for all I/O if required.
- */
+ s = splbio();
+ if ((flags & PGO_CLEANIT) && all && wasclean &&
+ LIST_FIRST(&vp->v_dirtyblkhd) == NULL &&
+ (vp->v_bioflag & VBIOONSYNCLIST)) {
+ vp->v_bioflag &= ~VBIOONSYNCLIST;
+ LIST_REMOVE(vp, v_synclist);
+ }
+ splx(s);
if (need_iosync) {
-
UVMHIST_LOG(maphist," <<DOING IOSYNC>>",0,0,0,0);
- while (uvn->u_nio != 0) {
- uvn->u_flags |= UVM_VNODE_IOSYNC;
- UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock,
- FALSE, "uvn_flush",0);
+
+ /*
+ * XXX this doesn't use the new two-flag scheme,
+ * but to use that, all i/o initiators will have to change.
+ */
+
+ s = splbio();
+ while (vp->v_numoutput != 0) {
+ UVMHIST_LOG(ubchist, "waiting for vp %p num %d",
+ vp, vp->v_numoutput,0,0);
+
+ vp->v_bioflag |= VBIOWAIT;
+ UVM_UNLOCK_AND_WAIT(&vp->v_numoutput,
+ &uvn->u_obj.vmobjlock,
+ FALSE, "uvn_flush",0);
simple_lock(&uvn->u_obj.vmobjlock);
}
- if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
- wakeup(&uvn->u_flags);
- uvn->u_flags &= ~(UVM_VNODE_IOSYNC|UVM_VNODE_IOSYNCWANTED);
+ splx(s);
}
/* return, with object locked! */
@@ -1248,31 +828,18 @@ uvn_cluster(uobj, offset, loffset, hoffset)
voff_t offset;
voff_t *loffset, *hoffset; /* OUT */
{
- struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
- *loffset = offset;
-
- if (*loffset >= uvn->u_size)
- panic("uvn_cluster: offset out of range");
+ struct uvm_vnode *uvn = (struct uvm_vnode *)uobj;
- /*
- * XXX: old pager claims we could use VOP_BMAP to get maxcontig value.
- */
- *hoffset = *loffset + MAXBSIZE;
- if (*hoffset > round_page(uvn->u_size)) /* past end? */
- *hoffset = round_page(uvn->u_size);
-
- return;
+ *loffset = offset;
+ *hoffset = MIN(offset + MAXBSIZE, round_page(uvn->u_size));
}
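
A worked example of the new bound (64 KB MAXBSIZE assumed):

	/*
	 * offset 0x3000 in a 0x5800-byte object: *hoffset =
	 * MIN(0x3000 + 0x10000, round_page(0x5800)) = 0x6000, so a
	 * cluster never extends past the page containing EOF.
	 */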
/*
* uvn_put: flush page data to backing store.
*
- * => prefer map unlocked (not required)
* => object must be locked! we will _unlock_ it before starting I/O.
* => flags: PGO_SYNCIO -- use sync. I/O
* => note: caller must set PG_CLEAN and pmap_clear_modify (if needed)
- * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
- * [thus we never do async i/o! see iodone comment]
*/
static int
@@ -1281,13 +848,11 @@ uvn_put(uobj, pps, npages, flags)
struct vm_page **pps;
int npages, flags;
{
- int retval;
-
- /* note: object locked */
- retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE);
- /* note: object unlocked */
+ struct vnode *vp = (struct vnode *)uobj;
+ int error;
- return(retval);
+ error = VOP_PUTPAGES(vp, pps, npages, flags, NULL);
+ return uvm_errno2vmerror(error);
}
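
A hedged sketch of what a caller of the pager put path now sees; the caller and
its error handling are hypothetical:

	simple_lock(&uobj->vmobjlock);
	/* pgo_put unlocks the object before starting I/O */
	result = uobj->pgops->pgo_put(uobj, pps, npages, PGO_SYNCIO);
	if (result != VM_PAGER_OK && result != VM_PAGER_PEND)
		printf("pageout error: vm result %d\n", result);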
@@ -1308,551 +873,121 @@ uvn_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
voff_t offset;
struct vm_page **pps; /* IN/OUT */
int *npagesp; /* IN (OUT if PGO_LOCKED) */
- int centeridx, advice, flags;
+ int centeridx;
vm_prot_t access_type;
+ int advice, flags;
{
- voff_t current_offset;
- struct vm_page *ptmp;
- int lcv, result, gotpages;
- boolean_t done;
- UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(maphist);
- UVMHIST_LOG(maphist, "flags=%d", flags,0,0,0);
-
- /*
- * step 1: handled the case where fault data structures are locked.
- */
-
- if (flags & PGO_LOCKED) {
-
- /*
- * gotpages is the current number of pages we've gotten (which
- * we pass back up to caller via *npagesp.
- */
-
- gotpages = 0;
-
- /*
- * step 1a: get pages that are already resident. only do this
- * if the data structures are locked (i.e. the first time
- * through).
- */
-
- done = TRUE; /* be optimistic */
-
- for (lcv = 0, current_offset = offset ; lcv < *npagesp ;
- lcv++, current_offset += PAGE_SIZE) {
-
- /* do we care about this page? if not, skip it */
- if (pps[lcv] == PGO_DONTCARE)
- continue;
-
- /* lookup page */
- ptmp = uvm_pagelookup(uobj, current_offset);
-
- /* to be useful must get a non-busy, non-released pg */
- if (ptmp == NULL ||
- (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
- if (lcv == centeridx || (flags & PGO_ALLPAGES)
- != 0)
- done = FALSE; /* need to do a wait or I/O! */
- continue;
- }
-
- /*
- * useful page: busy/lock it and plug it in our
- * result array
- */
- ptmp->flags |= PG_BUSY; /* loan up to caller */
- UVM_PAGE_OWN(ptmp, "uvn_get1");
- pps[lcv] = ptmp;
- gotpages++;
-
- } /* "for" lcv loop */
-
- /*
- * XXX: given the "advice", should we consider async read-ahead?
- * XXX: fault currently deactivates pages behind us. is
- * this good (other callers might not).
- */
- /*
- * XXX: read-ahead currently handled by buffer cache (bread)
- * level.
- * XXX: no async i/o available.
- * XXX: so we don't do anything now.
- */
-
- /*
- * step 1c: now we've either done everything needed or we need to
- * unlock and do some waiting or I/O.
- */
-
- *npagesp = gotpages; /* let caller know */
- if (done)
- return(VM_PAGER_OK); /* bingo! */
- else
- /* EEK! Need to unlock and I/O */
- return(VM_PAGER_UNLOCK);
- }
-
- /*
- * step 2: get non-resident or busy pages.
- * object is locked. data structures are unlocked.
- *
- * XXX: because we can't do async I/O at this level we get things
- * page at a time (otherwise we'd chunk). the VOP_READ() will do
- * async-read-ahead for us at a lower level.
- */
-
- for (lcv = 0, current_offset = offset ;
- lcv < *npagesp ; lcv++, current_offset += PAGE_SIZE) {
-
- /* skip over pages we've already gotten or don't want */
- /* skip over pages we don't _have_ to get */
- if (pps[lcv] != NULL || (lcv != centeridx &&
- (flags & PGO_ALLPAGES) == 0))
- continue;
-
- /*
- * we have yet to locate the current page (pps[lcv]). we first
- * look for a page that is already at the current offset. if
- * we find a page, we check to see if it is busy or released.
- * if that is the case, then we sleep on the page until it is
- * no longer busy or released and repeat the lookup. if the
- * page we found is neither busy nor released, then we busy it
- * (so we own it) and plug it into pps[lcv]. this breaks the
- * following while loop and indicates we are ready to move on
- * to the next page in the "lcv" loop above.
- *
- * if we exit the while loop with pps[lcv] still set to NULL,
- * then it means that we allocated a new busy/fake/clean page
- * ptmp in the object and we need to do I/O to fill in the data.
- */
-
- while (pps[lcv] == NULL) { /* top of "pps" while loop */
-
- /* look for a current page */
- ptmp = uvm_pagelookup(uobj, current_offset);
-
- /* nope? allocate one now (if we can) */
- if (ptmp == NULL) {
-
- ptmp = uvm_pagealloc(uobj, current_offset,
- NULL, 0);
-
- /* out of RAM? */
- if (ptmp == NULL) {
- simple_unlock(&uobj->vmobjlock);
- uvm_wait("uvn_getpage");
- simple_lock(&uobj->vmobjlock);
-
- /* goto top of pps while loop */
- continue;
- }
-
- /*
- * got new page ready for I/O. break pps
- * while loop. pps[lcv] is still NULL.
- */
- break;
- }
-
- /* page is there, see if we need to wait on it */
- if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
- ptmp->flags |= PG_WANTED;
- UVM_UNLOCK_AND_WAIT(ptmp,
- &uobj->vmobjlock, FALSE, "uvn_get",0);
- simple_lock(&uobj->vmobjlock);
- continue; /* goto top of pps while loop */
- }
-
- /*
- * if we get here then the page has become resident
- * and unbusy between steps 1 and 2. we busy it
- * now (so we own it) and set pps[lcv] (so that we
- * exit the while loop).
- */
- ptmp->flags |= PG_BUSY;
- UVM_PAGE_OWN(ptmp, "uvn_get2");
- pps[lcv] = ptmp;
- }
-
- /*
- * if we own a valid page at the correct offset, pps[lcv]
- * will point to it. nothing more to do except go to the
- * next page.
- */
-
- if (pps[lcv])
- continue; /* next lcv */
-
- /*
- * we have a "fake/busy/clean" page that we just allocated. do
- * I/O to fill it with valid data. note that object must be
- * locked going into uvn_io, but will be unlocked afterwards.
- */
-
- result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1,
- PGO_SYNCIO, UIO_READ);
-
- /*
- * I/O done. object is unlocked (by uvn_io). because we used
- * syncio the result can not be PEND or AGAIN. we must relock
- * and check for errors.
- */
-
- /* lock object. check for errors. */
- simple_lock(&uobj->vmobjlock);
- if (result != VM_PAGER_OK) {
- if (ptmp->flags & PG_WANTED)
- /* object lock still held */
- wakeup(ptmp);
-
- ptmp->flags &= ~(PG_WANTED|PG_BUSY);
- UVM_PAGE_OWN(ptmp, NULL);
- uvm_lock_pageq();
- uvm_pagefree(ptmp);
- uvm_unlock_pageq();
- simple_unlock(&uobj->vmobjlock);
- return(result);
- }
-
- /*
- * we got the page! clear the fake flag (indicates valid
- * data now in page) and plug into our result array. note
- * that page is still busy.
- *
- * it is the callers job to:
- * => check if the page is released
- * => unbusy the page
- * => activate the page
- */
-
- ptmp->flags &= ~PG_FAKE; /* data is valid ... */
- pmap_clear_modify(ptmp); /* ... and clean */
- pps[lcv] = ptmp;
-
- } /* lcv loop */
-
- /*
- * finally, unlock object and return.
- */
-
- simple_unlock(&uobj->vmobjlock);
- return (VM_PAGER_OK);
+ struct vnode *vp = (struct vnode *)uobj;
+ int error;
+ UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x", vp, (int)offset, 0,0);
+ error = VOP_GETPAGES(vp, offset, pps, npagesp, centeridx,
+ access_type, advice, flags);
+ return uvm_errno2vmerror(error);
}
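
A hedged sketch of the two-pass protocol the deleted code above implemented by
hand, as a hypothetical fault-path caller (UVM_ADV_NORMAL and the retry policy
are assumptions):

	struct vm_page *pgs[1];
	int npages, result;

	npages = 1;
	pgs[0] = NULL;
	/* first pass: only take what is already resident */
	result = uobj->pgops->pgo_get(uobj, trunc_page(off), pgs, &npages,
	    0, VM_PROT_READ, UVM_ADV_NORMAL, PGO_LOCKED);
	if (result == VM_PAGER_UNLOCK) {
		/* not resident: drop upper-level locks, retry blocking */
	}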
+
/*
- * uvn_io: do I/O to a vnode
- *
- * => prefer map unlocked (not required)
- * => object must be locked! we will _unlock_ it before starting I/O.
- * => flags: PGO_SYNCIO -- use sync. I/O
- * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
- * [thus we never do async i/o! see iodone comment]
+ * uvn_findpages:
+ * return the page for the uobj and offset requested, allocating if needed.
+ * => uobj must be locked.
+ * => returned page will be BUSY.
*/
-static int
-uvn_io(uvn, pps, npages, flags, rw)
- struct uvm_vnode *uvn;
- vm_page_t *pps;
- int npages, flags, rw;
+void
+uvn_findpages(uobj, offset, npagesp, pps, flags)
+ struct uvm_object *uobj;
+ voff_t offset;
+ int *npagesp;
+ struct vm_page **pps;
+ int flags;
{
- struct vnode *vn;
- struct uio uio;
- struct iovec iov;
- vaddr_t kva;
- off_t file_offset;
- int waitf, result, mapinflags;
- size_t got, wanted;
- UVMHIST_FUNC("uvn_io"); UVMHIST_CALLED(maphist);
-
- UVMHIST_LOG(maphist, "rw=%d", rw,0,0,0);
-
- /*
- * init values
- */
-
- waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
- vn = (struct vnode *) uvn;
- file_offset = pps[0]->offset;
-
- /*
- * check for sync'ing I/O.
- */
-
- while (uvn->u_flags & UVM_VNODE_IOSYNC) {
- if (waitf == M_NOWAIT) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist,"<- try again (iosync)",0,0,0,0);
- return(VM_PAGER_AGAIN);
- }
- uvn->u_flags |= UVM_VNODE_IOSYNCWANTED;
- UVM_UNLOCK_AND_WAIT(&uvn->u_flags, &uvn->u_obj.vmobjlock,
- FALSE, "uvn_iosync",0);
- simple_lock(&uvn->u_obj.vmobjlock);
- }
-
- /*
- * check size
- */
-
- if (file_offset >= uvn->u_size) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist,"<- BAD (size check)",0,0,0,0);
- return(VM_PAGER_BAD);
- }
-
- /*
- * first try and map the pages in (without waiting)
- */
-
- mapinflags = (rw == UIO_READ) ?
- UVMPAGER_MAPIN_READ : UVMPAGER_MAPIN_WRITE;
-
- kva = uvm_pagermapin(pps, npages, mapinflags);
- if (kva == 0 && waitf == M_NOWAIT) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- UVMHIST_LOG(maphist,"<- mapin failed (try again)",0,0,0,0);
- return(VM_PAGER_AGAIN);
- }
-
- /*
- * ok, now bump u_nio up. at this point we are done with uvn
- * and can unlock it. if we still don't have a kva, try again
- * (this time with sleep ok).
- */
-
- uvn->u_nio++; /* we have an I/O in progress! */
- simple_unlock(&uvn->u_obj.vmobjlock);
- /* NOTE: object now unlocked */
- if (kva == 0)
- kva = uvm_pagermapin(pps, npages,
- mapinflags | UVMPAGER_MAPIN_WAITOK);
-
- /*
- * ok, mapped in. our pages are PG_BUSY so they are not going to
- * get touched (so we can look at "offset" without having to lock
- * the object). set up for I/O.
- */
-
- /*
- * fill out uio/iov
- */
-
- iov.iov_base = (caddr_t) kva;
- wanted = npages << PAGE_SHIFT;
- if (file_offset + wanted > uvn->u_size)
- wanted = uvn->u_size - file_offset; /* XXX: needed? */
- iov.iov_len = wanted;
- uio.uio_iov = &iov;
- uio.uio_iovcnt = 1;
- uio.uio_offset = file_offset;
- uio.uio_segflg = UIO_SYSSPACE;
- uio.uio_rw = rw;
- uio.uio_resid = wanted;
- uio.uio_procp = curproc;
-
- /*
- * do the I/O! (XXX: curproc?)
- */
-
- UVMHIST_LOG(maphist, "calling VOP",0,0,0,0);
-
- /*
- * This process may already have this vnode locked, if we faulted in
- * copyin() or copyout() on a region backed by this vnode
- * while doing I/O to the vnode. If this is the case, don't
- * panic.. instead, return the error to the user.
- *
- * XXX this is a stopgap to prevent a panic.
- * Ideally, this kind of operation *should* work.
- */
- result = 0;
- if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
- result = vn_lock(vn, LK_EXCLUSIVE | LK_RETRY | LK_RECURSEFAIL, curproc);
-
- if (result == 0) {
- /* NOTE: vnode now locked! */
-
- if (rw == UIO_READ)
- result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
- else
- result = VOP_WRITE(vn, &uio, 0, curproc->p_ucred);
+ int i, rv, npages;
- if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
- VOP_UNLOCK(vn, 0, curproc);
+ rv = 0;
+ npages = *npagesp;
+ for (i = 0; i < npages; i++, offset += PAGE_SIZE) {
+ rv += uvn_findpage(uobj, offset, &pps[i], flags);
}
-
- /* NOTE: vnode now unlocked (unless vnislocked) */
-
- UVMHIST_LOG(maphist, "done calling VOP",0,0,0,0);
-
- /*
- * result == unix style errno (0 == OK!)
- *
- * zero out rest of buffer (if needed)
- */
-
- if (result == 0) {
- got = wanted - uio.uio_resid;
-
- if (wanted && got == 0) {
- result = EIO; /* XXX: error? */
- } else if (got < PAGE_SIZE * npages && rw == UIO_READ) {
- memset((void *) (kva + got), 0,
- (npages << PAGE_SHIFT) - got);
- }
- }
-
- /*
- * now remove pager mapping
- */
- uvm_pagermapout(kva, npages);
-
- /*
- * now clean up the object (i.e. drop I/O count)
- */
-
- simple_lock(&uvn->u_obj.vmobjlock);
- /* NOTE: object now locked! */
-
- uvn->u_nio--; /* I/O DONE! */
- if ((uvn->u_flags & UVM_VNODE_IOSYNC) != 0 && uvn->u_nio == 0) {
- wakeup(&uvn->u_nio);
- }
- simple_unlock(&uvn->u_obj.vmobjlock);
- /* NOTE: object now unlocked! */
-
- /*
- * done!
- */
-
- UVMHIST_LOG(maphist, "<- done (result %d)", result,0,0,0);
- if (result == 0)
- return(VM_PAGER_OK);
- else
- return(VM_PAGER_ERROR);
+ *npagesp = rv;
}
-/*
- * uvm_vnp_uncache: disable "persisting" in a vnode... when last reference
- * is gone we will kill the object (flushing dirty pages back to the vnode
- * if needed).
- *
- * => returns TRUE if there was no uvm_object attached or if there was
- * one and we killed it [i.e. if there is no active uvn]
- * => called with the vnode VOP_LOCK'd [we will unlock it for I/O, if
- * needed]
- *
- * => XXX: given that we now kill uvn's when a vnode is recycled (without
- * having to hold a reference on the vnode) and given a working
- * uvm_vnp_sync(), how does that affect the need for this function?
- * [XXXCDC: seems like it can die?]
- *
- * => XXX: this function should DIE once we merge the VM and buffer
- * cache.
- *
- * research shows that this is called in the following places:
- * ext2fs_truncate, ffs_truncate, detrunc[msdosfs]: called when vnode
- * changes sizes
- * ext2fs_write, WRITE [ufs_readwrite], msdosfs_write: called when we
- * are written to
- * ext2fs_chmod, ufs_chmod: called if VTEXT vnode and the sticky bit
- * is off
- * ffs_realloccg: when we can't extend the current block and have
- * to allocate a new one we call this [XXX: why?]
- * nfsrv_rename, rename_files: called when the target filename is there
- * and we want to remove it
- * nfsrv_remove, sys_unlink: called on file we are removing
- * nfsrv_access: if VTEXT and we want WRITE access and we don't uncache
- * then return "text busy"
- * nfs_open: seems to uncache any file opened with nfs
- * vn_writechk: if VTEXT vnode and can't uncache return "text busy"
- */
-
-boolean_t
-uvm_vnp_uncache(vp)
- struct vnode *vp;
+static int
+uvn_findpage(uobj, offset, pgp, flags)
+ struct uvm_object *uobj;
+ voff_t offset;
+ struct vm_page **pgp;
+ int flags;
{
- struct uvm_vnode *uvn = &vp->v_uvm;
-
- /*
- * lock uvn part of the vnode and check to see if we need to do anything
- */
+ struct vm_page *pg;
+ UVMHIST_FUNC("uvn_findpage"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p off 0x%lx", uobj, offset,0,0);
- simple_lock(&uvn->u_obj.vmobjlock);
- if ((uvn->u_flags & UVM_VNODE_VALID) == 0 ||
- (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- return(TRUE);
+ if (*pgp != NULL) {
+ UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0);
+ return 0;
}
+ for (;;) {
+ /* look for an existing page */
+ pg = uvm_pagelookup(uobj, offset);
+
+ /* nope? allocate one now */
+ if (pg == NULL) {
+ if (flags & UFP_NOALLOC) {
+ UVMHIST_LOG(ubchist, "noalloc", 0,0,0,0);
+ return 0;
+ }
+ pg = uvm_pagealloc(uobj, offset, NULL, 0);
+ if (pg == NULL) {
+ if (flags & UFP_NOWAIT) {
+ UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
+ return 0;
+ }
+ simple_unlock(&uobj->vmobjlock);
+ uvm_wait("uvn_fp1");
+ simple_lock(&uobj->vmobjlock);
+ continue;
+ }
+ if (UVM_OBJ_IS_VTEXT(uobj)) {
+ uvmexp.vtextpages++;
+ } else {
+ uvmexp.vnodepages++;
+ }
+ UVMHIST_LOG(ubchist, "alloced",0,0,0,0);
+ break;
+ } else if (flags & UFP_NOCACHE) {
+ UVMHIST_LOG(ubchist, "nocache",0,0,0,0);
+ return 0;
+ }
- /*
- * we have a valid, non-blocked uvn. clear persist flag.
- * if uvn is currently active we can return now.
- */
-
- uvn->u_flags &= ~UVM_VNODE_CANPERSIST;
- if (uvn->u_obj.uo_refs) {
- simple_unlock(&uvn->u_obj.vmobjlock);
- return(FALSE);
- }
-
- /*
- * uvn is currently persisting! we have to gain a reference to
- * it so that we can call uvn_detach to kill the uvn.
- */
-
- VREF(vp); /* seems ok, even with VOP_LOCK */
- uvn->u_obj.uo_refs++; /* value is now 1 */
- simple_unlock(&uvn->u_obj.vmobjlock);
-
-
-#ifdef DEBUG
- /*
- * carry over sanity check from old vnode pager: the vnode should
- * be VOP_LOCK'd, and we confirm it here.
- */
- if (!VOP_ISLOCKED(vp)) {
- boolean_t is_ok_anyway = FALSE;
-#if defined(NFSCLIENT)
- extern int (**nfsv2_vnodeop_p) __P((void *));
- extern int (**spec_nfsv2nodeop_p) __P((void *));
- extern int (**fifo_nfsv2nodeop_p) __P((void *));
-
- /* vnode is NOT VOP_LOCKed: some vnode types _never_ lock */
- if (vp->v_op == nfsv2_vnodeop_p ||
- vp->v_op == spec_nfsv2nodeop_p) {
- is_ok_anyway = TRUE;
+ /* page is there, see if we need to wait on it */
+ if ((pg->flags & (PG_BUSY|PG_RELEASED)) != 0) {
+ if (flags & UFP_NOWAIT) {
+ UVMHIST_LOG(ubchist, "nowait",0,0,0,0);
+ return 0;
+ }
+ pg->flags |= PG_WANTED;
+ UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
+ "uvn_fp2", 0);
+ simple_lock(&uobj->vmobjlock);
+ continue;
}
- if (vp->v_op == fifo_nfsv2nodeop_p) {
- is_ok_anyway = TRUE;
+
+ /* skip PG_RDONLY pages if requested */
+ if ((flags & UFP_NORDONLY) && (pg->flags & PG_RDONLY)) {
+ UVMHIST_LOG(ubchist, "nordonly",0,0,0,0);
+ return 0;
}
-#endif /* defined(NFSSERVER) || defined(NFSCLIENT) */
- if (!is_ok_anyway)
- panic("uvm_vnp_uncache: vnode not locked!");
- }
-#endif /* DEBUG */
- /*
- * now drop our reference to the vnode. if we have the sole
- * reference to the vnode then this will cause it to die [as we
- * just cleared the persist flag]. we have to unlock the vnode
- * while we are doing this as it may trigger I/O.
- *
- * XXX: it might be possible for uvn to get reclaimed while we are
- * unlocked causing us to return TRUE when we should not. we ignore
- * this as a false-positive return value doesn't hurt us.
- */
- VOP_UNLOCK(vp, 0, curproc);
- uvn_detach(&uvn->u_obj);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
-
- /*
- * and return...
- */
-
- return(TRUE);
+ /* mark the page BUSY and we're done. */
+ pg->flags |= PG_BUSY;
+ UVM_PAGE_OWN(pg, "uvn_findpage");
+ UVMHIST_LOG(ubchist, "found",0,0,0,0);
+ break;
+ }
+ *pgp = pg;
+ return 1;
}
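
A hedged usage sketch for uvn_findpages() (buffer size and flag choice are for
illustration only):

	struct vm_page *pgs[8];
	int npages = 8;

	memset(pgs, 0, sizeof(pgs));
	simple_lock(&uobj->vmobjlock);
	/* resident pages only, no allocation; found pages come back BUSY */
	uvn_findpages(uobj, trunc_page(off), &npages, pgs, UFP_NOALLOC);
	simple_unlock(&uobj->vmobjlock);
	/* npages now holds the count found; NULL slots were not resident */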
/*
@@ -1879,150 +1014,49 @@ uvm_vnp_setsize(vp, newsize)
voff_t newsize;
{
struct uvm_vnode *uvn = &vp->v_uvm;
+ voff_t pgend = round_page(newsize);
+ UVMHIST_FUNC("uvm_vnp_setsize"); UVMHIST_CALLED(ubchist);
- /*
- * lock uvn and check for valid object, and if valid: do it!
- */
simple_lock(&uvn->u_obj.vmobjlock);
- if (uvn->u_flags & UVM_VNODE_VALID) {
-
- /*
- * now check if the size has changed: if we shrink we had better
- * toss some pages...
- */
- if (uvn->u_size > newsize) {
- (void)uvn_flush(&uvn->u_obj, newsize,
- uvn->u_size, PGO_FREE);
- }
- uvn->u_size = newsize;
- }
- simple_unlock(&uvn->u_obj.vmobjlock);
+ UVMHIST_LOG(ubchist, "old 0x%x new 0x%x", uvn->u_size, newsize, 0,0);
/*
- * done
+ * now check if the size has changed: if we shrink we had better
+ * toss some pages...
*/
- return;
+
+ if (uvn->u_size > pgend && uvn->u_size != VSIZENOTSET) {
+ (void) uvn_flush(&uvn->u_obj, pgend, 0, PGO_FREE);
+ }
+ uvn->u_size = newsize;
+ simple_unlock(&uvn->u_obj.vmobjlock);
}
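
A hedged sketch of the shrink path as a filesystem's truncate code might drive
it; zeroing the partial EOF page via uvm_vnp_zerorange() below is an assumption
about caller responsibility:

	/* free whole pages past the new EOF ... */
	uvm_vnp_setsize(vp, newsize);
	/* ... and zero the tail of the page that now contains EOF */
	if (newsize & PAGE_MASK)
		uvm_vnp_zerorange(vp, newsize,
		    round_page(newsize) - newsize);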
/*
- * uvm_vnp_sync: flush all dirty VM pages back to their backing vnodes.
- *
- * => called from sys_sync with no VM structures locked
- * => only one process can do a sync at a time (because the uvn
- * structure only has one queue for sync'ing). we ensure this
- * by holding the uvn_sync_lock while the sync is in progress.
- * other processes attempting a sync will sleep on this lock
- * until we are done.
+ * uvm_vnp_zerorange: set a range of bytes in a file to zero.
*/
void
-uvm_vnp_sync(mp)
- struct mount *mp;
-{
- struct uvm_vnode *uvn;
+uvm_vnp_zerorange(vp, off, len)
struct vnode *vp;
- boolean_t got_lock;
-
- /*
- * step 1: ensure we are only ones using the uvn_sync_q by locking
- * our lock...
- */
- lockmgr(&uvn_sync_lock, LK_EXCLUSIVE, NULL, curproc);
-
- /*
- * step 2: build up a simpleq of uvns of interest based on the
- * write list. we gain a reference to uvns of interest. must
- * be careful about locking uvn's since we will be holding uvn_wl_lock
- * in the body of the loop.
- */
- SIMPLEQ_INIT(&uvn_sync_q);
- simple_lock(&uvn_wl_lock);
- for (uvn = uvn_wlist.lh_first ; uvn != NULL ;
- uvn = uvn->u_wlist.le_next) {
-
- vp = (struct vnode *) uvn;
- if (mp && vp->v_mount != mp)
- continue;
-
- /* attempt to gain reference */
- while ((got_lock = simple_lock_try(&uvn->u_obj.vmobjlock)) ==
- FALSE &&
- (uvn->u_flags & UVM_VNODE_BLOCKED) == 0)
- /* spin */ ;
-
- /*
- * we will exit the loop if either if the following are true:
- * - we got the lock [always true if NCPU == 1]
- * - we failed to get the lock but noticed the vnode was
- * "blocked" -- in this case the vnode must be a dying
- * vnode, and since dying vnodes are in the process of
- * being flushed out, we can safely skip this one
- *
- * we want to skip over the vnode if we did not get the lock,
- * or if the vnode is already dying (due to the above logic).
- *
- * note that uvn must already be valid because we found it on
- * the wlist (this also means it can't be ALOCK'd).
- */
- if (!got_lock || (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
- if (got_lock)
- simple_unlock(&uvn->u_obj.vmobjlock);
- continue; /* skip it */
- }
-
- /*
- * gain reference. watch out for persisting uvns (need to
- * regain vnode REF).
- */
- if (uvn->u_obj.uo_refs == 0)
- VREF(vp);
- uvn->u_obj.uo_refs++;
- simple_unlock(&uvn->u_obj.vmobjlock);
-
- /*
- * got it!
- */
- SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq);
- }
- simple_unlock(&uvn_wl_lock);
+ off_t off;
+ size_t len;
+{
+ void *win;
- /*
- * step 3: we now have a list of uvn's that may need cleaning.
- * we are holding the uvn_sync_lock, but have dropped the uvn_wl_lock
- * (so we can now safely lock uvn's again).
- */
+ /*
+ * XXXUBC invent kzero() and use it
+ */
- for (uvn = uvn_sync_q.sqh_first ; uvn ; uvn = uvn->u_syncq.sqe_next) {
- simple_lock(&uvn->u_obj.vmobjlock);
-#ifdef DEBUG
- if (uvn->u_flags & UVM_VNODE_DYING) {
- printf("uvm_vnp_sync: dying vnode on sync list\n");
- }
-#endif
- uvn_flush(&uvn->u_obj, 0, 0,
- PGO_CLEANIT|PGO_ALLPAGES|PGO_DOACTCLUST);
+ while (len) {
+ vsize_t bytelen = len;
- /*
- * if we have the only reference and we just cleaned the uvn,
- * then we can pull it out of the UVM_VNODE_WRITEABLE state
- * thus allowing us to avoid thinking about flushing it again
- * on later sync ops.
- */
- if (uvn->u_obj.uo_refs == 1 &&
- (uvn->u_flags & UVM_VNODE_WRITEABLE)) {
- LIST_REMOVE(uvn, u_wlist);
- uvn->u_flags &= ~UVM_VNODE_WRITEABLE;
- }
-
- simple_unlock(&uvn->u_obj.vmobjlock);
+ win = ubc_alloc(&vp->v_uvm.u_obj, off, &bytelen, UBC_WRITE);
+ memset(win, 0, bytelen);
+ ubc_release(win, 0);
- /* now drop our reference to the uvn */
- uvn_detach(&uvn->u_obj);
- }
-
- /*
- * done! release sync lock
- */
- lockmgr(&uvn_sync_lock, LK_RELEASE, (void *)0, curproc);
+ off += bytelen;
+ len -= bytelen;
+ }
}
diff --git a/sys/uvm/uvm_vnode.h b/sys/uvm/uvm_vnode.h
index 29efe4d2ac4..ce853189207 100644
--- a/sys/uvm/uvm_vnode.h
+++ b/sys/uvm/uvm_vnode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: uvm_vnode.h,v 1.8 2001/08/06 14:03:05 art Exp $ */
+/* $OpenBSD: uvm_vnode.h,v 1.9 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: uvm_vnode.h,v 1.9 2000/03/26 20:54:48 kleink Exp $ */
/*
@@ -55,56 +55,6 @@ struct uvm_vnode {
int u_flags; /* flags */
int u_nio; /* number of running I/O requests */
voff_t u_size; /* size of object */
-
- /* the following entry is locked by uvn_wl_lock */
- LIST_ENTRY(uvm_vnode) u_wlist; /* list of writeable vnode objects */
-
- /* the following entry is locked by uvn_sync_lock */
- SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* vnode objects due for a "sync" */
};
-/*
- * u_flags values
- */
-#define UVM_VNODE_VALID 0x001 /* we are attached to the vnode */
-#define UVM_VNODE_CANPERSIST 0x002 /* we can persist after ref == 0 */
-#define UVM_VNODE_ALOCK 0x004 /* uvn_attach is locked out */
-#define UVM_VNODE_DYING 0x008 /* final detach/terminate in
- progress */
-#define UVM_VNODE_RELKILL 0x010 /* uvn should be killed by releasepg
- when final i/o is done */
-#define UVM_VNODE_WANTED 0x020 /* someone is waiting for alock,
- dying, or relkill to clear */
-#define UVM_VNODE_VNISLOCKED 0x040 /* underlying vnode struct is locked
- (valid when DYING is true) */
-#define UVM_VNODE_IOSYNC 0x080 /* I/O sync in progress ... setter
- sleeps on &uvn->u_nio */
-#define UVM_VNODE_IOSYNCWANTED 0x100 /* a process is waiting for the
- i/o sync to clear so it can do
- i/o */
-#define UVM_VNODE_WRITEABLE 0x200 /* uvn has pages that are writeable */
-
-/*
- * UVM_VNODE_BLOCKED: any condition that should prevent new processes from
- * touching the vnode [set WANTED and sleep to wait for it to clear]
- */
-#define UVM_VNODE_BLOCKED (UVM_VNODE_ALOCK|UVM_VNODE_DYING|UVM_VNODE_RELKILL)
-
-#ifdef _KERNEL
-
-/*
- * prototypes
- */
-
-#if 0
-/*
- * moved uvn_attach to uvm_extern.h because uvm_vnode.h is needed to
- * include sys/vnode.h, and files that include sys/vnode.h don't know
- * what a vm_prot_t is.
- */
-struct uvm_object *uvn_attach __P((void *, vm_prot_t));
-#endif
-
-#endif /* _KERNEL */
-
#endif /* _UVM_UVM_VNODE_H_ */