author    Artur Grabowski <art@cvs.openbsd.org>    2001-11-27 05:27:13 +0000
committer Artur Grabowski <art@cvs.openbsd.org>    2001-11-27 05:27:13 +0000
commit    8a1845e49f56720cbfccd4c7f5f80ba5b980fdf4 (patch)
tree      d4a522dc41cdc79ba48fe761e94663b795da8cc0 /sys/nfs
parent    0d68e9b5af14f4bfa04d22dbebab5972ac647b26 (diff)
Merge in the unified buffer cache code as found in NetBSD 2001/03/10. The
code was written mostly by Chuck Silvers <chuq@chuq.com>/<chs@netbsd.org>.
It has been tested for the past few weeks by many developers and should be
in a pretty stable state, but it will require optimizations and additional
cleanups.
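
For readers new to UBC: the heart of this merge is visible in the
nfs_bioread()/nfs_write() hunks below, where per-buffer valid/dirty ranges
(b_validoff/b_dirtyend) give way to windowed access to the page cache via
ubc_alloc(), uiomove() and ubc_release(). A rough userland analogue of that
loop, using mmap() in place of ubc_alloc() -- purely an illustrative sketch,
not kernel code -- looks like this:

/*
 * Userland analogue of the UBC read loop added to nfs_bioread():
 * map a window over the file, copy out of it, release the window.
 * In the kernel the window comes from ubc_alloc()/ubc_release()
 * on vp->v_uvm.u_obj; here mmap()/munmap() stand in for them.
 */
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	struct stat st;
	off_t off = 0;
	int fd;

	if (argc != 2 || (fd = open(argv[1], O_RDONLY)) == -1)
		return 1;
	if (fstat(fd, &st) == -1)
		return 1;

	while (off < st.st_size) {
		/* "bytelen" plays the role of the ubc_alloc window size. */
		size_t bytelen = 65536;

		if ((off_t)bytelen > st.st_size - off)
			bytelen = st.st_size - off;

		/* win = ubc_alloc(&vp->v_uvm.u_obj, off, &bytelen, UBC_READ); */
		char *win = mmap(NULL, bytelen, PROT_READ, MAP_SHARED,
		    fd, off);
		if (win == MAP_FAILED)
			return 1;

		/* error = uiomove(win, bytelen, uio); */
		fwrite(win, 1, bytelen, stdout);

		/* ubc_release(win, 0); */
		munmap(win, bytelen);
		off += bytelen;
	}
	close(fd);
	return 0;
}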
Diffstat (limited to 'sys/nfs')
-rw-r--r--  sys/nfs/nfs.h            20
-rw-r--r--  sys/nfs/nfs_bio.c       928
-rw-r--r--  sys/nfs/nfs_node.c       27
-rw-r--r--  sys/nfs/nfs_serv.c       11
-rw-r--r--  sys/nfs/nfs_subs.c      267
-rw-r--r--  sys/nfs/nfs_syscalls.c   43
-rw-r--r--  sys/nfs/nfs_var.h        14
-rw-r--r--  sys/nfs/nfs_vfsops.c      9
-rw-r--r--  sys/nfs/nfs_vnops.c     264
-rw-r--r--  sys/nfs/nfsnode.h        16
10 files changed, 1031 insertions, 568 deletions
diff --git a/sys/nfs/nfs.h b/sys/nfs/nfs.h
index 33435dc23e4..b86819902f2 100644
--- a/sys/nfs/nfs.h
+++ b/sys/nfs/nfs.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs.h,v 1.13 2001/09/16 00:42:44 millert Exp $ */
+/* $OpenBSD: nfs.h,v 1.14 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs.h,v 1.10.4.1 1996/05/27 11:23:56 fvdl Exp $ */
/*
@@ -78,8 +78,18 @@
* Ideally, NFS_DIRBLKSIZ should be bigger, but I've seen servers with
* broken NFS/ethernet drivers that won't work with anything bigger (Linux..)
*/
-#define NFS_DIRBLKSIZ 1024 /* Must be a multiple of DIRBLKSIZ */
+#if 1
+/*
+ * XXXUBC temp hack because of the removal of b_validend.
+ * eventually we'll store NFS VDIR data in the page cache as well,
+ * we'll fix this at that point.
+ */
+#define NFS_DIRBLKSIZ PAGE_SIZE
+#define NFS_READDIRBLKSIZ PAGE_SIZE
+#else
+#define NFS_DIRBLKSIZ 1024 /* Must be a multiple of DIRBLKSIZ */
#define NFS_READDIRBLKSIZ 512 /* Size of read dir blocks. XXX */
+#endif
/*
* Oddballs
@@ -111,10 +121,10 @@
#endif
/*
- * The B_INVAFTERWRITE flag should be set to whatever is required by the
- * buffer cache code to say "Invalidate the block after it is written back".
+ * Use the vm_page flag reserved for pager use to indicate pages
+ * which have been written to the server but not yet committed.
*/
-#define B_INVAFTERWRITE B_INVAL
+#define PG_NEEDCOMMIT PG_PAGER1
/*
* The IO_METASYNC flag should be implemented for local file systems.
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
index 1f33bc2eab7..42b25763a88 100644
--- a/sys/nfs/nfs_bio.c
+++ b/sys/nfs/nfs_bio.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_bio.c,v 1.24 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_bio.c,v 1.25 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_bio.c,v 1.25.4.2 1996/07/08 20:47:04 jtc Exp $ */
/*
@@ -50,8 +50,9 @@
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/namei.h>
+#include <sys/pool.h>
-#include <uvm/uvm_extern.h>
+#include <uvm/uvm.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
@@ -70,20 +71,19 @@ struct nfsstats nfsstats;
*/
int
nfs_bioread(vp, uio, ioflag, cred)
- register struct vnode *vp;
- register struct uio *uio;
+ struct vnode *vp;
+ struct uio *uio;
int ioflag;
struct ucred *cred;
{
- register struct nfsnode *np = VTONFS(vp);
- register int biosize, diff;
- struct buf *bp = NULL, *rabp;
+ struct nfsnode *np = VTONFS(vp);
+ int biosize;
+ struct buf *bp = NULL;
struct vattr vattr;
struct proc *p;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- daddr_t lbn, bn, rabn;
caddr_t baddr;
- int got_buf = 0, nra, error = 0, n = 0, on = 0, not_readin;
+ int got_buf = 0, error = 0, n = 0, on = 0;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_READ)
@@ -153,87 +153,25 @@ nfs_bioread(vp, uio, ioflag, cred)
switch (vp->v_type) {
case VREG:
nfsstats.biocache_reads++;
- lbn = uio->uio_offset / biosize;
- on = uio->uio_offset & (biosize - 1);
- bn = lbn * (biosize / DEV_BSIZE);
- not_readin = 1;
-
- /*
- * Start the read ahead(s), as required.
- */
- if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
- for (nra = 0; nra < nmp->nm_readahead &&
- (lbn + 1 + nra) * biosize < np->n_size; nra++) {
- rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
- if (!incore(vp, rabn)) {
- rabp = nfs_getcacheblk(vp, rabn, biosize, p);
- if (!rabp)
- return (EINTR);
- if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
- rabp->b_flags |= (B_READ | B_ASYNC);
- if (nfs_asyncio(rabp)) {
- rabp->b_flags |= B_INVAL;
- brelse(rabp);
- }
- } else
- brelse(rabp);
- }
- }
- }
+ error = 0;
+ while (uio->uio_resid > 0) {
+ void *win;
+ vsize_t bytelen = MIN(np->n_size - uio->uio_offset,
+ uio->uio_resid);
- /*
- * If the block is in the cache and has the required data
- * in a valid region, just copy it out.
- * Otherwise, get the block and write back/read in,
- * as required.
- */
- if ((bp = incore(vp, bn)) &&
- (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
- (B_BUSY | B_WRITEINPROG))
- got_buf = 0;
- else {
-again:
- bp = nfs_getcacheblk(vp, bn, biosize, p);
- if (!bp)
- return (EINTR);
- got_buf = 1;
- if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
- bp->b_flags |= B_READ;
- not_readin = 0;
- error = nfs_doio(bp, p);
- if (error) {
- brelse(bp);
- return (error);
- }
- }
- }
- n = min((unsigned)(biosize - on), uio->uio_resid);
- diff = np->n_size - uio->uio_offset;
- if (diff < n)
- n = diff;
- if (not_readin && n > 0) {
- if (on < bp->b_validoff || (on + n) > bp->b_validend) {
- if (!got_buf) {
- bp = nfs_getcacheblk(vp, bn, biosize, p);
- if (!bp)
- return (EINTR);
- got_buf = 1;
- }
- bp->b_flags |= B_INVAFTERWRITE;
- if (bp->b_dirtyend > 0) {
- if ((bp->b_flags & B_DELWRI) == 0)
- panic("nfsbioread");
- if (VOP_BWRITE(bp) == EINTR)
- return (EINTR);
- } else
- brelse(bp);
- goto again;
+ if (bytelen == 0)
+ break;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
}
}
- diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
- if (diff < n)
- n = diff;
+ n = 0;
break;
+
case VLNK:
nfsstats.biocache_readlinks++;
bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
@@ -247,7 +185,7 @@ again:
return (error);
}
}
- n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
+ n = MIN(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
got_buf = 1;
on = 0;
break;
@@ -289,18 +227,17 @@ nfs_write(v)
int a_ioflag;
struct ucred *a_cred;
} */ *ap = v;
- register int biosize;
- register struct uio *uio = ap->a_uio;
+ int biosize;
+ struct uio *uio = ap->a_uio;
struct proc *p = uio->uio_procp;
- register struct vnode *vp = ap->a_vp;
+ struct vnode *vp = ap->a_vp;
struct nfsnode *np = VTONFS(vp);
- register struct ucred *cred = ap->a_cred;
+ struct ucred *cred = ap->a_cred;
int ioflag = ap->a_ioflag;
- struct buf *bp;
struct vattr vattr;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- daddr_t lbn, bn;
- int n, on, error = 0;
+ int error = 0;
+ int rv;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
@@ -360,85 +297,47 @@ nfs_write(v)
*/
biosize = nmp->nm_rsize;
do {
-
- /*
- * XXX make sure we aren't cached in the VM page cache
- */
- uvm_vnp_uncache(vp);
+ void *win;
+ voff_t oldoff = uio->uio_offset;
+ vsize_t bytelen = uio->uio_resid;
nfsstats.biocache_writes++;
- lbn = uio->uio_offset / biosize;
- on = uio->uio_offset & (biosize-1);
- n = min((unsigned)(biosize - on), uio->uio_resid);
- bn = lbn * (biosize / DEV_BSIZE);
-again:
- bp = nfs_getcacheblk(vp, bn, biosize, p);
- if (!bp)
- return (EINTR);
np->n_flag |= NMODIFIED;
- if (uio->uio_offset + n > np->n_size) {
- np->n_size = uio->uio_offset + n;
- uvm_vnp_setsize(vp, (u_long)np->n_size);
- }
-
- /*
- * If the new write will leave a contiguous dirty
- * area, just update the b_dirtyoff and b_dirtyend,
- * otherwise force a write rpc of the old dirty area.
- */
- if (bp->b_dirtyend > 0 &&
- (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
- bp->b_proc = p;
- if (VOP_BWRITE(bp) == EINTR)
- return (EINTR);
- goto again;
- }
-
- error = uiomove((char *)bp->b_data + on, n, uio);
- if (error) {
- bp->b_flags |= B_ERROR;
- brelse(bp);
- return (error);
+ if (np->n_size < uio->uio_offset + bytelen) {
+ np->n_size = uio->uio_offset + bytelen;
+ uvm_vnp_setsize(vp, np->n_size);
}
- if (bp->b_dirtyend > 0) {
- bp->b_dirtyoff = min(on, bp->b_dirtyoff);
- bp->b_dirtyend = max((on + n), bp->b_dirtyend);
- } else {
- bp->b_dirtyoff = on;
- bp->b_dirtyend = on + n;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, &bytelen,
+ UBC_WRITE);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ rv = 1;
+ if ((ioflag & IO_SYNC)) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj,
+ oldoff & ~(nmp->nm_wsize - 1),
+ uio->uio_offset & ~(nmp->nm_wsize - 1),
+ PGO_CLEANIT|PGO_SYNCIO);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ } else if ((oldoff & ~(nmp->nm_wsize - 1)) !=
+ (uio->uio_offset & ~(nmp->nm_wsize - 1))) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj,
+ oldoff & ~(nmp->nm_wsize - 1),
+ uio->uio_offset & ~(nmp->nm_wsize - 1),
+ PGO_CLEANIT|PGO_WEAK);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
}
- if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
- bp->b_validoff > bp->b_dirtyend) {
- bp->b_validoff = bp->b_dirtyoff;
- bp->b_validend = bp->b_dirtyend;
- } else {
- bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
- bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
+ if (!rv) {
+ error = EIO;
}
-
- /*
- * Since this block is being modified, it must be written
- * again and not just committed.
- */
- bp->b_flags &= ~B_NEEDCOMMIT;
-
- /*
- * If the lease is non-cachable or IO_SYNC do bwrite().
- */
- if (ioflag & IO_SYNC) {
- bp->b_proc = p;
- error = VOP_BWRITE(bp);
- if (error)
- return (error);
- } else if ((n + on) == biosize) {
- bp->b_proc = (struct proc *)0;
- bp->b_flags |= B_ASYNC;
- (void)nfs_writebp(bp, 0);
- } else {
- bdwrite(bp);
+ if (error) {
+ break;
}
- } while (uio->uio_resid > 0 && n > 0);
- return (0);
+ } while (uio->uio_resid > 0);
+ return (error);
}
/*
@@ -460,9 +359,9 @@ nfs_getcacheblk(vp, bn, size, p)
if (nmp->nm_flag & NFSMNT_INT) {
bp = getblk(vp, bn, size, PCATCH, 0);
- while (bp == (struct buf *)0) {
- if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
- return ((struct buf *)0);
+ while (bp == NULL) {
+ if (nfs_sigintr(nmp, NULL, p))
+ return (NULL);
bp = getblk(vp, bn, size, 0, 2 * hz);
}
} else
@@ -502,7 +401,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg)
np->n_flag |= NFLUSHWANT;
error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
slptimeo);
- if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p))
+ if (error && intrflg && nfs_sigintr(nmp, NULL, p))
return (EINTR);
}
@@ -512,7 +411,7 @@ nfs_vinvalbuf(vp, flags, cred, p, intrflg)
np->n_flag |= NFLUSHINPROG;
error = vinvalbuf(vp, flags, cred, p, slpflag, 0);
while (error) {
- if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+ if (intrflg && nfs_sigintr(nmp, NULL, p)) {
np->n_flag &= ~NFLUSHINPROG;
if (np->n_flag & NFLUSHWANT) {
np->n_flag &= ~NFLUSHWANT;
@@ -539,41 +438,20 @@ int
nfs_asyncio(bp)
struct buf *bp;
{
- int i,s;
+ int i;
if (nfs_numasync == 0)
return (EIO);
- for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) {
if (nfs_iodwant[i]) {
- if ((bp->b_flags & B_READ) == 0) {
- bp->b_flags |= B_WRITEINPROG;
- }
-
TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
- nfs_iodwant[i] = (struct proc *)0;
+ nfs_iodwant[i] = NULL;
wakeup((caddr_t)&nfs_iodwant[i]);
return (0);
}
+ }
- /*
- * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE
- * return EIO so the process will call nfs_doio() and do it
- * synchronously.
- */
- if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
- return (EIO);
-
- /*
- * Just turn the async write into a delayed write, instead of
- * doing in synchronously. Hopefully, at least one of the nfsiods
- * is currently doing a write for this file and will pick up the
- * delayed writes before going back to sleep.
- */
- s = splbio();
- buf_dirty(bp);
- splx(s);
- biodone(bp);
- return (0);
+ return (EIO);
}
/*
@@ -589,7 +467,7 @@ nfs_doio(bp, p)
register struct vnode *vp;
struct nfsnode *np;
struct nfsmount *nmp;
- int s, error = 0, diff, len, iomode, must_commit = 0;
+ int error = 0, diff, len, iomode, must_commit = 0;
struct uio uio;
struct iovec io;
@@ -636,9 +514,7 @@ nfs_doio(bp, p)
uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
nfsstats.read_bios++;
error = nfs_readrpc(vp, uiop);
- if (!error) {
- bp->b_validoff = 0;
- if (uiop->uio_resid) {
+ if (!error && uiop->uio_resid) {
/*
* If len > 0, there is a hole in the file and
* no writes after the hole have been pushed to
@@ -649,13 +525,9 @@ nfs_doio(bp, p)
len = np->n_size - ((((off_t)bp->b_blkno) << DEV_BSHIFT)
+ diff);
if (len > 0) {
- len = min(len, uiop->uio_resid);
- bzero((char *)bp->b_data + diff, len);
- bp->b_validend = diff + len;
- } else
- bp->b_validend = diff;
- } else
- bp->b_validend = bp->b_bcount;
+ len = MIN(len, uiop->uio_resid);
+ memset((char *)bp->b_data + diff, 0, len);
+ }
}
if (p && (vp->v_flag & VTEXT) &&
(np->n_mtime != np->n_vattr.va_mtime.tv_sec)) {
@@ -672,62 +544,19 @@ nfs_doio(bp, p)
default:
printf("nfs_doio: type %x unexpected\n",vp->v_type);
break;
- };
+ }
if (error) {
bp->b_flags |= B_ERROR;
bp->b_error = error;
}
} else {
- io.iov_len = uiop->uio_resid = bp->b_dirtyend
- - bp->b_dirtyoff;
- uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE
- + bp->b_dirtyoff;
- io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
+ io.iov_base = bp->b_data;
+ io.iov_len = uiop->uio_resid = bp->b_bcount;
+ uiop->uio_offset = ((off_t)bp->b_blkno) << DEV_BSHIFT;
uiop->uio_rw = UIO_WRITE;
nfsstats.write_bios++;
- if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE)) == B_ASYNC)
- iomode = NFSV3WRITE_UNSTABLE;
- else
- iomode = NFSV3WRITE_FILESYNC;
- bp->b_flags |= B_WRITEINPROG;
-#ifdef fvdl_debug
- printf("nfs_doio(%x): bp %x doff %d dend %d\n",
- vp, bp, bp->b_dirtyoff, bp->b_dirtyend);
-#endif
+ iomode = NFSV3WRITE_UNSTABLE;
error = nfs_writerpc(vp, uiop, &iomode, &must_commit);
- if (!error && iomode == NFSV3WRITE_UNSTABLE)
- bp->b_flags |= B_NEEDCOMMIT;
- else
- bp->b_flags &= ~B_NEEDCOMMIT;
- bp->b_flags &= ~B_WRITEINPROG;
-
- /*
- * For an interrupted write, the buffer is still valid and the
- * write hasn't been pushed to the server yet, so we can't set
- * B_ERROR and report the interruption by setting B_EINTR. For
- * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
- * is essentially a noop.
- * For the case of a V3 write rpc not being committed to stable
- * storage, the block is still dirty and requires either a commit
- * rpc or another write rpc with iomode == NFSV3WRITE_FILESYNC
- * before the block is reused. This is indicated by setting the
- * B_DELWRI and B_NEEDCOMMIT flags.
- */
- if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
- s = splbio();
- buf_dirty(bp);
- splx(s);
-
- if (!(bp->b_flags & B_ASYNC) && error)
- bp->b_flags |= B_EINTR;
- } else {
- if (error) {
- bp->b_flags |= B_ERROR;
- bp->b_error = np->n_error = error;
- np->n_flag |= NWRITEERR;
- }
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- }
}
bp->b_resid = uiop->uio_resid;
if (must_commit)
@@ -735,3 +564,590 @@ nfs_doio(bp, p)
biodone(bp);
return (error);
}
+
+/*
+ * Vnode op for VM getpages.
+ */
+int
+nfs_getpages(v)
+ void *v;
+{
+ struct vop_getpages_args /* {
+ struct vnode *a_vp;
+ voff_t a_offset;
+ vm_page_t *a_m;
+ int *a_count;
+ int a_centeridx;
+ vm_prot_t a_access_type;
+ int a_advice;
+ int a_flags;
+ } */ *ap = v;
+
+ off_t eof, offset, origoffset, startoffset, endoffset;
+ int s, i, error, npages, orignpages, npgs, ridx, pidx, pcount;
+ vaddr_t kva;
+ struct buf *bp, *mbp;
+ struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ size_t bytes, iobytes, tailbytes, totalbytes, skipbytes;
+ int flags = ap->a_flags;
+ int bsize;
+ struct vm_page *pgs[16]; /* XXXUBC 16 */
+ boolean_t v3 = NFS_ISV3(vp);
+ boolean_t async = (flags & PGO_SYNCIO) == 0;
+ boolean_t write = (ap->a_access_type & VM_PROT_WRITE) != 0;
+ struct proc *p = curproc;
+
+ UVMHIST_FUNC("nfs_getpages"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x count %d", vp, (int)ap->a_offset,
+ *ap->a_count,0);
+
+#ifdef DIAGNOSTIC
+ if (ap->a_centeridx < 0 || ap->a_centeridx >= *ap->a_count) {
+ panic("nfs_getpages: centeridx %d out of range",
+ ap->a_centeridx);
+ }
+#endif
+
+ error = 0;
+ origoffset = ap->a_offset;
+ eof = vp->v_uvm.u_size;
+ if (origoffset >= eof) {
+ if ((flags & PGO_LOCKED) == 0) {
+ simple_unlock(&uobj->vmobjlock);
+ }
+ UVMHIST_LOG(ubchist, "off 0x%x past EOF 0x%x",
+ (int)origoffset, (int)eof,0,0);
+ return EINVAL;
+ }
+
+ if (flags & PGO_LOCKED) {
+ uvn_findpages(uobj, origoffset, ap->a_count, ap->a_m,
+ UFP_NOWAIT|UFP_NOALLOC);
+ return 0;
+ }
+
+ /* vnode is VOP_LOCKed, uobj is locked */
+
+ bsize = nmp->nm_rsize;
+ orignpages = MIN(*ap->a_count,
+ round_page(eof - origoffset) >> PAGE_SHIFT);
+ npages = orignpages;
+ startoffset = origoffset & ~(bsize - 1);
+ endoffset = round_page((origoffset + (npages << PAGE_SHIFT)
+ + bsize - 1) & ~(bsize - 1));
+ endoffset = MIN(endoffset, round_page(eof));
+ ridx = (origoffset - startoffset) >> PAGE_SHIFT;
+
+ if (!async && !write) {
+ int rapages = MAX(PAGE_SIZE, nmp->nm_rsize) >> PAGE_SHIFT;
+
+ (void) VOP_GETPAGES(vp, endoffset, NULL, &rapages, 0,
+ VM_PROT_READ, 0, 0);
+ simple_lock(&uobj->vmobjlock);
+ }
+
+ UVMHIST_LOG(ubchist, "npages %d offset 0x%x", npages,
+ (int)origoffset, 0,0);
+ memset(pgs, 0, sizeof(pgs));
+ uvn_findpages(uobj, origoffset, &npages, &pgs[ridx], UFP_ALL);
+
+ if (flags & PGO_OVERWRITE) {
+ UVMHIST_LOG(ubchist, "PGO_OVERWRITE",0,0,0,0);
+
+ /* XXXUBC for now, zero the page if we allocated it */
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if (pg->flags & PG_FAKE) {
+ uvm_pagezero(pg);
+ pg->flags &= ~(PG_FAKE);
+ }
+ }
+ npages += ridx;
+ if (v3) {
+ simple_unlock(&uobj->vmobjlock);
+ goto uncommit;
+ }
+ goto out;
+ }
+
+ /*
+ * if the pages are already resident, just return them.
+ */
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if ((pg->flags & PG_FAKE) != 0 ||
+ ((ap->a_access_type & VM_PROT_WRITE) &&
+ (pg->flags & PG_RDONLY))) {
+ break;
+ }
+ }
+ if (i == npages) {
+ UVMHIST_LOG(ubchist, "returning cached pages", 0,0,0,0);
+ npages += ridx;
+ goto out;
+ }
+
+ /*
+ * the page wasn't resident and we're not overwriting,
+ * so we're going to have to do some i/o.
+ * find any additional pages needed to cover the expanded range.
+ */
+
+ if (startoffset != origoffset ||
+ startoffset + (npages << PAGE_SHIFT) != endoffset) {
+
+ /*
+ * XXXUBC we need to avoid deadlocks caused by locking
+ * additional pages at lower offsets than pages we
+ * already have locked. for now, unlock them all and
+ * start over.
+ */
+
+ for (i = 0; i < npages; i++) {
+ struct vm_page *pg = pgs[ridx + i];
+
+ if (pg->flags & PG_FAKE) {
+ pg->flags |= PG_RELEASED;
+ }
+ }
+ uvm_page_unbusy(&pgs[ridx], npages);
+ memset(pgs, 0, sizeof(pgs));
+
+ UVMHIST_LOG(ubchist, "reset npages start 0x%x end 0x%x",
+ startoffset, endoffset, 0,0);
+ npages = (endoffset - startoffset) >> PAGE_SHIFT;
+ npgs = npages;
+ uvn_findpages(uobj, startoffset, &npgs, pgs, UFP_ALL);
+ }
+ simple_unlock(&uobj->vmobjlock);
+
+ /*
+ * update the cached read creds for this node.
+ */
+
+ if (np->n_rcred) {
+ crfree(np->n_rcred);
+ }
+ np->n_rcred = curproc->p_ucred;
+ crhold(np->n_rcred);
+
+ /*
+ * read the desired page(s).
+ */
+
+ totalbytes = npages << PAGE_SHIFT;
+ bytes = MIN(totalbytes, vp->v_uvm.u_size - startoffset);
+ tailbytes = totalbytes - bytes;
+ skipbytes = 0;
+
+ kva = uvm_pagermapin(pgs, npages, UVMPAGER_MAPIN_WAITOK |
+ UVMPAGER_MAPIN_READ);
+
+ s = splbio();
+ mbp = pool_get(&bufpool, PR_WAITOK);
+ splx(s);
+ mbp->b_bufsize = totalbytes;
+ mbp->b_data = (void *)kva;
+ mbp->b_resid = mbp->b_bcount = bytes;
+ mbp->b_flags = B_BUSY|B_READ| (async ? B_CALL|B_ASYNC : 0);
+ mbp->b_iodone = uvm_aio_biodone;
+ mbp->b_vp = vp;
+ mbp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&mbp->b_dep);
+
+ /*
+ * if EOF is in the middle of the last page, zero the part past EOF.
+ */
+
+ if (tailbytes > 0 && (pgs[bytes >> PAGE_SHIFT]->flags & PG_FAKE)) {
+ memset((char *)kva + bytes, 0, tailbytes);
+ }
+
+ /*
+ * now loop over the pages, reading as needed.
+ */
+
+ bp = NULL;
+ for (offset = startoffset;
+ bytes > 0;
+ offset += iobytes, bytes -= iobytes) {
+
+ /*
+ * skip pages which don't need to be read.
+ */
+
+ pidx = (offset - startoffset) >> PAGE_SHIFT;
+ UVMHIST_LOG(ubchist, "pidx %d offset 0x%x startoffset 0x%x",
+ pidx, (int)offset, (int)startoffset,0);
+ while ((pgs[pidx]->flags & PG_FAKE) == 0) {
+ size_t b;
+
+ KASSERT((offset & (PAGE_SIZE - 1)) == 0);
+ b = MIN(PAGE_SIZE, bytes);
+ offset += b;
+ bytes -= b;
+ skipbytes += b;
+ pidx++;
+ UVMHIST_LOG(ubchist, "skipping, new offset 0x%x",
+ (int)offset, 0,0,0);
+ if (bytes == 0) {
+ goto loopdone;
+ }
+ }
+
+ /*
+ * see how many pages can be read with this i/o.
+ * reduce the i/o size if necessary.
+ */
+
+ iobytes = bytes;
+ if (offset + iobytes > round_page(offset)) {
+ pcount = 1;
+ while (pidx + pcount < npages &&
+ pgs[pidx + pcount]->flags & PG_FAKE) {
+ pcount++;
+ }
+ iobytes = MIN(iobytes, (pcount << PAGE_SHIFT) -
+ (offset - trunc_page(offset)));
+ }
+ iobytes = MIN(iobytes, nmp->nm_rsize);
+
+ /*
+ * allocate a sub-buf for this piece of the i/o
+ * (or just use mbp if there's only 1 piece),
+ * and start it going.
+ */
+
+ if (offset == startoffset && iobytes == bytes) {
+ bp = mbp;
+ } else {
+ s = splbio();
+ bp = pool_get(&bufpool, PR_WAITOK);
+ splx(s);
+ bp->b_data = (char *)kva + offset - startoffset;
+ bp->b_resid = bp->b_bcount = iobytes;
+ bp->b_flags = B_BUSY|B_READ|B_CALL|B_ASYNC;
+ bp->b_iodone = uvm_aio_biodone1;
+ bp->b_vp = vp;
+ bp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&bp->b_dep);
+ }
+ bp->b_private = mbp;
+ bp->b_lblkno = bp->b_blkno = offset >> DEV_BSHIFT;
+
+ UVMHIST_LOG(ubchist, "bp %p offset 0x%x bcount 0x%x blkno 0x%x",
+ bp, offset, iobytes, bp->b_blkno);
+
+ VOP_STRATEGY(bp);
+ }
+
+loopdone:
+ if (skipbytes) {
+ s = splbio();
+ mbp->b_resid -= skipbytes;
+ if (mbp->b_resid == 0) {
+ biodone(mbp);
+ }
+ splx(s);
+ }
+ if (async) {
+ UVMHIST_LOG(ubchist, "returning PEND",0,0,0,0);
+ return EINPROGRESS;
+ }
+ if (bp != NULL) {
+ error = biowait(mbp);
+ }
+ s = splbio();
+ pool_put(&bufpool, mbp);
+ splx(s);
+ uvm_pagermapout(kva, npages);
+
+ if (write && v3) {
+uncommit:
+ lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p);
+ nfs_del_committed_range(vp, origoffset, npages);
+ nfs_del_tobecommitted_range(vp, origoffset, npages);
+ simple_lock(&uobj->vmobjlock);
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY);
+ }
+ simple_unlock(&uobj->vmobjlock);
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ }
+
+ simple_lock(&uobj->vmobjlock);
+
+out:
+ if (error) {
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
+ pgs[i], pgs[i]->flags, 0,0);
+ if (pgs[i]->flags & PG_WANTED) {
+ wakeup(pgs[i]);
+ }
+ if (pgs[i]->flags & PG_RELEASED) {
+ uvm_unlock_pageq();
+ (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
+ uvm_lock_pageq();
+ continue;
+ }
+ if (pgs[i]->flags & PG_FAKE) {
+ uvm_pagefree(pgs[i]);
+ continue;
+ }
+ uvm_pageactivate(pgs[i]);
+ pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pgs[i], NULL);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(ubchist, "returning error %d", error,0,0,0);
+ return error;
+ }
+
+ UVMHIST_LOG(ubchist, "ridx %d count %d", ridx, npages, 0,0);
+ uvm_lock_pageq();
+ for (i = 0; i < npages; i++) {
+ if (pgs[i] == NULL) {
+ continue;
+ }
+ UVMHIST_LOG(ubchist, "examining pg %p flags 0x%x",
+ pgs[i], pgs[i]->flags, 0,0);
+ if (pgs[i]->flags & PG_FAKE) {
+ UVMHIST_LOG(ubchist, "unfaking pg %p offset 0x%x",
+ pgs[i], (int)pgs[i]->offset,0,0);
+ pgs[i]->flags &= ~(PG_FAKE);
+ pmap_clear_modify(pgs[i]);
+ pmap_clear_reference(pgs[i]);
+ }
+ if (i < ridx || i >= ridx + orignpages || async) {
+ UVMHIST_LOG(ubchist, "unbusy pg %p offset 0x%x",
+ pgs[i], (int)pgs[i]->offset,0,0);
+ if (pgs[i]->flags & PG_WANTED) {
+ wakeup(pgs[i]);
+ }
+ if (pgs[i]->flags & PG_RELEASED) {
+ uvm_unlock_pageq();
+ (uobj->pgops->pgo_releasepg)(pgs[i], NULL);
+ uvm_lock_pageq();
+ continue;
+ }
+ uvm_pageactivate(pgs[i]);
+ pgs[i]->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pgs[i], NULL);
+ }
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ if (ap->a_m != NULL) {
+ memcpy(ap->a_m, &pgs[ridx],
+ *ap->a_count * sizeof(struct vm_page *));
+ }
+ return 0;
+}
+
+/*
+ * Vnode op for VM putpages.
+ */
+int
+nfs_putpages(v)
+ void *v;
+{
+ struct vop_putpages_args /* {
+ struct vnode *a_vp;
+ struct vm_page **a_m;
+ int a_count;
+ int a_flags;
+ int *a_rtvals;
+ } */ *ap = v;
+
+ struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ struct buf *bp, *mbp;
+ struct vm_page **pgs = ap->a_m;
+ int flags = ap->a_flags;
+ int npages = ap->a_count;
+ int s, error, i;
+ size_t bytes, iobytes, skipbytes;
+ vaddr_t kva;
+ off_t offset, origoffset, commitoff;
+ uint32_t commitbytes;
+ boolean_t v3 = NFS_ISV3(vp);
+ boolean_t async = (flags & PGO_SYNCIO) == 0;
+ boolean_t weak = (flags & PGO_WEAK) && v3;
+ struct proc *p = curproc;
+ UVMHIST_FUNC("nfs_putpages"); UVMHIST_CALLED(ubchist);
+
+ UVMHIST_LOG(ubchist, "vp %p pgp %p count %d",
+ vp, ap->a_m, ap->a_count,0);
+
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+
+ error = 0;
+ origoffset = pgs[0]->offset;
+ bytes = MIN(ap->a_count << PAGE_SHIFT, vp->v_uvm.u_size - origoffset);
+ skipbytes = 0;
+
+ /*
+ * if the range has been committed already, mark the pages thus.
+ * if the range just needs to be committed, we're done
+ * if it's a weak putpage, otherwise commit the range.
+ */
+
+ if (v3) {
+ lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p);
+ if (nfs_in_committed_range(vp, origoffset, bytes)) {
+ goto committed;
+ }
+ if (nfs_in_tobecommitted_range(vp, origoffset, bytes)) {
+ if (weak) {
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ return 0;
+ } else {
+ commitoff = np->n_pushlo;
+ commitbytes = (uint32_t)(np->n_pushhi -
+ np->n_pushlo);
+ goto commit;
+ }
+ }
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ }
+
+ /*
+ * otherwise write or commit all the pages.
+ */
+
+ kva = uvm_pagermapin(pgs, ap->a_count, UVMPAGER_MAPIN_WAITOK|
+ UVMPAGER_MAPIN_WRITE);
+
+ s = splbio();
+ vp->v_numoutput += 2;
+ mbp = pool_get(&bufpool, PR_WAITOK);
+ UVMHIST_LOG(ubchist, "vp %p mbp %p num now %d bytes 0x%x",
+ vp, mbp, vp->v_numoutput, bytes);
+ splx(s);
+ mbp->b_bufsize = npages << PAGE_SHIFT;
+ mbp->b_data = (void *)kva;
+ mbp->b_resid = mbp->b_bcount = bytes;
+ mbp->b_flags = B_BUSY|B_WRITE|B_AGE |
+ (async ? B_CALL|B_ASYNC : 0) |
+ (curproc == uvm.pagedaemon_proc ? B_PDAEMON : 0);
+ mbp->b_iodone = uvm_aio_biodone;
+ mbp->b_vp = vp;
+ mbp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&mbp->b_dep);
+
+ for (offset = origoffset;
+ bytes > 0;
+ offset += iobytes, bytes -= iobytes) {
+ iobytes = MIN(nmp->nm_wsize, bytes);
+
+ /*
+ * skip writing any pages which only need a commit.
+ */
+
+ if ((pgs[(offset - origoffset) >> PAGE_SHIFT]->flags &
+ PG_NEEDCOMMIT) != 0) {
+ KASSERT((offset & (PAGE_SIZE - 1)) == 0);
+ iobytes = MIN(PAGE_SIZE, bytes);
+ skipbytes += iobytes;
+ continue;
+ }
+
+ /* if it's really one i/o, don't make a second buf */
+ if (offset == origoffset && iobytes == bytes) {
+ bp = mbp;
+ } else {
+ s = splbio();
+ vp->v_numoutput++;
+ bp = pool_get(&bufpool, PR_WAITOK);
+ UVMHIST_LOG(ubchist, "vp %p bp %p num now %d",
+ vp, bp, vp->v_numoutput, 0);
+ splx(s);
+ bp->b_data = (char *)kva + (offset - origoffset);
+ bp->b_resid = bp->b_bcount = iobytes;
+ bp->b_flags = B_BUSY|B_WRITE|B_CALL|B_ASYNC;
+ bp->b_iodone = uvm_aio_biodone1;
+ bp->b_vp = vp;
+ bp->b_proc = NULL; /* XXXUBC */
+ LIST_INIT(&bp->b_dep);
+ }
+ bp->b_private = mbp;
+ bp->b_lblkno = bp->b_blkno = (daddr_t)(offset >> DEV_BSHIFT);
+ UVMHIST_LOG(ubchist, "bp %p numout %d",
+ bp, vp->v_numoutput,0,0);
+ VOP_STRATEGY(bp);
+ }
+ if (skipbytes) {
+ UVMHIST_LOG(ubchist, "skipbytes %d", bytes, 0,0,0);
+ s = splbio();
+ mbp->b_resid -= skipbytes;
+ if (mbp->b_resid == 0) {
+ biodone(mbp);
+ }
+ splx(s);
+ }
+ if (async) {
+ return EINPROGRESS;
+ }
+ if (bp != NULL) {
+ error = biowait(mbp);
+ }
+
+ s = splbio();
+ if (mbp->b_vp)
+ vwakeup(mbp->b_vp);
+ pool_put(&bufpool, mbp);
+ splx(s);
+
+ uvm_pagermapout(kva, ap->a_count);
+ if (error || !v3) {
+ UVMHIST_LOG(ubchist, "returning error %d", error, 0,0,0);
+ return error;
+ }
+
+ /*
+ * for a weak put, mark the range as "to be committed"
+ * and mark the pages read-only so that we will be notified
+ * to remove the pages from the "to be committed" range
+ * if they are made dirty again.
+ * for a strong put, commit the pages and remove them from the
+ * "to be committed" range. also, mark them as writable
+ * and not cleanable with just a commit.
+ */
+
+ lockmgr(&np->n_commitlock, LK_EXCLUSIVE, NULL, p);
+ if (weak) {
+ nfs_add_tobecommitted_range(vp, origoffset,
+ npages << PAGE_SHIFT);
+ for (i = 0; i < npages; i++) {
+ pgs[i]->flags |= PG_NEEDCOMMIT|PG_RDONLY;
+ }
+ } else {
+ commitoff = origoffset;
+ commitbytes = npages << PAGE_SHIFT;
+commit:
+ error = nfs_commit(vp, commitoff, commitbytes, curproc);
+ nfs_del_tobecommitted_range(vp, commitoff, commitbytes);
+committed:
+ for (i = 0; i < npages; i++) {
+ pgs[i]->flags &= ~(PG_NEEDCOMMIT|PG_RDONLY);
+ }
+ }
+ lockmgr(&np->n_commitlock, LK_RELEASE, NULL, p);
+ return error;
+}
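
The new nfs_putpages() above implements the NFSv3 two-phase write scheme:
pages are pushed with WRITE(UNSTABLE) and flagged PG_NEEDCOMMIT, a later
COMMIT rpc makes them stable (a weak put merely records the range as "to be
committed"), and a stale write verifier from the server means the unstable
data was lost and must be rewritten, which is why nfs_clearcommit() wipes
the flag mount-wide. A standalone model of that per-page state machine --
all names illustrative, not kernel APIs, and assuming a stale verifier
simply re-dirties uncommitted pages:

/*
 * Standalone model of the NFSv3 write/commit page states used by
 * nfs_putpages().  PG_NEEDCOMMIT marks pages whose data was sent
 * with WRITE(UNSTABLE) but not yet committed; COMMIT clears it, and
 * a stale write verifier (server reboot) invalidates the flag and
 * forces a rewrite.  Illustrative only.
 */
#include <stdio.h>

#define PG_DIRTY      0x1
#define PG_NEEDCOMMIT 0x2

struct page { int flags; };

static void
write_unstable(struct page *pg)
{
	/* data is on the server, but only in volatile storage */
	pg->flags &= ~PG_DIRTY;
	pg->flags |= PG_NEEDCOMMIT;
}

static void
commit_page(struct page *pg)
{
	/* COMMIT rpc succeeded: data is on stable storage */
	pg->flags &= ~PG_NEEDCOMMIT;
}

static void
stale_verifier(struct page *pg)
{
	/* server rebooted: unstable data was lost, must rewrite */
	if (pg->flags & PG_NEEDCOMMIT) {
		pg->flags &= ~PG_NEEDCOMMIT;
		pg->flags |= PG_DIRTY;
	}
}

int
main(void)
{
	struct page pg = { PG_DIRTY };

	write_unstable(&pg);
	printf("after unstable write: flags %#x\n", pg.flags);
	stale_verifier(&pg);
	printf("after stale verifier: flags %#x\n", pg.flags);
	write_unstable(&pg);
	commit_page(&pg);
	printf("after commit:         flags %#x\n", pg.flags);
	return 0;
}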
diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c
index 987259eadc3..567738584da 100644
--- a/sys/nfs/nfs_node.c
+++ b/sys/nfs/nfs_node.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_node.c,v 1.16 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_node.c,v 1.17 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_node.c,v 1.16 1996/02/18 11:53:42 fvdl Exp $ */
/*
@@ -145,6 +145,7 @@ loop:
vp = nvp;
np = pool_get(&nfs_node_pool, PR_WAITOK);
bzero((caddr_t)np, sizeof *np);
+ lockinit(&np->n_commitlock, PINOD, "nfsclock", 0, 0);
vp->v_data = np;
np->n_vnode = vp;
@@ -169,6 +170,17 @@ loop:
np->n_fhp = &np->n_fh;
bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
np->n_fhsize = fhsize;
+
+ /*
+ * XXXUBC doing this while holding the nfs_hashlock is bad,
+ * but there's no alternative at the moment.
+ */
+ error = VOP_GETATTR(vp, &np->n_vattr, curproc->p_ucred, curproc);
+ if (error) {
+ return error;
+ }
+ uvm_vnp_setsize(vp, np->n_vattr.va_size);
+
lockmgr(&nfs_hashlock, LK_RELEASE, 0, p);
*npp = np;
return (0);
@@ -185,11 +197,12 @@ nfs_inactive(v)
struct nfsnode *np;
struct sillyrename *sp;
struct proc *p = curproc; /* XXX */
+ struct vnode *vp = ap->a_vp;
- np = VTONFS(ap->a_vp);
- if (prtactive && ap->a_vp->v_usecount != 0)
- vprint("nfs_inactive: pushing active", ap->a_vp);
- if (ap->a_vp->v_type != VDIR) {
+ np = VTONFS(vp);
+ if (prtactive && vp->v_usecount != 0)
+ vprint("nfs_inactive: pushing active", vp);
+ if (vp->v_type != VDIR) {
sp = np->n_sillyrename;
np->n_sillyrename = (struct sillyrename *)0;
} else
@@ -198,7 +211,7 @@ nfs_inactive(v)
/*
* Remove the silly file that was rename'd earlier
*/
- (void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1);
+ (void) nfs_vinvalbuf(vp, 0, sp->s_cred, p, 1);
nfs_removeit(sp);
crfree(sp->s_cred);
vrele(sp->s_dvp);
@@ -206,7 +219,7 @@ nfs_inactive(v)
}
np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT);
- VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ VOP_UNLOCK(vp, 0, ap->a_p);
return (0);
}
diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c
index 9d4de9fd9a1..9534e7221da 100644
--- a/sys/nfs/nfs_serv.c
+++ b/sys/nfs/nfs_serv.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_serv.c,v 1.27 2001/11/06 19:53:21 miod Exp $ */
+/* $OpenBSD: nfs_serv.c,v 1.28 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_serv.c,v 1.34 1997/05/12 23:37:12 fvdl Exp $ */
/*
@@ -1663,8 +1663,6 @@ nfsrv_remove(nfsd, slp, procp, mrq)
error = EBUSY;
goto out;
}
- if (vp->v_flag & VTEXT)
- uvm_vnp_uncache(vp);
out:
if (!error) {
error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
@@ -3276,11 +3274,10 @@ nfsrv_access(vp, flags, cred, rdonly, p, override)
}
}
/*
- * If there's shared text associated with
- * the inode, try to free it up once. If
- * we fail, we can't allow writing.
+ * If the vnode is in use as a process's text,
+ * we can't allow writing.
*/
- if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
+ if ((vp->v_flag & VTEXT))
return (ETXTBSY);
}
error = VOP_ACCESS(vp, flags, cred, p);
diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c
index 9689d9f36a5..4a8bc11528d 100644
--- a/sys/nfs/nfs_subs.c
+++ b/sys/nfs/nfs_subs.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_subs.c,v 1.35 2001/11/06 19:53:21 miod Exp $ */
+/* $OpenBSD: nfs_subs.c,v 1.36 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_subs.c,v 1.27.4.3 1996/07/08 20:34:24 jtc Exp $ */
/*
@@ -39,6 +39,40 @@
* @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
*/
+/*
+ * Copyright 2000 Wasabi Systems, Inc.
+ * All rights reserved.
+ *
+ * Written by Frank van der Linden for Wasabi Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed for the NetBSD Project by
+ * Wasabi Systems, Inc.
+ * 4. The name of Wasabi Systems, Inc. may not be used to endorse
+ * or promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
/*
* These functions support the macros and help fiddle mbuf chains for
@@ -1241,17 +1275,14 @@ nfs_loadattrcache(vpp, mdp, dposp, vaper)
vap->va_filerev = 0;
}
if (vap->va_size != np->n_size) {
- if (vap->va_type == VREG) {
- if (np->n_flag & NMODIFIED) {
- if (vap->va_size < np->n_size)
- vap->va_size = np->n_size;
- else
- np->n_size = vap->va_size;
- } else
- np->n_size = vap->va_size;
- uvm_vnp_setsize(vp, np->n_size);
- } else
+ if ((np->n_flag & NMODIFIED) && vap->va_size < np->n_size) {
+ vap->va_size = np->n_size;
+ } else {
np->n_size = vap->va_size;
+ if (vap->va_type == VREG) {
+ uvm_vnp_setsize(vp, np->n_size);
+ }
+ }
}
np->n_attrstamp = time.tv_sec;
if (vaper != NULL) {
@@ -1741,26 +1772,216 @@ void
nfs_clearcommit(mp)
struct mount *mp;
{
- register struct vnode *vp, *nvp;
- register struct buf *bp, *nbp;
+ struct vnode *vp;
+ struct vm_page *pg;
+ struct nfsnode *np;
int s;
s = splbio();
-loop:
- for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
- if (vp->v_mount != mp) /* Paranoia */
- goto loop;
- nvp = vp->v_mntvnodes.le_next;
- for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
- nbp = bp->b_vnbufs.le_next;
- if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
- == (B_DELWRI | B_NEEDCOMMIT))
- bp->b_flags &= ~B_NEEDCOMMIT;
+ LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
+ if (vp->v_type == VNON)
+ continue;
+ np = VTONFS(vp);
+ np->n_pushlo = np->n_pushhi = np->n_pushedlo =
+ np->n_pushedhi = 0;
+ np->n_commitflags &=
+ ~(NFS_COMMIT_PUSH_VALID | NFS_COMMIT_PUSHED_VALID);
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ TAILQ_FOREACH(pg, &vp->v_uvm.u_obj.memq, listq) {
+ pg->flags &= ~PG_NEEDCOMMIT;
}
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
}
splx(s);
}
+void
+nfs_merge_commit_ranges(vp)
+ struct vnode *vp;
+{
+ struct nfsnode *np = VTONFS(vp);
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) {
+ np->n_pushedlo = np->n_pushlo;
+ np->n_pushedhi = np->n_pushhi;
+ np->n_commitflags |= NFS_COMMIT_PUSHED_VALID;
+ } else {
+ if (np->n_pushlo < np->n_pushedlo)
+ np->n_pushedlo = np->n_pushlo;
+ if (np->n_pushhi > np->n_pushedhi)
+ np->n_pushedhi = np->n_pushhi;
+ }
+
+ np->n_pushlo = np->n_pushhi = 0;
+ np->n_commitflags &= ~NFS_COMMIT_PUSH_VALID;
+
+#ifdef fvdl_debug
+ printf("merge: committed: %u - %u\n", (unsigned)np->n_pushedlo,
+ (unsigned)np->n_pushedhi);
+#endif
+}
+
+int
+nfs_in_committed_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID))
+ return 0;
+ lo = off;
+ hi = lo + len;
+
+ return (lo >= np->n_pushedlo && hi <= np->n_pushedhi);
+}
+
+int
+nfs_in_tobecommitted_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID))
+ return 0;
+ lo = off;
+ hi = lo + len;
+
+ return (lo >= np->n_pushlo && hi <= np->n_pushhi);
+}
+
+void
+nfs_add_committed_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ lo = off;
+ hi = lo + len;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID)) {
+ np->n_pushedlo = lo;
+ np->n_pushedhi = hi;
+ np->n_commitflags |= NFS_COMMIT_PUSHED_VALID;
+ } else {
+ if (hi > np->n_pushedhi)
+ np->n_pushedhi = hi;
+ if (lo < np->n_pushedlo)
+ np->n_pushedlo = lo;
+ }
+#ifdef fvdl_debug
+ printf("add: committed: %u - %u\n", (unsigned)np->n_pushedlo,
+ (unsigned)np->n_pushedhi);
+#endif
+}
+
+void
+nfs_del_committed_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSHED_VALID))
+ return;
+
+ lo = off;
+ hi = lo + len;
+
+ if (lo > np->n_pushedhi || hi < np->n_pushedlo)
+ return;
+ if (lo <= np->n_pushedlo)
+ np->n_pushedlo = hi;
+ else if (hi >= np->n_pushedhi)
+ np->n_pushedhi = lo;
+ else {
+ /*
+ * XXX There's only one range. If the deleted range
+ * is in the middle, pick the largest of the
+ * contiguous ranges that it leaves.
+ */
+ if ((np->n_pushedlo - lo) > (hi - np->n_pushedhi))
+ np->n_pushedhi = lo;
+ else
+ np->n_pushedlo = hi;
+ }
+#ifdef fvdl_debug
+ printf("del: committed: %u - %u\n", (unsigned)np->n_pushedlo,
+ (unsigned)np->n_pushedhi);
+#endif
+}
+
+void
+nfs_add_tobecommitted_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ lo = off;
+ hi = lo + len;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID)) {
+ np->n_pushlo = lo;
+ np->n_pushhi = hi;
+ np->n_commitflags |= NFS_COMMIT_PUSH_VALID;
+ } else {
+ if (lo < np->n_pushlo)
+ np->n_pushlo = lo;
+ if (hi > np->n_pushhi)
+ np->n_pushhi = hi;
+ }
+#ifdef fvdl_debug
+ printf("add: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo,
+ (unsigned)np->n_pushhi);
+#endif
+}
+
+void
+nfs_del_tobecommitted_range(vp, off, len)
+ struct vnode *vp;
+ off_t off, len;
+{
+ struct nfsnode *np = VTONFS(vp);
+ off_t lo, hi;
+
+ if (!(np->n_commitflags & NFS_COMMIT_PUSH_VALID))
+ return;
+
+ lo = off;
+ hi = lo + len;
+
+ if (lo > np->n_pushhi || hi < np->n_pushlo)
+ return;
+
+ if (lo <= np->n_pushlo)
+ np->n_pushlo = hi;
+ else if (hi >= np->n_pushhi)
+ np->n_pushhi = lo;
+ else {
+ /*
+ * XXX There's only one range. If the deleted range
+ * is in the middle, pick the largest of the
+ * contiguous ranges that it leaves.
+ */
+ if ((np->n_pushlo - lo) > (hi - np->n_pushhi))
+ np->n_pushhi = lo;
+ else
+ np->n_pushlo = hi;
+ }
+#ifdef fvdl_debug
+ printf("del: tobecommitted: %u - %u\n", (unsigned)np->n_pushlo,
+ (unsigned)np->n_pushhi);
+#endif
+}
+
/*
* Map errnos to NFS error numbers. For Version 3 also filter out error
* numbers not specified for the associated procedure.
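
The commit-range helpers above track only a single [lo, hi) window per
nfsnode, so deleting from the middle of the window has to drop one of the
two leftover pieces; the XXX comments say to keep the larger one. A minimal
standalone sketch of that rule, simplified from nfs_del_committed_range()
and illustrative only:

/*
 * Single-range bookkeeping for the committed / to-be-committed
 * ranges: only one [lo, hi) range is tracked, and deleting from its
 * middle keeps the larger leftover piece.
 */
#include <stdio.h>

struct range {
	long long lo, hi;	/* stands in for off_t */
	int valid;
};

static void
del_range(struct range *r, long long lo, long long hi)
{
	if (!r->valid || lo > r->hi || hi < r->lo)
		return;			/* disjoint: nothing to do */
	if (lo <= r->lo)
		r->lo = hi;		/* clip from the left */
	else if (hi >= r->hi)
		r->hi = lo;		/* clip from the right */
	else if (lo - r->lo > r->hi - hi)
		r->hi = lo;		/* middle: keep larger left piece */
	else
		r->lo = hi;		/* middle: keep larger right piece */
}

int
main(void)
{
	struct range r = { 0, 100, 1 };

	del_range(&r, 40, 50);		/* splits; right piece is larger */
	printf("kept [%lld, %lld)\n", r.lo, r.hi);	/* [50, 100) */
	return 0;
}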
diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c
index c71a662ccb2..5a189ba344d 100644
--- a/sys/nfs/nfs_syscalls.c
+++ b/sys/nfs/nfs_syscalls.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_syscalls.c,v 1.20 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_syscalls.c,v 1.21 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_syscalls.c,v 1.19 1996/02/18 11:53:52 fvdl Exp $ */
/*
@@ -913,10 +913,9 @@ int
nfssvc_iod(p)
struct proc *p;
{
- register struct buf *bp, *nbp;
- register int i, myiod;
- struct vnode *vp;
- int error = 0, s;
+ struct buf *bp;
+ int i, myiod;
+ int error = 0;
/*
* Assign my position or return error if too many already running
@@ -944,39 +943,7 @@ nfssvc_iod(p)
while ((bp = nfs_bufq.tqh_first) != NULL) {
/* Take one off the front of the list */
TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
- if (bp->b_flags & B_READ)
- (void) nfs_doio(bp, NULL);
- else do {
- /*
- * Look for a delayed write for the same vnode, so I can do
- * it now. We must grab it before calling nfs_doio() to
- * avoid any risk of the vnode getting vclean()'d while
- * we are doing the write rpc.
- */
- vp = bp->b_vp;
- s = splbio();
- for (nbp = vp->v_dirtyblkhd.lh_first; nbp;
- nbp = nbp->b_vnbufs.le_next) {
- if ((nbp->b_flags &
- (B_BUSY|B_DELWRI|B_NEEDCOMMIT|B_NOCACHE))!=B_DELWRI)
- continue;
- bremfree(nbp);
- nbp->b_flags |= (B_BUSY|B_ASYNC);
- break;
- }
- /*
- * For the delayed write, do the first part of nfs_bwrite()
- * up to, but not including nfs_strategy().
- */
- if (nbp) {
- nbp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
- buf_undirty(bp);
- nbp->b_vp->v_numoutput++;
- }
- splx(s);
-
- (void) nfs_doio(bp, NULL);
- } while ((bp = nbp) != NULL);
+ (void) nfs_doio(bp, NULL);
}
if (error) {
PRELE(p);
diff --git a/sys/nfs/nfs_var.h b/sys/nfs/nfs_var.h
index 861eaf3059e..71985e581a8 100644
--- a/sys/nfs/nfs_var.h
+++ b/sys/nfs/nfs_var.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_var.h,v 1.15 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_var.h,v 1.16 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_var.h,v 1.3 1996/02/18 11:53:54 fvdl Exp $ */
/*
@@ -119,7 +119,7 @@ int nfs_sillyrename __P((struct vnode *, struct vnode *,
struct componentname *));
int nfs_lookitup __P((struct vnode *, char *, int, struct ucred *,
struct proc *, struct nfsnode **));
-int nfs_commit __P((struct vnode *, u_quad_t, int, struct proc *));
+int nfs_commit __P((struct vnode *, u_quad_t, unsigned, struct proc *));
int nfs_bmap __P((void *));
int nfs_strategy __P((void *));
int nfs_mmap __P((void *));
@@ -134,7 +134,6 @@ int nfs_vfree __P((void *));
int nfs_truncate __P((void *));
int nfs_update __P((void *));
int nfs_bwrite __P((void *));
-int nfs_writebp __P((struct buf *, int));
int nfsspec_access __P((void *));
int nfsspec_read __P((void *));
int nfsspec_write __P((void *));
@@ -258,7 +257,16 @@ void nfsm_srvfattr __P((struct nfsrv_descript *, struct vattr *,
int nfsrv_fhtovp __P((fhandle_t *, int, struct vnode **, struct ucred *,
struct nfssvc_sock *, struct mbuf *, int *, int));
int netaddr_match __P((int, union nethostaddr *, struct mbuf *));
+
void nfs_clearcommit __P((struct mount *));
+void nfs_merge_commit_ranges __P((struct vnode *));
+int nfs_in_committed_range __P((struct vnode *, off_t, off_t));
+int nfs_in_tobecommitted_range __P((struct vnode *, off_t, off_t));
+void nfs_add_committed_range __P((struct vnode *, off_t, off_t));
+void nfs_del_committed_range __P((struct vnode *, off_t, off_t));
+void nfs_add_tobecommitted_range __P((struct vnode *, off_t, off_t));
+void nfs_del_tobecommitted_range __P((struct vnode *, off_t, off_t));
+
int nfsrv_errmap __P((struct nfsrv_descript *, int));
void nfsrvw_sort __P((gid_t *, int));
void nfsrv_setcred __P((struct ucred *, struct ucred *));
diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c
index 13420530fc3..91f84da52b6 100644
--- a/sys/nfs/nfs_vfsops.c
+++ b/sys/nfs/nfs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_vfsops.c,v 1.38 2001/11/14 23:37:33 mickey Exp $ */
+/* $OpenBSD: nfs_vfsops.c,v 1.39 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_vfsops.c,v 1.46.4.1 1996/05/25 22:40:35 fvdl Exp $ */
/*
@@ -748,6 +748,8 @@ mountnfs(argp, mp, nam, pth, hst)
* point.
*/
mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA;
+ mp->mnt_fs_bshift = DEV_BSHIFT;
+ mp->mnt_dev_bshift = -1;
return (0);
bad:
@@ -856,8 +858,9 @@ loop:
*/
if (vp->v_mount != mp)
goto loop;
- if (VOP_ISLOCKED(vp) || vp->v_dirtyblkhd.lh_first == NULL ||
- waitfor == MNT_LAZY)
+ if (waitfor == MNT_LAZY || VOP_ISLOCKED(vp) ||
+ (LIST_EMPTY(&vp->v_dirtyblkhd) &&
+ vp->v_uvm.u_obj.uo_npages == 0))
continue;
if (vget(vp, LK_EXCLUSIVE, p))
goto loop;
diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c
index 0813b439cb2..4c176c1c1ec 100644
--- a/sys/nfs/nfs_vnops.c
+++ b/sys/nfs/nfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfs_vnops.c,v 1.39 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfs_vnops.c,v 1.40 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfs_vnops.c,v 1.62.4.1 1996/07/08 20:26:52 jtc Exp $ */
/*
@@ -126,7 +126,9 @@ struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
{ &vop_advlock_desc, nfs_advlock }, /* advlock */
{ &vop_reallocblks_desc, nfs_reallocblks }, /* reallocblks */
{ &vop_bwrite_desc, nfs_bwrite },
- { (struct vnodeop_desc*)NULL, (int(*) __P((void *)))NULL }
+ { &vop_getpages_desc, nfs_getpages }, /* getpages */
+ { &vop_putpages_desc, nfs_putpages }, /* putpages */
+ { NULL, NULL }
};
struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
@@ -151,7 +153,7 @@ struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
{ &vop_ioctl_desc, spec_ioctl }, /* ioctl */
{ &vop_select_desc, spec_select }, /* select */
{ &vop_revoke_desc, spec_revoke }, /* revoke */
- { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
{ &vop_remove_desc, spec_remove }, /* remove */
{ &vop_link_desc, spec_link }, /* link */
{ &vop_rename_desc, spec_rename }, /* rename */
@@ -373,11 +375,30 @@ nfs_open(v)
return (EACCES);
}
+ /*
+ * Initialize read and write creds here, for swapfiles
+ * and other paths that don't set the creds themselves.
+ */
+
+ if (ap->a_mode & FREAD) {
+ if (np->n_rcred) {
+ crfree(np->n_rcred);
+ }
+ np->n_rcred = ap->a_cred;
+ crhold(np->n_rcred);
+ }
+ if (ap->a_mode & FWRITE) {
+ if (np->n_wcred) {
+ crfree(np->n_wcred);
+ }
+ np->n_wcred = ap->a_cred;
+ crhold(np->n_wcred);
+ }
+
if (np->n_flag & NMODIFIED) {
if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
ap->a_p, 1)) == EINTR)
return (error);
- uvm_vnp_uncache(vp);
np->n_attrstamp = 0;
if (vp->v_type == VDIR)
np->n_direofoffset = 0;
@@ -395,7 +416,6 @@ nfs_open(v)
if ((error = nfs_vinvalbuf(vp, V_SAVE,
ap->a_cred, ap->a_p, 1)) == EINTR)
return (error);
- uvm_vnp_uncache(vp);
np->n_mtime = vattr.va_mtime.tv_sec;
}
}
@@ -2511,7 +2531,7 @@ int
nfs_commit(vp, offset, cnt, procp)
struct vnode *vp;
u_quad_t offset;
- int cnt;
+ unsigned cnt;
struct proc *procp;
{
caddr_t cp;
@@ -2626,9 +2646,7 @@ nfs_fsync(v)
}
/*
- * Flush all the blocks associated with a vnode.
- * Walk through the buffer pool and push any dirty pages
- * associated with the vnode.
+ * Flush all the data associated with a vnode.
*/
int
nfs_flush(vp, cred, waitfor, p, commit)
@@ -2638,154 +2656,19 @@ nfs_flush(vp, cred, waitfor, p, commit)
struct proc *p;
int commit;
{
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
struct nfsnode *np = VTONFS(vp);
- struct buf *bp;
- int i;
- struct buf *nbp;
- struct nfsmount *nmp = VFSTONFS(vp->v_mount);
- int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
- int passone = 1;
- u_quad_t off = (u_quad_t)-1, endoff = 0, toff;
-#ifndef NFS_COMMITBVECSIZ
-#define NFS_COMMITBVECSIZ 20
-#endif
- struct buf *bvec[NFS_COMMITBVECSIZ];
+ int error;
+ int flushflags = PGO_ALLPAGES|PGO_CLEANIT|PGO_SYNCIO;
+ int rv;
- if (nmp->nm_flag & NFSMNT_INT)
- slpflag = PCATCH;
- if (!commit)
- passone = 0;
- /*
- * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
- * server, but nas not been committed to stable storage on the server
- * yet. On the first pass, the byte range is worked out and the commit
- * rpc is done. On the second pass, nfs_writebp() is called to do the
- * job.
- */
-again:
- bvecpos = 0;
- if (NFS_ISV3(vp) && commit) {
- s = splbio();
- for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
- nbp = bp->b_vnbufs.le_next;
- if (bvecpos >= NFS_COMMITBVECSIZ)
- break;
- if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT))
- != (B_DELWRI | B_NEEDCOMMIT))
- continue;
- bremfree(bp);
- bp->b_flags |= (B_BUSY | B_WRITEINPROG);
- /*
- * A list of these buffers is kept so that the
- * second loop knows which buffers have actually
- * been committed. This is necessary, since there
- * may be a race between the commit rpc and new
- * uncommitted writes on the file.
- */
- bvec[bvecpos++] = bp;
- toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
- bp->b_dirtyoff;
- if (toff < off)
- off = toff;
- toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
- if (toff > endoff)
- endoff = toff;
- }
- splx(s);
- }
- if (bvecpos > 0) {
- /*
- * Commit data on the server, as required.
- */
- retv = nfs_commit(vp, off, (int)(endoff - off), p);
- if (retv == NFSERR_STALEWRITEVERF)
- nfs_clearcommit(vp->v_mount);
- /*
- * Now, either mark the blocks I/O done or mark the
- * blocks dirty, depending on whether the commit
- * succeeded.
- */
- for (i = 0; i < bvecpos; i++) {
- bp = bvec[i];
- bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG);
- if (retv)
- brelse(bp);
- else {
- s = splbio();
- buf_undirty(bp);
- vp->v_numoutput++;
- bp->b_flags |= B_ASYNC;
- bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- splx(s);
- biodone(bp);
- }
- }
- }
+ error = 0;
- /*
- * Start/do any write(s) that are required.
- */
-loop:
- s = splbio();
- for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
- nbp = bp->b_vnbufs.le_next;
- if (bp->b_flags & B_BUSY) {
- if (waitfor != MNT_WAIT || passone)
- continue;
- bp->b_flags |= B_WANTED;
- error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
- "nfsfsync", slptimeo);
- splx(s);
- if (error) {
- if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
- return (EINTR);
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- }
- goto loop;
- }
- if ((bp->b_flags & B_DELWRI) == 0)
- panic("nfs_fsync: not dirty");
- if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT))
- continue;
- bremfree(bp);
- if (passone || !commit)
- bp->b_flags |= (B_BUSY|B_ASYNC);
- else
- bp->b_flags |= (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT);
- splx(s);
- VOP_BWRITE(bp);
- goto loop;
- }
- splx(s);
- if (passone) {
- passone = 0;
- goto again;
- }
- if (waitfor == MNT_WAIT) {
- loop2:
- s = splbio();
- error = vwaitforio(vp, slpflag, "nfs_fsync", slptimeo);
- splx(s);
- if (error) {
- if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
- return (EINTR);
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- goto loop2;
- }
-
- if (vp->v_dirtyblkhd.lh_first && commit) {
-#if 0
- vprint("nfs_fsync: dirty", vp);
-#endif
- goto loop;
- }
+ simple_lock(&uobj->vmobjlock);
+ rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags);
+ simple_unlock(&uobj->vmobjlock);
+ if (!rv) {
+ error = EIO;
}
if (np->n_flag & NWRITEERR) {
error = np->n_error;
@@ -2860,7 +2743,7 @@ nfs_print(v)
}
/*
- * Just call nfs_writebp() with the force argument set to 1.
+ * Just call bwrite().
*/
int
nfs_bwrite(v)
@@ -2870,76 +2753,7 @@ nfs_bwrite(v)
struct buf *a_bp;
} */ *ap = v;
- return (nfs_writebp(ap->a_bp, 1));
-}
-
-/*
- * This is a clone of vop_generic_bwrite(), except that B_WRITEINPROG isn't set unless
- * the force flag is one and it also handles the B_NEEDCOMMIT flag.
- */
-int
-nfs_writebp(bp, force)
- register struct buf *bp;
- int force;
-{
- register int oldflags = bp->b_flags, retv = 1;
- register struct proc *p = curproc; /* XXX */
- off_t off;
- int s;
-
- if(!(bp->b_flags & B_BUSY))
- panic("bwrite: buffer is not busy???");
-
-#ifdef fvdl_debug
- printf("nfs_writebp(%x): vp %x voff %d vend %d doff %d dend %d\n",
- bp, bp->b_vp, bp->b_validoff, bp->b_validend, bp->b_dirtyoff,
- bp->b_dirtyend);
-#endif
- bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
-
- s = splbio();
- buf_undirty(bp);
-
- if ((oldflags & B_ASYNC) && !(oldflags & B_DELWRI) && p)
- ++p->p_stats->p_ru.ru_oublock;
-
- bp->b_vp->v_numoutput++;
- splx(s);
-
- /*
- * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not
- * an actual write will have to be scheduled via. VOP_STRATEGY().
- * If B_WRITEINPROG is already set, then push it with a write anyhow.
- */
- if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) {
- off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
- bp->b_flags |= B_WRITEINPROG;
- retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff,
- bp->b_proc);
- bp->b_flags &= ~B_WRITEINPROG;
- if (!retv) {
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- bp->b_flags &= ~B_NEEDCOMMIT;
- biodone(bp);
- } else if (retv == NFSERR_STALEWRITEVERF)
- nfs_clearcommit(bp->b_vp->v_mount);
- }
- if (retv) {
- if (force)
- bp->b_flags |= B_WRITEINPROG;
- VOP_STRATEGY(bp);
- }
-
- if( (oldflags & B_ASYNC) == 0) {
- int rtval = biowait(bp);
- if (!(oldflags & B_DELWRI) && p) {
- ++p->p_stats->p_ru.ru_oublock;
- }
- brelse(bp);
- return (rtval);
- }
-
- return (0);
+ return (bwrite(ap->a_bp));
}
/*
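
The nfs_open() hunk above caches read and write creds on the nfsnode by
dropping the node's old reference and taking a new one with crhold(). A
userland model of that reference-count swap, with crhold()/crfree()
reimplemented here purely for illustration:

/*
 * Model of the cred-caching pattern added to nfs_open(): free the
 * old cached reference, then hold a new reference on the cred
 * supplied at open time.
 */
#include <stdio.h>
#include <stdlib.h>

struct ucred { int cr_ref; };

static struct ucred *
crhold(struct ucred *cr)
{
	cr->cr_ref++;
	return cr;
}

static void
crfree(struct ucred *cr)
{
	if (--cr->cr_ref == 0)
		free(cr);
}

int
main(void)
{
	struct ucred *a = calloc(1, sizeof(*a));
	struct ucred *b = calloc(1, sizeof(*b));
	struct ucred *n_rcred;

	a->cr_ref = 1;
	b->cr_ref = 1;
	n_rcred = crhold(a);		/* node caches cred a */

	/* open with cred b: replace the cached read cred */
	crfree(n_rcred);
	n_rcred = crhold(b);
	printf("a ref %d, b ref %d\n", a->cr_ref, b->cr_ref);

	crfree(n_rcred);
	crfree(b);
	crfree(a);
	return 0;
}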
diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h
index e1e0fd64327..42aaddfa637 100644
--- a/sys/nfs/nfsnode.h
+++ b/sys/nfs/nfsnode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: nfsnode.h,v 1.11 2001/11/15 23:15:15 art Exp $ */
+/* $OpenBSD: nfsnode.h,v 1.12 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: nfsnode.h,v 1.16 1996/02/18 11:54:04 fvdl Exp $ */
/*
@@ -119,8 +119,20 @@ struct nfsnode {
nfsfh_t n_fh; /* Small File Handle */
struct ucred *n_rcred;
struct ucred *n_wcred;
+ off_t n_pushedlo; /* 1st blk in commited range */
+ off_t n_pushedhi; /* Last block in range */
+ off_t n_pushlo; /* 1st block in commit range */
+ off_t n_pushhi; /* Last block in range */
+ struct lock n_commitlock; /* Serialize commits XXX */
+ int n_commitflags;
};
+/*
+ * Values for n_commitflags
+ */
+#define NFS_COMMIT_PUSH_VALID 0x0001 /* push range valid */
+#define NFS_COMMIT_PUSHED_VALID 0x0002 /* pushed range valid */
+
#define n_atim n_un1.nf_atim
#define n_mtim n_un2.nf_mtim
#define n_sillyrename n_un3.nf_silly
@@ -199,6 +211,8 @@ int nfs_bwrite __P((void *));
int nfs_vget __P((struct mount *, ino_t, struct vnode **));
#define nfs_reallocblks \
((int (*) __P((void *)))eopnotsupp)
+int nfs_getpages __P((void *));
+int nfs_putpages __P((void *));
/* other stuff */
int nfs_removeit __P((struct sillyrename *));