summary refs log tree commit diff
path: root/sys/ufs
diff options
context:
space:
mode:
author Artur Grabowski <art@cvs.openbsd.org> 2001-11-27 05:27:13 +0000
committer Artur Grabowski <art@cvs.openbsd.org> 2001-11-27 05:27:13 +0000
commit 8a1845e49f56720cbfccd4c7f5f80ba5b980fdf4 (patch)
tree d4a522dc41cdc79ba48fe761e94663b795da8cc0 /sys/ufs
parent 0d68e9b5af14f4bfa04d22dbebab5972ac647b26 (diff)
Merge in the unified buffer cache code as found in NetBSD 2001/03/10. The
code is written mostly by Chuck Silvers <chuq@chuq.com>/<chs@netbsd.org>. Tested for the past few weeks by many developers, should be in a pretty stable state, but will require optimizations and additional cleanups.
Diffstat (limited to 'sys/ufs')
-rw-r--r-- sys/ufs/ext2fs/ext2fs_balloc.c 230
-rw-r--r-- sys/ufs/ext2fs/ext2fs_extern.h 7
-rw-r--r-- sys/ufs/ext2fs/ext2fs_inode.c 58
-rw-r--r-- sys/ufs/ext2fs/ext2fs_readwrite.c 103
-rw-r--r-- sys/ufs/ext2fs/ext2fs_subr.c 5
-rw-r--r-- sys/ufs/ext2fs/ext2fs_vfsops.c 15
-rw-r--r-- sys/ufs/ext2fs/ext2fs_vnops.c 12
-rw-r--r-- sys/ufs/ffs/ffs_alloc.c 11
-rw-r--r-- sys/ufs/ffs/ffs_balloc.c 60
-rw-r--r-- sys/ufs/ffs/ffs_extern.h 5
-rw-r--r-- sys/ufs/ffs/ffs_inode.c 121
-rw-r--r-- sys/ufs/ffs/ffs_softdep.c 274
-rw-r--r-- sys/ufs/ffs/ffs_vfsops.c 7
-rw-r--r-- sys/ufs/ffs/ffs_vnops.c 59
-rw-r--r-- sys/ufs/ufs/inode.h 3
-rw-r--r-- sys/ufs/ufs/ufs_bmap.c 16
-rw-r--r-- sys/ufs/ufs/ufs_extern.h 3
-rw-r--r-- sys/ufs/ufs/ufs_inode.c 149
-rw-r--r-- sys/ufs/ufs/ufs_readwrite.c 130
-rw-r--r-- sys/ufs/ufs/ufs_vnops.c 4
-rw-r--r-- sys/ufs/ufs/ufsmount.h 3
21 files changed, 1044 insertions, 231 deletions
diff --git a/sys/ufs/ext2fs/ext2fs_balloc.c b/sys/ufs/ext2fs/ext2fs_balloc.c
index 849a8864b2a..78fb0a8371c 100644
--- a/sys/ufs/ext2fs/ext2fs_balloc.c
+++ b/sys/ufs/ext2fs/ext2fs_balloc.c
@@ -1,5 +1,4 @@
-/* $OpenBSD: ext2fs_balloc.c,v 1.7 2001/11/06 19:53:21 miod Exp $ */
-/* $NetBSD: ext2fs_balloc.c,v 1.10 2001/07/04 21:16:01 chs Exp $ */
+/* $NetBSD: ext2fs_balloc.c,v 1.8 2000/12/10 06:38:31 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -44,8 +43,9 @@
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/vnode.h>
+#include <sys/mount.h>
-#include <uvm/uvm_extern.h>
+#include <uvm/uvm.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
@@ -73,8 +73,13 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
u_int deallocated;
ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1];
int unwindidx = -1;
+ UVMHIST_FUNC("ext2fs_buf_alloc"); UVMHIST_CALLED(ubchist);
- *bpp = NULL;
+ UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0);
+
+ if (bpp != NULL) {
+ *bpp = NULL;
+ }
if (bn < 0)
return (EFBIG);
fs = ip->i_e2fs;
@@ -86,20 +91,29 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
if (bn < NDADDR) {
nb = fs2h32(ip->i_e2fs_blocks[bn]);
if (nb != 0) {
- error = bread(vp, bn, fs->e2fs_bsize, NOCRED, &bp);
- if (error) {
- brelse(bp);
- return (error);
+
+ /*
+ * the block is already allocated, just read it.
+ */
+
+ if (bpp != NULL) {
+ error = bread(vp, bn, fs->e2fs_bsize, NOCRED,
+ &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ *bpp = bp;
}
- *bpp = bp;
return (0);
}
/*
* allocate a new direct block.
*/
+
error = ext2fs_alloc(ip, bn,
- ext2fs_blkpref(ip, bn, (int)bn, &ip->i_e2fs_blocks[0]),
+ ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]),
cred, &newb);
if (error)
return (error);
@@ -107,11 +121,13 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
ip->i_e2fs_last_blk = newb;
ip->i_e2fs_blocks[bn] = h2fs32(newb);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0);
- bp->b_blkno = fsbtodb(fs, newb);
- if (flags & B_CLRBUF)
- clrbuf(bp);
- *bpp = bp;
+ if (bpp != NULL) {
+ bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0);
+ bp->b_blkno = fsbtodb(fs, newb);
+ if (flags & B_CLRBUF)
+ clrbuf(bp);
+ *bpp = bp;
+ }
return (0);
}
/*
@@ -229,26 +245,30 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred,
} else {
bdwrite(bp);
}
- nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
- nbp->b_blkno = fsbtodb(fs, nb);
- if (flags & B_CLRBUF)
- clrbuf(nbp);
- *bpp = nbp;
+ if (bpp != NULL) {
+ nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
+ nbp->b_blkno = fsbtodb(fs, nb);
+ if (flags & B_CLRBUF)
+ clrbuf(nbp);
+ *bpp = nbp;
+ }
return (0);
}
brelse(bp);
- if (flags & B_CLRBUF) {
- error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp);
- if (error) {
- brelse(nbp);
- goto fail;
+ if (bpp != NULL) {
+ if (flags & B_CLRBUF) {
+ error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED,
+ &nbp);
+ if (error) {
+ brelse(nbp);
+ goto fail;
+ }
+ } else {
+ nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
+ nbp->b_blkno = fsbtodb(fs, nb);
}
- } else {
- nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0);
- nbp->b_blkno = fsbtodb(fs, nb);
+ *bpp = nbp;
}
-
- *bpp = nbp;
return (0);
fail:
/*
@@ -292,3 +312,153 @@ fail:
}
return error;
}
+
+/*
+ * VOP_BALLOCN implementation for ext2fs: allocate backing blocks for the
+ * byte range [a_offset, a_offset + a_length) of a_vp, one filesystem block
+ * at a time.  No buffers are returned; ext2fs_buf_alloc() is called with a
+ * NULL bpp.
+ *
+ * The start of the range is rounded down to a block boundary first.  After
+ * each chunk is allocated, the inode size (and the UVM object size, if it
+ * is smaller) is raised so that it covers the chunk.
+ *
+ * Returns 0 on success, or the first error from ext2fs_buf_alloc().
+ */
+int
+ext2fs_ballocn(v)
+	void *v;
+{
+	struct vop_ballocn_args /* {
+		struct vnode *a_vp;
+		off_t a_offset;
+		off_t a_length;
+		struct ucred *a_cred;
+		int a_flags;
+	} */ *ap = v;
+	off_t off, len;
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
+	struct m_ext2fs *fs = ip->i_e2fs;
+	int error, delta, bshift, bsize;
+	UVMHIST_FUNC("ext2fs_ballocn"); UVMHIST_CALLED(ubchist);
+
+	bshift = fs->e2fs_bshift;
+	bsize = 1 << bshift;
+
+	off = ap->a_offset;
+	len = ap->a_length;
+
+	/* round the start of the range down to a block boundary. */
+	delta = off & (bsize - 1);
+	off -= delta;
+	len += delta;
+
+	while (len > 0) {
+		/*
+		 * clamp the final chunk to what is left.  compare in off_t
+		 * so that a length whose low 32 bits are zero cannot be
+		 * narrowed to a zero-sized chunk, which would stall this
+		 * loop (the kernel min() is assumed to operate on u_int --
+		 * confirm against libkern.h).
+		 */
+		if (len < bsize)
+			bsize = len;
+		UVMHIST_LOG(ubchist, "off 0x%x len 0x%x bsize 0x%x",
+			    off, len, bsize, 0);
+
+		error = ext2fs_buf_alloc(ip, lblkno(fs, off), bsize, ap->a_cred,
+		    NULL, ap->a_flags);
+		if (error) {
+			UVMHIST_LOG(ubchist, "error %d", error, 0,0,0);
+			return error;
+		}
+
+		/*
+		 * increase file size now, VOP_BALLOC() requires that
+		 * EOF be up-to-date before each call.
+		 */
+
+		if (ip->i_e2fs_size < off + bsize) {
+			UVMHIST_LOG(ubchist, "old 0x%x new 0x%x",
+				    ip->i_e2fs_size, off + bsize,0,0);
+			ip->i_e2fs_size = off + bsize;
+			if (vp->v_uvm.u_size < ip->i_e2fs_size) {
+				uvm_vnp_setsize(vp, ip->i_e2fs_size);
+			}
+		}
+
+		off += bsize;
+		len -= bsize;
+	}
+	return 0;
+}
+
+/*
+ * allocate a range of blocks in a file.
+ * after this function returns, any page entirely contained within the range
+ * will map to invalid data and thus must be overwritten before it is made
+ * accessible to others.
+ *
+ * up to one block's worth of pages, starting at the block containing "off",
+ * is fetched with VOP_GETPAGES() (PGO_PASTEOF); each page has PG_CLEAN
+ * cleared and is activated.  the byte range is then rounded down to a block
+ * boundary and handed to VOP_BALLOCN() while holding the vnode's glock, and
+ * finally the pages are unbusied.  on error, pgo_flush(PGO_FREE) is applied
+ * starting at the old EOF.
+ *
+ * returns 0 on success, or the error from VOP_GETPAGES() or VOP_BALLOCN().
+ */
+
+int
+ext2fs_balloc_range(vp, off, len, cred, flags)
+	struct vnode *vp;
+	off_t off, len;
+	struct ucred *cred;
+	int flags;
+{
+	off_t oldeof, eof, pagestart;
+	struct uvm_object *uobj;
+	int i, delta, error, npages;
+	int bshift = vp->v_mount->mnt_fs_bshift;
+	int bsize = 1 << bshift;
+	int ppb = max(bsize >> PAGE_SHIFT, 1);
+	struct vm_page *pgs[ppb];
+	UVMHIST_FUNC("ext2fs_balloc_range"); UVMHIST_CALLED(ubchist);
+	UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
+		    vp, off, len, vp->v_uvm.u_size);
+
+	error = 0;
+	uobj = &vp->v_uvm.u_obj;
+	oldeof = vp->v_uvm.u_size;
+	eof = max(oldeof, off + len);
+	UVMHIST_LOG(ubchist, "new eof 0x%x", eof,0,0,0);
+	pgs[0] = NULL;
+
+	/*
+	 * cache the new range of the file.  this will create zeroed pages
+	 * where the new block will be and keep them locked until the
+	 * new block is allocated, so there will be no window where
+	 * the old contents of the new block is visible to racing threads.
+	 */
+
+	pagestart = trunc_page(off) & ~(bsize - 1);
+	npages = min(ppb, (round_page(eof) - pagestart) >> PAGE_SHIFT);
+	/*
+	 * clear every pointer slot we are about to pass down; memset()
+	 * counts bytes, so scale the page count by the element size.
+	 */
+	memset(pgs, 0, npages * sizeof(pgs[0]));
+	simple_lock(&uobj->vmobjlock);
+	error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0,
+	    VM_PROT_READ, 0, PGO_SYNCIO | PGO_PASTEOF);
+	if (error) {
+		UVMHIST_LOG(ubchist, "getpages %d", error,0,0,0);
+		goto errout;
+	}
+	for (i = 0; i < npages; i++) {
+		UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0);
+		KASSERT((pgs[i]->flags & PG_RELEASED) == 0);
+		pgs[i]->flags &= ~PG_CLEAN;
+		uvm_pageactivate(pgs[i]);
+	}
+
+	/*
+	 * adjust off to be block-aligned.
+	 */
+
+	delta = off & (bsize - 1);
+	off -= delta;
+	len += delta;
+
+	/*
+	 * now allocate the range.
+	 */
+
+	lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL, curproc);
+	error = VOP_BALLOCN(vp, off, len, cred, flags);
+	UVMHIST_LOG(ubchist, "ballocn %d", error,0,0,0);
+	lockmgr(&vp->v_glock, LK_RELEASE, NULL, curproc);
+
+	/*
+	 * unbusy any pages we are holding.
+	 */
+
+errout:
+	simple_lock(&uobj->vmobjlock);
+	if (error) {
+		/*
+		 * NOTE(review): ppb is a page count while oldeof and
+		 * pagestart are byte offsets; confirm the intended end
+		 * of the range being freed here.
+		 */
+		(void) (uobj->pgops->pgo_flush)(uobj, oldeof, pagestart + ppb,
+		    PGO_FREE);
+	}
+	if (pgs[0] != NULL) {
+		uvm_page_unbusy(pgs, npages);
+	}
+	simple_unlock(&uobj->vmobjlock);
+	return (error);
+}
diff --git a/sys/ufs/ext2fs/ext2fs_extern.h b/sys/ufs/ext2fs/ext2fs_extern.h
index b7a3f96df38..af23fb6ef2d 100644
--- a/sys/ufs/ext2fs/ext2fs_extern.h
+++ b/sys/ufs/ext2fs/ext2fs_extern.h
@@ -1,5 +1,5 @@
-/* $OpenBSD: ext2fs_extern.h,v 1.10 2001/09/18 00:39:15 art Exp $ */
-/* $NetBSD: ext2fs_extern.h,v 1.1 1997/06/11 09:33:55 bouyer Exp $ */
+/* $OpenBSD: ext2fs_extern.h,v 1.11 2001/11/27 05:27:12 art Exp $ */
+/* $NetBSD: ext2fs_extern.h,v 1.9 2000/11/27 08:39:53 chs Exp $ */
/*-
* Copyright (c) 1997 Manuel Bouyer.
@@ -74,6 +74,9 @@ int ext2fs_inode_free(struct inode *pip, ino_t ino, int mode);
/* ext2fs_balloc.c */
int ext2fs_buf_alloc(struct inode *, daddr_t, int, struct ucred *,
struct buf **, int);
+int ext2fs_ballocn __P((void *));
+int ext2fs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *,
+ int));
/* ext2fs_bmap.c */
int ext2fs_bmap __P((void *));
diff --git a/sys/ufs/ext2fs/ext2fs_inode.c b/sys/ufs/ext2fs/ext2fs_inode.c
index 4af28d9bf0e..f77c99c47b5 100644
--- a/sys/ufs/ext2fs/ext2fs_inode.c
+++ b/sys/ufs/ext2fs/ext2fs_inode.c
@@ -1,5 +1,4 @@
-/* $OpenBSD: ext2fs_inode.c,v 1.17 2001/11/06 19:53:21 miod Exp $ */
-/* $NetBSD: ext2fs_inode.c,v 1.24 2001/06/19 12:59:18 wiz Exp $ */
+/* $NetBSD: ext2fs_inode.c,v 1.23 2001/02/18 20:17:04 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -59,8 +58,10 @@
#include <ufs/ext2fs/ext2fs.h>
#include <ufs/ext2fs/ext2fs_extern.h>
+extern int prtactive;
+
static int ext2fs_indirtrunc __P((struct inode *, ufs_daddr_t, ufs_daddr_t,
- ufs_daddr_t, int, long *));
+ ufs_daddr_t, int, long *));
/*
* Last reference to an inode. If necessary, write or delete it.
@@ -78,7 +79,6 @@ ext2fs_inactive(v)
struct proc *p = ap->a_p;
struct timespec ts;
int error = 0;
- extern int prtactive;
if (prtactive && vp->v_usecount != 0)
vprint("ext2fs_inactive: pushing active", vp);
@@ -171,14 +171,13 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
{
struct vnode *ovp = ITOV(oip);
ufs_daddr_t lastblock;
- ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
+ ufs_daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR];
ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
struct m_ext2fs *fs;
- struct buf *bp;
int offset, size, level;
long count, nblocks, vflags, blocksreleased = 0;
int i;
- int aflags, error, allerror;
+ int error, allerror;
off_t osize;
if (length < 0)
@@ -219,22 +218,8 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
if (length > fs->fs_maxfilesize)
return (EFBIG);
#endif
- offset = blkoff(fs, length - 1);
- lbn = lblkno(fs, length - 1);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = ext2fs_buf_alloc(oip, lbn, offset + 1, cred, &bp,
- aflags);
- if (error)
- return (error);
- oip->i_e2fs_size = length;
- uvm_vnp_setsize(ovp, length);
- uvm_vnp_uncache(ovp);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
+ ext2fs_balloc_range(ovp, length - 1, 1, cred,
+ flags & IO_SYNC ? B_SYNC : 0);
oip->i_flag |= IN_CHANGE | IN_UPDATE;
return (ext2fs_update(oip, NULL, NULL, 1));
}
@@ -246,28 +231,15 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
* of subsequent file growth.
*/
offset = blkoff(fs, length);
- if (offset == 0) {
- oip->i_e2fs_size = length;
- } else {
- lbn = lblkno(fs, length);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = ext2fs_buf_alloc(oip, lbn, offset, cred, &bp,
- aflags);
- if (error)
- return (error);
- oip->i_e2fs_size = length;
+ if (offset != 0) {
size = fs->e2fs_bsize;
- uvm_vnp_setsize(ovp, length);
- uvm_vnp_uncache(ovp);
- bzero((char *)bp->b_data + offset, (u_int)(size - offset));
- allocbuf(bp, size);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
+
+ /* XXXUBC we should handle more than just VREG */
+ uvm_vnp_zerorange(ovp, length, size - offset);
}
+ oip->i_e2fs_size = length;
+ uvm_vnp_setsize(ovp, length);
+
/*
* Calculate index into inode's block list of
* last direct and indirect blocks (if any)
diff --git a/sys/ufs/ext2fs/ext2fs_readwrite.c b/sys/ufs/ext2fs/ext2fs_readwrite.c
index 9ae4322756f..94424055733 100644
--- a/sys/ufs/ext2fs/ext2fs_readwrite.c
+++ b/sys/ufs/ext2fs/ext2fs_readwrite.c
@@ -79,6 +79,8 @@ ext2fs_read(v)
struct uio *uio;
struct m_ext2fs *fs;
struct buf *bp;
+ void *win;
+ vsize_t bytelen;
ufs_daddr_t lbn, nextlbn;
off_t bytesinfile;
long size, xfersize, blkoffset;
@@ -107,6 +109,27 @@ ext2fs_read(v)
if (uio->uio_resid == 0)
return (0);
+ if (vp->v_type == VREG) {
+ error = 0;
+ while (uio->uio_resid > 0) {
+
+ bytelen = MIN(ip->i_e2fs_size - uio->uio_offset,
+ uio->uio_resid);
+
+ if (bytelen == 0) {
+ break;
+ }
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
+ }
+ }
+ goto out;
+ }
+
for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
if ((bytesinfile = ip->i_e2fs_size - uio->uio_offset) <= 0)
break;
@@ -156,8 +179,11 @@ ext2fs_read(v)
if (bp != NULL)
brelse(bp);
+out:
if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
ip->i_flag |= IN_ACCESS;
+ if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
+ error = ext2fs_update(ip, NULL, NULL, 1);
}
return (error);
}
@@ -183,12 +209,17 @@ ext2fs_write(v)
struct proc *p;
ufs_daddr_t lbn;
off_t osize;
- int blkoffset, error, flags, ioflag, resid, size, xfersize;
+ int blkoffset, error, flags, ioflag, resid, xfersize;
+ vsize_t bytelen;
+ void *win;
+ off_t oldoff;
+ boolean_t rv;
ioflag = ap->a_ioflag;
uio = ap->a_uio;
vp = ap->a_vp;
ip = VTOI(vp);
+ error = 0;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
@@ -232,35 +263,65 @@ ext2fs_write(v)
resid = uio->uio_resid;
osize = ip->i_e2fs_size;
- flags = ioflag & IO_SYNC ? B_SYNC : 0;
+ if (vp->v_type == VREG) {
+ while (uio->uio_resid > 0) {
+ oldoff = uio->uio_offset;
+ blkoffset = blkoff(fs, uio->uio_offset);
+ bytelen = MIN(fs->e2fs_bsize - blkoffset,
+ uio->uio_resid);
+
+ /*
+ * XXXUBC if file is mapped and this is the last block,
+ * process one page at a time.
+ */
+
+ error = ext2fs_balloc_range(vp, uio->uio_offset,
+ bytelen, ap->a_cred, 0);
+ if (error) {
+ break;
+ }
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_WRITE);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error) {
+ break;
+ }
+
+ /*
+ * flush what we just wrote if necessary.
+ * XXXUBC simplistic async flushing.
+ */
+
+ if (oldoff >> 16 != uio->uio_offset >> 16) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, (oldoff >> 16) << 16,
+ (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ }
+ }
+ goto out;
+ }
+
+ flags = ioflag & IO_SYNC ? B_SYNC : 0;
for (error = 0; uio->uio_resid > 0;) {
lbn = lblkno(fs, uio->uio_offset);
blkoffset = blkoff(fs, uio->uio_offset);
- xfersize = fs->e2fs_bsize - blkoffset;
- if (uio->uio_resid < xfersize)
- xfersize = uio->uio_resid;
- if (fs->e2fs_bsize > xfersize)
+ xfersize = MIN(fs->e2fs_bsize - blkoffset, uio->uio_resid);
+ if (xfersize < fs->e2fs_bsize)
flags |= B_CLRBUF;
else
flags &= ~B_CLRBUF;
-
error = ext2fs_buf_alloc(ip,
- lbn, blkoffset + xfersize, ap->a_cred, &bp, flags);
+ lbn, blkoffset + xfersize, ap->a_cred, &bp, flags);
if (error)
break;
- if (uio->uio_offset + xfersize > ip->i_e2fs_size) {
+ if (ip->i_e2fs_size < uio->uio_offset + xfersize) {
ip->i_e2fs_size = uio->uio_offset + xfersize;
- uvm_vnp_setsize(vp, ip->i_e2fs_size);
}
- uvm_vnp_uncache(vp);
-
- size = fs->e2fs_bsize - bp->b_resid;
- if (size < xfersize)
- xfersize = size;
-
- error =
- uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
+ error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
if (ioflag & IO_SYNC)
(void)bwrite(bp);
else if (xfersize + blkoffset == fs->e2fs_bsize) {
@@ -272,13 +333,14 @@ ext2fs_write(v)
bdwrite(bp);
if (error || xfersize == 0)
break;
- ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
/*
* If we successfully wrote any data, and we are not the superuser
* we clear the setuid and setgid bits as a precaution against
* tampering.
*/
+out:
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
ip->i_e2fs_mode &= ~(ISUID | ISGID);
if (error) {
@@ -288,8 +350,7 @@ ext2fs_write(v)
uio->uio_offset -= resid - uio->uio_resid;
uio->uio_resid = resid;
}
- } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
+ } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
error = ext2fs_update(ip, NULL, NULL, 1);
- }
return (error);
}
diff --git a/sys/ufs/ext2fs/ext2fs_subr.c b/sys/ufs/ext2fs/ext2fs_subr.c
index 82165b8f242..3263f7e5391 100644
--- a/sys/ufs/ext2fs/ext2fs_subr.c
+++ b/sys/ufs/ext2fs/ext2fs_subr.c
@@ -1,5 +1,4 @@
-/* $OpenBSD: ext2fs_subr.c,v 1.6 2001/09/18 01:39:13 art Exp $ */
-/* $NetBSD: ext2fs_subr.c,v 1.1 1997/06/11 09:34:03 bouyer Exp $ */
+/* $NetBSD: ext2fs_subr.c,v 1.4 2000/03/30 12:41:11 augustss Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -96,7 +95,7 @@ ext2fs_checkoverlap(bp, ip)
if (ep == bp || (ep->b_flags & B_INVAL) ||
ep->b_vp == NULLVP)
continue;
- if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL))
+ if (VOP_BMAP(ep->b_vp, (ufs_daddr_t)0, &vp, (ufs_daddr_t)0, NULL))
continue;
if (vp != ip->i_devvp)
continue;
diff --git a/sys/ufs/ext2fs/ext2fs_vfsops.c b/sys/ufs/ext2fs/ext2fs_vfsops.c
index 6991cf9d650..e438268acbc 100644
--- a/sys/ufs/ext2fs/ext2fs_vfsops.c
+++ b/sys/ufs/ext2fs/ext2fs_vfsops.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: ext2fs_vfsops.c,v 1.16 2001/11/21 22:21:48 csapuntz Exp $ */
-/* $NetBSD: ext2fs_vfsops.c,v 1.1 1997/06/11 09:34:07 bouyer Exp $ */
+/* $OpenBSD: ext2fs_vfsops.c,v 1.17 2001/11/27 05:27:12 art Exp $ */
+/* $NetBSD: ext2fs_vfsops.c,v 1.40 2000/11/27 08:39:53 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -402,9 +402,11 @@ ext2fs_reload(mountp, cred, p)
* Step 1: invalidate all cached meta-data.
*/
devvp = VFSTOUFS(mountp)->um_devvp;
- if (vinvalbuf(devvp, 0, cred, p, 0, 0))
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ error = vinvalbuf(devvp, 0, cred, p, 0, 0);
+ VOP_UNLOCK(devvp, 0, p);
+ if (error)
panic("ext2fs_reload: dirty1");
-
/*
* Step 2: re-read superblock from disk.
*/
@@ -583,14 +585,18 @@ ext2fs_mountfs(devvp, mp, p)
mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN;
mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */
+ mp->mnt_fs_bshift = m_fs->e2fs_bshift;
ump->um_mountp = mp;
ump->um_dev = dev;
ump->um_devvp = devvp;
ump->um_nindir = NINDIR(m_fs);
+ ump->um_lognindir = ffs(NINDIR(m_fs)) - 1;
ump->um_bptrtodb = m_fs->e2fs_fsbtodb;
ump->um_seqinc = 1; /* no frags */
devvp->v_specmountpoint = mp;
return (0);
+
out:
if (bp)
brelse(bp);
@@ -924,6 +930,7 @@ ext2fs_vget(mp, ino, vpp)
ip->i_flag |= IN_MODIFIED;
}
+ vp->v_uvm.u_size = ip->i_e2fs_size;
*vpp = vp;
return (0);
}
diff --git a/sys/ufs/ext2fs/ext2fs_vnops.c b/sys/ufs/ext2fs/ext2fs_vnops.c
index 0faba75ffd2..fffdd494d5a 100644
--- a/sys/ufs/ext2fs/ext2fs_vnops.c
+++ b/sys/ufs/ext2fs/ext2fs_vnops.c
@@ -1,5 +1,5 @@
-/* $OpenBSD: ext2fs_vnops.c,v 1.17 2001/11/06 19:53:21 miod Exp $ */
-/* $NetBSD: ext2fs_vnops.c,v 1.1 1997/06/11 09:34:09 bouyer Exp $ */
+/* $OpenBSD: ext2fs_vnops.c,v 1.18 2001/11/27 05:27:12 art Exp $ */
+/* $NetBSD: ext2fs_vnops.c,v 1.30 2000/11/27 08:39:53 chs Exp $ */
/*
* Copyright (c) 1997 Manuel Bouyer.
@@ -402,8 +402,6 @@ ext2fs_chmod(vp, mode, cred, p)
ip->i_e2fs_mode &= ~ALLPERMS;
ip->i_e2fs_mode |= (mode & ALLPERMS);
ip->i_flag |= IN_CHANGE;
- if ((vp->v_flag & VTEXT) && (ip->i_e2fs_mode & S_ISTXT) == 0)
- (void) uvm_vnp_uncache(vp);
return (0);
}
@@ -1469,7 +1467,11 @@ struct vnodeopv_entry_desc ext2fs_vnodeop_entries[] = {
{ &vop_pathconf_desc, ufs_pathconf }, /* pathconf */
{ &vop_advlock_desc, ext2fs_advlock }, /* advlock */
{ &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */
- { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL }
+ { &vop_ballocn_desc, ext2fs_ballocn },
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_putpages_desc, genfs_putpages },
+ { &vop_size_desc, genfs_size },
+ { NULL, NULL }
};
struct vnodeopv_desc ext2fs_vnodeop_opv_desc =
{ &ext2fs_vnodeop_p, ext2fs_vnodeop_entries };
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 8ddf99405fc..a53d87828c3 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_alloc.c,v 1.35 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ffs_alloc.c,v 1.36 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $ */
/*
@@ -169,14 +169,15 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop)
struct buf **bpp;
ufs_daddr_t *blknop;
{
- register struct fs *fs;
- struct buf *bp = NULL;
+ struct fs *fs;
+ struct buf *bp;
ufs_daddr_t quota_updated = 0;
int cg, request, error;
daddr_t bprev, bno;
if (bpp != NULL)
*bpp = NULL;
+
fs = ip->i_fs;
#ifdef DIAGNOSTIC
if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
@@ -282,7 +283,6 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop)
if (bno <= 0)
goto nospace;
- (void) uvm_vnp_uncache(ITOV(ip));
if (!DOINGSOFTDEP(ITOV(ip)))
ffs_blkfree(ip, bprev, (long)osize);
if (nsize < request)
@@ -362,7 +362,8 @@ ffs_reallocblks(v)
struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
int i, len, start_lvl, end_lvl, pref, ssize;
- if (doreallocblks == 0)
+ /* XXXUBC - don't reallocblks for now */
+ if (1 || doreallocblks == 0)
return (ENOSPC);
vp = ap->a_vp;
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 009adc91ff9..5f6ddc3d94e 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_balloc.c,v 1.18 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ffs_balloc.c,v 1.19 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $ */
/*
@@ -402,3 +402,61 @@ fail:
return (error);
}
+
+/*
+ * VOP_BALLOCN implementation for ffs: allocate backing blocks for the byte
+ * range [a_offset, a_offset + a_length) of a_vp, one filesystem block at a
+ * time.  No buffers are returned; ffs_balloc() is called with a NULL bpp.
+ *
+ * The start of the range is rounded down to a block boundary first.  After
+ * each chunk is allocated, the inode size (and the UVM object size, if it
+ * is smaller) is raised so that it covers the chunk.
+ *
+ * Returns 0 on success, or the first error from ffs_balloc().
+ */
+int
+ffs_ballocn(v)
+	void *v;
+{
+	struct vop_ballocn_args /* {
+		struct vnode *a_vp;
+		off_t a_offset;
+		off_t a_length;
+		struct ucred *a_cred;
+		int a_flags;
+	} */ *ap = v;
+
+	off_t off, len;
+	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
+	struct fs *fs = ip->i_fs;
+	int error, delta, bshift, bsize;
+
+	error = 0;
+	bshift = fs->fs_bshift;
+	bsize = 1 << bshift;
+
+	off = ap->a_offset;
+	len = ap->a_length;
+
+	/* round the start of the range down to a block boundary. */
+	delta = off & (bsize - 1);
+	off -= delta;
+	len += delta;
+
+	while (len > 0) {
+		/*
+		 * clamp the final chunk to what is left.  compare in off_t
+		 * so that a length whose low 32 bits are zero cannot be
+		 * narrowed to a zero-sized chunk, which would stall this
+		 * loop (the kernel min() is assumed to operate on u_int --
+		 * confirm against libkern.h).
+		 */
+		if (len < bsize)
+			bsize = len;
+
+		error = ffs_balloc(ip, off, bsize, ap->a_cred, ap->a_flags,
+		    NULL);
+		if (error) {
+			goto out;
+		}
+
+		/*
+		 * increase file size now, VOP_BALLOC() requires that
+		 * EOF be up-to-date before each call.
+		 */
+
+		if (ip->i_ffs_size < off + bsize) {
+			ip->i_ffs_size = off + bsize;
+			if (vp->v_uvm.u_size < ip->i_ffs_size) {
+				uvm_vnp_setsize(vp, ip->i_ffs_size);
+			}
+		}
+
+		off += bsize;
+		len -= bsize;
+	}
+
+out:
+	return error;
+}
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index eeeba209c69..2875a332a57 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_extern.h,v 1.14 2001/11/13 00:10:56 art Exp $ */
+/* $OpenBSD: ffs_extern.h,v 1.15 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */
/*-
@@ -87,6 +87,7 @@ void ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int));
/* ffs_balloc.c */
int ffs_balloc(struct inode *, off_t, int, struct ucred *, int, struct buf **);
+int ffs_ballocn(void *);
/* ffs_inode.c */
int ffs_init __P((struct vfsconf *));
@@ -128,7 +129,7 @@ int ffs_read __P((void *));
int ffs_write __P((void *));
int ffs_fsync __P((void *));
int ffs_reclaim __P((void *));
-
+int ffs_size __P((void *));
/*
* Soft dependency function prototypes.
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index c81c795b2ac..cddf6a368ca 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_inode.c,v 1.25 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ffs_inode.c,v 1.26 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */
/*
@@ -150,14 +150,14 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
{
struct vnode *ovp;
daddr_t lastblock;
- daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
+ daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR];
daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
struct fs *fs;
- struct buf *bp;
+ struct proc *p = curproc;
int offset, size, level;
long count, nblocks, vflags, blocksreleased = 0;
register int i;
- int aflags, error, allerror;
+ int error, allerror;
off_t osize;
if (length < 0)
@@ -188,10 +188,55 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
if ((error = getinoquota(oip)) != 0)
return (error);
- uvm_vnp_setsize(ovp, length);
+ fs = oip->i_fs;
+ if (length > fs->fs_maxfilesize)
+ return (EFBIG);
+ osize = oip->i_ffs_size;
oip->i_ci.ci_lasta = oip->i_ci.ci_clen
= oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0;
+ /*
+ * Lengthen the size of the file. We must ensure that the
+ * last byte of the file is allocated. Since the smallest
+ * value of osize is 0, length will be at least 1.
+ */
+
+ if (osize < length) {
+ ufs_balloc_range(ovp, length - 1, 1, cred,
+ flags & IO_SYNC ? B_SYNC : 0);
+ oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ return (UFS_UPDATE(oip, 1));
+ }
+
+ /*
+ * When truncating a regular file down to a non-block-aligned size,
+ * we must zero the part of last block which is past the new EOF.
+ * We must synchronously flush the zeroed pages to disk
+ * since the new pages will be invalidated as soon as we
+ * inform the VM system of the new, smaller size.
+ * We must do this before acquiring the GLOCK, since fetching
+ * the pages will acquire the GLOCK internally.
+ * So there is a window where another thread could see a whole
+ * zeroed page past EOF, but that's life.
+ */
+
+ offset = blkoff(fs, length);
+ if (ovp->v_type == VREG && length < osize && offset != 0) {
+ struct uvm_object *uobj;
+ voff_t eoz;
+
+ size = blksize(fs, oip, lblkno(fs, length));
+ eoz = min(lblktosize(fs, lblkno(fs, length)) + size, osize);
+ uvm_vnp_zerorange(ovp, length, eoz - length);
+ uobj = &ovp->v_uvm.u_obj;
+ simple_lock(&uobj->vmobjlock);
+ uobj->pgops->pgo_flush(uobj, length, eoz,
+ PGO_CLEANIT|PGO_DEACTIVATE|PGO_SYNCIO);
+ simple_unlock(&ovp->v_uvm.u_obj.vmobjlock);
+ }
+
+ lockmgr(&ovp->v_glock, LK_EXCLUSIVE, NULL, p);
+
if (DOINGSOFTDEP(ovp)) {
if (length > 0 || softdep_slowdown(ovp)) {
/*
@@ -204,80 +249,29 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred)
* so that it will have no data structures left.
*/
if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT,
- curproc)) != 0)
+ curproc)) != 0) {
+ lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p);
return (error);
+ }
} else {
+ uvm_vnp_setsize(ovp, length);
(void)ufs_quota_free_blocks(oip, oip->i_ffs_blocks,
NOCRED);
softdep_setup_freeblocks(oip, length);
(void) vinvalbuf(ovp, 0, cred, curproc, 0, 0);
+ lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p);
oip->i_flag |= IN_CHANGE | IN_UPDATE;
return (UFS_UPDATE(oip, 0));
}
}
- fs = oip->i_fs;
- osize = oip->i_ffs_size;
/*
- * Lengthen the size of the file. We must ensure that the
- * last byte of the file is allocated. Since the smallest
- * value of osize is 0, length will be at least 1.
+ * Reduce the size of the file.
*/
- if (osize < length) {
- if (length > fs->fs_maxfilesize)
- return (EFBIG);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = UFS_BUF_ALLOC(oip, length - 1, 1,
- cred, aflags, &bp);
- if (error)
- return (error);
- oip->i_ffs_size = length;
- uvm_vnp_setsize(ovp, length);
- (void) uvm_vnp_uncache(ovp);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
- oip->i_flag |= IN_CHANGE | IN_UPDATE;
- return (UFS_UPDATE(oip, MNT_WAIT));
- }
+ oip->i_ffs_size = length;
uvm_vnp_setsize(ovp, length);
/*
- * Shorten the size of the file. If the file is not being
- * truncated to a block boundary, the contents of the
- * partial block following the end of the file must be
- * zero'ed in case it ever becomes accessible again because
- * of subsequent file growth. Directories however are not
- * zero'ed as they should grow back initialized to empty.
- */
- offset = blkoff(fs, length);
- if (offset == 0) {
- oip->i_ffs_size = length;
- } else {
- lbn = lblkno(fs, length);
- aflags = B_CLRBUF;
- if (flags & IO_SYNC)
- aflags |= B_SYNC;
- error = UFS_BUF_ALLOC(oip, length - 1, 1,
- cred, aflags, &bp);
- if (error)
- return (error);
- oip->i_ffs_size = length;
- size = blksize(fs, oip, lbn);
- (void) uvm_vnp_uncache(ovp);
- if (ovp->v_type != VDIR)
- bzero((char *)bp->b_data + offset,
- (u_int)(size - offset));
- allocbuf(bp, size);
- if (aflags & B_SYNC)
- bwrite(bp);
- else
- bawrite(bp);
- }
- /*
* Calculate index into inode's block list of
* last direct and indirect blocks (if any)
* which we want to keep. Lastblock is -1 when
@@ -402,6 +396,7 @@ done:
oip->i_ffs_blocks -= blocksreleased;
if (oip->i_ffs_blocks < 0) /* sanity */
oip->i_ffs_blocks = 0;
+ lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p);
oip->i_flag |= IN_CHANGE;
(void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED);
return (allerror);
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 1d66094cc06..7a66eed4d8b 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_softdep.c,v 1.25 2001/11/13 14:19:24 art Exp $ */
+/* $OpenBSD: ffs_softdep.c,v 1.26 2001/11/27 05:27:12 art Exp $ */
/*
* Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved.
*
@@ -56,6 +56,7 @@
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/proc.h>
+#include <sys/pool.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/vnode.h>
@@ -69,6 +70,10 @@
#include <ufs/ffs/ffs_extern.h>
#include <ufs/ufs/ufs_extern.h>
+#include <uvm/uvm.h>
+struct pool sdpcpool;
+int softdep_lockedbufs;
+
#define STATIC
/*
@@ -109,6 +114,13 @@ extern char *memname[];
*/
/*
+ * Definitions for page cache info hashtable.
+ */
+#define PCBPHASHSIZE 1024
+LIST_HEAD(, buf) pcbphashhead[PCBPHASHSIZE];
+#define PCBPHASH(vp, lbn) ((((vaddr_t)(vp) >> 8) ^ (lbn)) & (PCBPHASHSIZE - 1))
+
+/*
* Internal function prototypes.
*/
STATIC void softdep_error __P((char *, int));
@@ -160,6 +172,13 @@ STATIC void pause_timer __P((void *));
STATIC int request_cleanup __P((int, int));
STATIC int process_worklist_item __P((struct mount *, int));
STATIC void add_to_worklist __P((struct worklist *));
+STATIC struct buf *softdep_setup_pagecache __P((struct inode *, ufs_lbn_t,
+ long));
+STATIC void softdep_collect_pagecache __P((struct inode *));
+STATIC void softdep_free_pagecache __P((struct inode *));
+STATIC struct vnode *softdep_lookupvp(struct fs *, ino_t);
+STATIC struct buf *softdep_lookup_pcbp __P((struct vnode *, ufs_lbn_t));
+void softdep_pageiodone __P((struct buf *));
/*
* Exported softdep operations.
@@ -176,6 +195,7 @@ struct bio_ops bioops = {
softdep_deallocate_dependencies, /* io_deallocate */
softdep_move_dependencies, /* io_movedeps */
softdep_count_dependencies, /* io_countdeps */
+ softdep_pageiodone, /* io_pagedone */
};
/*
@@ -1055,6 +1075,7 @@ top:
void
softdep_initialize()
{
+ int i;
LIST_INIT(&mkdirlisthd);
LIST_INIT(&softdep_workitem_pending);
@@ -1073,6 +1094,11 @@ softdep_initialize()
newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash);
sema_init(&newblk_in_progress, "newblk", PRIBIO, 0);
timeout_set(&proc_waiting_timeout, pause_timer, 0);
+ pool_init(&sdpcpool, sizeof(struct buf), 0, 0, 0, "sdpcpool",
+ 0, pool_page_alloc_nointr, pool_page_free_nointr, M_TEMP);
+ for (i = 0; i < PCBPHASHSIZE; i++) {
+ LIST_INIT(&pcbphashhead[i]);
+ }
}
/*
@@ -1325,11 +1351,16 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
LIST_REMOVE(newblk, nb_hash);
FREE(newblk, M_NEWBLK);
+ /*
+ * If we were not passed a bp to attach the dep to,
+ * then this must be for a regular file.
+ * Allocate a buffer to represent the page cache pages
+ * that are the real dependency. The pages themselves
+ * cannot refer to the dependency since we don't want to
+ * add a field to struct vm_page for this.
+ */
if (bp == NULL) {
- /*
- * XXXUBC - Yes, I know how to fix this, but not right now.
- */
- panic("softdep_setup_allocdirect: Bonk art in the head\n");
+ bp = softdep_setup_pagecache(ip, lbn, newsize);
}
WORKLIST_INSERT(&bp->b_dep, &adp->ad_list);
if (lbn >= NDADDR) {
@@ -1563,10 +1594,7 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0)
WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list);
if (nbp == NULL) {
- /*
- * XXXUBC - Yes, I know how to fix this, but not right now.
- */
- panic("softdep_setup_allocindir_page: Bonk art in the head\n");
+ nbp = softdep_setup_pagecache(ip, lbn, ip->i_fs->fs_bsize);
}
WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list);
FREE_LOCK(&lk);
@@ -1745,6 +1773,7 @@ softdep_setup_freeblocks(ip, length)
int i, delay, error;
fs = ip->i_fs;
+ vp = ITOV(ip);
if (length != 0)
panic("softdep_setup_freeblocks: non-zero length");
MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks),
@@ -1804,9 +1833,15 @@ softdep_setup_freeblocks(ip, length)
* with this inode are obsolete and can simply be de-allocated.
* We must first merge the two dependency lists to get rid of
* any duplicate freefrag structures, then purge the merged list.
+ * We must remove any pagecache markers from the pagecache
+ * hashtable first because any I/Os in flight will want to see
+ * dependencies attached to their pagecache markers. We cannot
+ * free the pagecache markers until after we've freed all the
+ * dependencies that reference them later.
* If we still have a bitmap dependency, then the inode has never
* been written to disk, so we can free any fragments without delay.
*/
+ softdep_collect_pagecache(ip);
merge_inode_lists(inodedep);
while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
free_allocdirect(&inodedep->id_inoupdt, adp, delay);
@@ -1818,7 +1853,6 @@ softdep_setup_freeblocks(ip, length)
* Once they are all there, walk the list and get rid of
* any dependencies.
*/
- vp = ITOV(ip);
ACQUIRE_LOCK(&lk);
drain_output(vp, 1);
while (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) {
@@ -1830,6 +1864,7 @@ softdep_setup_freeblocks(ip, length)
brelse(bp);
ACQUIRE_LOCK(&lk);
}
+ softdep_free_pagecache(ip);
if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0)
(void) free_inodedep(inodedep);
FREE_LOCK(&lk);
@@ -2898,7 +2933,6 @@ handle_workitem_freefile(freefile)
struct freefile *freefile;
{
struct fs *fs;
- struct vnode vp;
struct inode tip;
struct inodedep *idp;
int error;
@@ -2914,8 +2948,7 @@ handle_workitem_freefile(freefile)
tip.i_devvp = freefile->fx_devvp;
tip.i_dev = freefile->fx_devvp->v_rdev;
tip.i_fs = fs;
- tip.i_vnode = &vp;
- vp.v_data = &tip;
+ tip.i_vnode = NULL;
if ((error = ffs_freefile(&tip, freefile->fx_oldinum,
freefile->fx_mode)) != 0) {
@@ -4313,6 +4346,7 @@ flush_inodedep_deps(fs, ino)
struct allocdirect *adp;
int error, waitfor;
struct buf *bp;
+ struct vnode *vp;
/*
* This work is done in two passes. The first pass grabs most
@@ -4332,6 +4366,27 @@ flush_inodedep_deps(fs, ino)
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
return (0);
+
+ /*
+ * When file data was in the buffer cache,
+ * softdep_sync_metadata() would start i/o on
+ * file data buffers itself. But now that
+ * we're using the page cache to hold file data,
+ * we need something else to trigger those flushes.
+ * let's just do it here.
+ */
+
+ vp = softdep_lookupvp(fs, ino);
+ if (vp) {
+ struct uvm_object *uobj = &vp->v_uvm.u_obj;
+
+ simple_lock(&uobj->vmobjlock);
+ (uobj->pgops->pgo_flush)(uobj, 0, 0,
+ PGO_ALLPAGES|PGO_CLEANIT|
+ (waitfor == MNT_NOWAIT ? 0: PGO_SYNCIO));
+ simple_unlock(&uobj->vmobjlock);
+ }
+
TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) {
if (adp->ad_state & DEPCOMPLETE)
continue;
@@ -4944,3 +4999,196 @@ softdep_error(func, error)
/* XXX should do something better! */
printf("%s: got error %d while accessing filesystem\n", func, error);
}
+
+/*
+ * Find or create the marker buf that stands in for the page cache pages
+ * backing logical block "lbn" of inode "ip", so a dependency can be hung
+ * off its b_dep list.
+ *
+ * A newly created marker is entered into both the pagecache hashtable and
+ * the inode's i_pcbufhd list.  An existing marker is grown to "size" and
+ * its b_resid is raised by the same amount.  Returns the marker buf.
+ */
+STATIC struct buf *
+softdep_setup_pagecache(ip, lbn, size)
+	struct inode *ip;
+	ufs_lbn_t lbn;
+	long size;
+{
+	struct vnode *vp = ITOV(ip);
+	struct buf *pcbp;
+	int s;
+
+	pcbp = softdep_lookup_pcbp(vp, lbn);
+	if (pcbp != NULL) {
+		/* Existing marker: it is only ever allowed to grow. */
+		KASSERT(size >= pcbp->b_bcount);
+		pcbp->b_resid += size - pcbp->b_bcount;
+		pcbp->b_bcount = size;
+		return pcbp;
+	}
+
+	/* No marker yet: take one from the pool and enter it in the hash. */
+	s = splbio();
+	pcbp = pool_get(&sdpcpool, PR_WAITOK);
+	splx(s);
+
+	pcbp->b_vp = vp;
+	pcbp->b_lblkno = lbn;
+	pcbp->b_bcount = pcbp->b_resid = size;
+	LIST_INIT(&pcbp->b_dep);
+	LIST_INSERT_HEAD(&pcbphashhead[PCBPHASH(vp, lbn)], pcbp, b_hash);
+	LIST_INSERT_HEAD(&ip->i_pcbufhd, pcbp, b_vnbufs);
+	return pcbp;
+}
+
+/*
+ * softdep_collect_pagecache() and softdep_free_pagecache()
+ * are used to remove page cache dependency buffers when
+ * a file is being truncated to 0.
+ */
+
+/*
+ * Unhash every pagecache marker buf on the inode's i_pcbufhd list.
+ * The markers remain on that per-inode list; only their hashtable
+ * linkage is undone here.
+ */
+STATIC void
+softdep_collect_pagecache(ip)
+	struct inode *ip;
+{
+	struct buf *pcbp;
+
+	for (pcbp = LIST_FIRST(&ip->i_pcbufhd); pcbp != NULL;
+	    pcbp = LIST_NEXT(pcbp, b_vnbufs)) {
+		LIST_REMOVE(pcbp, b_hash);
+	}
+}
+
+/*
+ * Empty the inode's i_pcbufhd list, returning each marker buf to the
+ * pool.  Every marker is asserted to have no dependencies left on its
+ * b_dep list before it is released.
+ */
+STATIC void
+softdep_free_pagecache(ip)
+	struct inode *ip;
+{
+	struct buf *pcbp;
+
+	/* Keep taking the head until the list is empty. */
+	while ((pcbp = LIST_FIRST(&ip->i_pcbufhd)) != NULL) {
+		LIST_REMOVE(pcbp, b_vnbufs);
+		KASSERT(LIST_FIRST(&pcbp->b_dep) == NULL);
+		pool_put(&sdpcpool, pcbp);
+	}
+}
+
+/*
+ * Look up the vnode for inode number "ino" on the FFS mount whose
+ * superblock is "fs".
+ *
+ * Walks the mount list for an FFS mount using "fs" and returns the result
+ * of ufs_ihashlookup() on that mount's device.  Returns NULL when no such
+ * mount is on the list.
+ */
+STATIC struct vnode *
+softdep_lookupvp(fs, ino)
+	struct fs *fs;
+	ino_t ino;
+{
+	struct mount *mp;
+	extern struct vfsops ffs_vfsops;
+
+	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
+		if (mp->mnt_op == &ffs_vfsops &&
+		    VFSTOUFS(mp)->um_fs == fs) {
+			break;
+		}
+	}
+
+	/*
+	 * A circular queue is terminated by the address of its head, not
+	 * by NULL, so that is what mp holds when the walk found no match.
+	 * Testing for NULL here would never fire and the list head would
+	 * be dereferenced as if it were a struct mount.
+	 */
+	if (mp == (void *)&mountlist) {
+		return NULL;
+	}
+	return ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino);
+}
+
+/*
+ * Search the pagecache hashtable for the marker buf belonging to logical
+ * block "lbn" of vnode "vp".  Returns the marker, or NULL if the hash
+ * chain holds none for that (vp, lbn) pair.
+ */
+STATIC struct buf *
+softdep_lookup_pcbp(vp, lbn)
+	struct vnode *vp;
+	ufs_lbn_t lbn;
+{
+	struct buf *pcbp;
+
+	pcbp = LIST_FIRST(&pcbphashhead[PCBPHASH(vp, lbn)]);
+	while (pcbp != NULL) {
+		if (pcbp->b_lblkno == lbn && pcbp->b_vp == vp) {
+			return pcbp;
+		}
+		pcbp = LIST_NEXT(pcbp, b_hash);
+	}
+	return NULL;
+}
+
+/*
+ * Do softdep i/o completion processing for page cache writes.
+ *
+ * "bp" describes a finished write; a read is asserted against.  For each
+ * page mapped by the buffer, the page is walked in block-sized steps and
+ * the amount written is subtracted from the b_resid of the pagecache
+ * marker buf for that block, if one exists.  Once a marker's b_resid
+ * reaches zero, every allocdirect/allocindir on its b_dep list is marked
+ * COMPLETE and passed to its partdone handler, and the marker is removed
+ * from the hash and the inode list and returned to the pool.
+ *
+ * Panics if a marker's b_resid goes negative or if a dependency of any
+ * other type is found on a marker.
+ */
+
+void
+softdep_pageiodone(bp)
+ struct buf *bp;
+{
+ /* b_bufsize expressed in pages */
+ int npages = bp->b_bufsize >> PAGE_SHIFT;
+ struct vnode *vp = bp->b_vp;
+ struct vm_page *pg;
+ /* marker found on the previous step, reused while lbn is unchanged */
+ struct buf *pcbp = NULL;
+ struct allocdirect *adp;
+ struct allocindir *aip;
+ struct worklist *wk;
+ ufs_lbn_t lbn;
+ voff_t off;
+ /* bytes of this i/o not yet credited to a marker */
+ long iosize = bp->b_bcount;
+ int size, asize, bshift, bsize;
+ int i;
+
+ KASSERT(!(bp->b_flags & B_READ));
+ bshift = vp->v_mount->mnt_fs_bshift;
+ bsize = 1 << bshift;
+ /* each step credits at most one page or one block, whichever is smaller */
+ asize = min(PAGE_SIZE, bsize);
+ ACQUIRE_LOCK(&lk);
+ for (i = 0; i < npages; i++) {
+ pg = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT));
+ if (pg == NULL) {
+ /* no page resolved for this address; nothing to account */
+ continue;
+ }
+
+ for (off = pg->offset;
+ off < pg->offset + PAGE_SIZE;
+ off += bsize) {
+ /* never credit more than what remains of the i/o */
+ size = min(asize, iosize);
+ iosize -= size;
+ lbn = off >> bshift;
+ if (pcbp == NULL || pcbp->b_lblkno != lbn) {
+ pcbp = softdep_lookup_pcbp(vp, lbn);
+ }
+ if (pcbp == NULL) {
+ /* no marker for this block, so no dependency to complete */
+ continue;
+ }
+ pcbp->b_resid -= size;
+ if (pcbp->b_resid < 0) {
+ panic("softdep_pageiodone: "
+ "resid < 0, vp %p lbn 0x%lx pcbp %p",
+ vp, lbn, pcbp);
+ }
+ if (pcbp->b_resid > 0) {
+ /* part of this block is still outstanding */
+ continue;
+ }
+
+ /*
+ * We've completed all the i/o for this block.
+ * mark the dep complete.
+ */
+
+ KASSERT(LIST_FIRST(&pcbp->b_dep) != NULL);
+ while ((wk = LIST_FIRST(&pcbp->b_dep))) {
+ WORKLIST_REMOVE(wk);
+ switch (wk->wk_type) {
+ case D_ALLOCDIRECT:
+ adp = WK_ALLOCDIRECT(wk);
+ adp->ad_state |= COMPLETE;
+ handle_allocdirect_partdone(adp);
+ break;
+
+ case D_ALLOCINDIR:
+ aip = WK_ALLOCINDIR(wk);
+ aip->ai_state |= COMPLETE;
+ handle_allocindir_partdone(aip);
+ break;
+
+ default:
+ panic("softdep_pageiodone: "
+ "bad type %d, pcbp %p wk %p",
+ wk->wk_type, pcbp, wk);
+ }
+ }
+ /* the marker has no dependencies left: unlink it from both lists and free it */
+ LIST_REMOVE(pcbp, b_hash);
+ LIST_REMOVE(pcbp, b_vnbufs);
+ pool_put(&sdpcpool, pcbp);
+ /* the cached marker is gone; force a fresh lookup on the next step */
+ pcbp = NULL;
+ }
+ }
+ FREE_LOCK(&lk);
+}
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index b1dee123893..19c77726fa8 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_vfsops.c,v 1.45 2001/11/21 22:21:48 csapuntz Exp $ */
+/* $OpenBSD: ffs_vfsops.c,v 1.46 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $ */
/*
@@ -737,11 +737,14 @@ ffs_mountfs(devvp, mp, p)
else
mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
+ mp->mnt_fs_bshift = fs->fs_bshift;
+ mp->mnt_dev_bshift = DEV_BSHIFT;
mp->mnt_flag |= MNT_LOCAL;
ump->um_mountp = mp;
ump->um_dev = dev;
ump->um_devvp = devvp;
ump->um_nindir = fs->fs_nindir;
+ ump->um_lognindir = ffs(fs->fs_nindir) - 1;
ump->um_bptrtodb = fs->fs_fsbtodb;
ump->um_seqinc = fs->fs_frag;
for (i = 0; i < MAXQUOTAS; i++)
@@ -1119,6 +1122,7 @@ retry:
ip->i_fs = fs = ump->um_fs;
ip->i_dev = dev;
ip->i_number = ino;
+ LIST_INIT(&ip->i_pcbufhd);
ip->i_vtbl = &ffs_vtbl;
/*
@@ -1199,6 +1203,7 @@ retry:
ip->i_ffs_uid = ip->i_din.ffs_din.di_ouid; /* XXX */
ip->i_ffs_gid = ip->i_din.ffs_din.di_ogid; /* XXX */
} /* XXX */
+ uvm_vnp_setsize(vp, ip->i_ffs_size);
*vpp = vp;
return (0);
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 26e9bbaf9da..8190ef82eb3 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_vnops.c,v 1.20 2001/11/06 19:53:21 miod Exp $ */
+/* $OpenBSD: ffs_vnops.c,v 1.21 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */
/*
@@ -107,8 +107,13 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
{ &vop_advlock_desc, ufs_advlock }, /* advlock */
{ &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */
{ &vop_bwrite_desc, vop_generic_bwrite },
- { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL }
+ { &vop_ballocn_desc, ffs_ballocn },
+ { &vop_getpages_desc, genfs_getpages },
+ { &vop_putpages_desc, genfs_putpages },
+ { &vop_size_desc, ffs_size },
+ { NULL, NULL }
};
+
struct vnodeopv_desc ffs_vnodeop_opv_desc =
{ &ffs_vnodeop_p, ffs_vnodeop_entries };
@@ -229,6 +234,7 @@ ffs_fsync(v)
struct vnode *vp = ap->a_vp;
struct buf *bp, *nbp;
int s, error, passes, skipmeta;
+ struct uvm_object *uobj;
if (vp->v_type == VBLK &&
vp->v_specmountpoint != NULL &&
@@ -236,13 +242,22 @@ ffs_fsync(v)
softdep_fsync_mountdev(vp);
/*
- * Flush all dirty buffers associated with a vnode.
+ * Flush all dirty data associated with a vnode.
*/
passes = NIADDR + 1;
skipmeta = 0;
if (ap->a_waitfor == MNT_WAIT)
skipmeta = 1;
s = splbio();
+
+ if (vp->v_type == VREG) {
+ uobj = &vp->v_uvm.u_obj;
+ simple_lock(&uobj->vmobjlock);
+ (uobj->pgops->pgo_flush)(uobj, 0, 0, PGO_ALLPAGES|PGO_CLEANIT|
+ ((ap->a_waitfor == MNT_WAIT) ? PGO_SYNCIO : 0));
+ simple_unlock(&uobj->vmobjlock);
+ }
+
loop:
for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp;
bp = LIST_NEXT(bp, b_vnbufs))
@@ -281,8 +296,10 @@ loop:
*/
if (passes > 0 || ap->a_waitfor != MNT_WAIT)
(void) bawrite(bp);
- else if ((error = bwrite(bp)) != 0)
+ else if ((error = bwrite(bp)) != 0) {
+ printf("ffs_fsync: bwrite failed %d\n", error);
return (error);
+ }
s = splbio();
/*
* Since we may have slept during the I/O, we need
@@ -325,7 +342,11 @@ loop:
}
}
splx(s);
- return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
+
+ error = (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT));
+ if (error)
+ printf("ffs_fsync: UFS_UPDATE failed. %d\n", error);
+ return (error);
}
/*
@@ -349,3 +370,31 @@ ffs_reclaim(v)
vp->v_data = NULL;
return (0);
}
+
+/*
+ * Compute the last logical file offset that should be written for a
+ * write ending at "a_size".
+ *
+ * The size is rounded up to a fragment boundary when its block is a
+ * direct block that is not before the file's current last block;
+ * in every other case it is rounded up to a full block.  The result is
+ * stored through a_eobp.  Always returns 0.
+ */
+int
+ffs_size(v)
+	void *v;
+{
+	struct vop_size_args /* {
+		struct vnode *a_vp;
+		off_t a_size;
+		off_t *a_eobp;
+	} */ *ap = v;
+	struct inode *ip = VTOI(ap->a_vp);
+	struct fs *fs = ip->i_fs;
+	ufs_lbn_t cur_lbn = lblkno(fs, ip->i_ffs_size);
+	ufs_lbn_t new_lbn = lblkno(fs, ap->a_size);
+
+	/* Indirect block, or a block before the current last one. */
+	if (new_lbn >= NDADDR || cur_lbn > new_lbn) {
+		*ap->a_eobp = blkroundup(fs, ap->a_size);
+		return 0;
+	}
+
+	*ap->a_eobp = fragroundup(fs, ap->a_size);
+	return 0;
+}
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 5665b276a0f..98c73de5579 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: inode.h,v 1.16 2001/07/04 06:10:50 angelos Exp $ */
+/* $OpenBSD: inode.h,v 1.17 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $ */
/*
@@ -84,6 +84,7 @@ struct inode {
#define i_e2fs inode_u.e2fs
struct cluster_info i_ci;
+ LIST_HEAD(,buf) i_pcbufhd;
struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
u_quad_t i_modrev; /* Revision level for NFS lease. */
struct lockf *i_lockf;/* Head of byte-level lock list. */
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index add641e15ce..fdf5c1be055 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_bmap.c,v 1.10 2001/11/21 22:24:24 csapuntz Exp $ */
+/* $OpenBSD: ufs_bmap.c,v 1.11 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_bmap.c,v 1.3 1996/02/09 22:36:00 christos Exp $ */
/*
@@ -233,6 +233,7 @@ ufs_getlbns(vp, bn, ap, nump)
long metalbn, realbn;
struct ufsmount *ump;
int64_t blockcnt;
+ int lbc;
int i, numlevels, off;
ump = VFSTOUFS(vp->v_mount);
@@ -260,10 +261,14 @@ ufs_getlbns(vp, bn, ap, nump)
* at the given level of indirection, and NIADDR - i is the number
* of levels of indirection needed to locate the requested block.
*/
- for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+ bn -= NDADDR;
+ for (lbc = 0, i = NIADDR;; i--, bn -= blockcnt) {
if (i == 0)
return (EFBIG);
- blockcnt *= MNINDIR(ump);
+
+ lbc += ump->um_lognindir;
+ blockcnt = (int64_t)1 << lbc;
+
if (bn < blockcnt)
break;
}
@@ -289,8 +294,9 @@ ufs_getlbns(vp, bn, ap, nump)
if (metalbn == realbn)
break;
- blockcnt /= MNINDIR(ump);
- off = (bn / blockcnt) % MNINDIR(ump);
+ lbc -= ump->um_lognindir;
+ blockcnt = (int64_t)1 << lbc;
+ off = (bn >> lbc) & (MNINDIR(ump) - 1);
++numlevels;
ap->in_lbn = metalbn;
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index 50175a0ec86..fc39e16b45e 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_extern.h,v 1.12 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ufs_extern.h,v 1.13 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $ */
/*-
@@ -121,6 +121,7 @@ void ufs_ihashrem __P((struct inode *));
/* ufs_inode.c */
int ufs_init __P((struct vfsconf *));
int ufs_reclaim __P((struct vnode *, struct proc *));
+int ufs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *, int));
/* ufs_lookup.c */
void ufs_dirbad __P((struct inode *, doff_t, char *));
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index 8a3935632fb..3865342fde0 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_inode.c,v 1.10 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ufs_inode.c,v 1.11 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_inode.c,v 1.7 1996/05/11 18:27:52 mycroft Exp $ */
/*
@@ -151,3 +151,150 @@ ufs_reclaim(vp, p)
ufs_quota_delete(ip);
return (0);
}
+
+/*
+ * allocate a range of blocks in a file.
+ * after this function returns, any page entirely contained within the range
+ * will map to invalid data and thus must be overwritten before it is made
+ * accessible to others.
+ *
+ * vp    - vnode of the file being extended or filled in.
+ * off   - starting byte offset of the range.
+ * len   - length of the range in bytes.
+ * cred  - credentials handed on to VOP_BALLOCN().
+ * flags - handed on to VOP_BALLOCN(); B_SYNC additionally makes the
+ *         flush of the old last block synchronous.
+ *
+ * Returns 0 on success, otherwise the error from VOP_SIZE(),
+ * VOP_GETPAGES() or VOP_BALLOCN().
+ */
+
+int
+ufs_balloc_range(vp, off, len, cred, flags)
+ struct vnode *vp;
+ off_t off, len;
+ struct ucred *cred;
+ int flags;
+{
+ off_t oldeof, neweof, oldeob, neweob, oldpagestart, pagestart;
+ struct uvm_object *uobj;
+ int i, delta, error, npages1, npages2;
+ int bshift = vp->v_mount->mnt_fs_bshift;
+ int bsize = 1 << bshift;
+ /* pages per filesystem block, but never less than one */
+ int ppb = MAX(bsize >> PAGE_SHIFT, 1);
+ /* pgs1: pages of the old last block; pgs2: pages of the new range */
+ struct vm_page *pgs1[ppb], *pgs2[ppb];
+ UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist);
+ UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x",
+ vp, off, len, vp->v_uvm.u_size);
+
+ /* end-of-block offset for the file at its current size */
+ oldeof = vp->v_uvm.u_size;
+ error = VOP_SIZE(vp, oldeof, &oldeob);
+ if (error) {
+ return error;
+ }
+
+ /* end-of-block offset once the range has been allocated */
+ neweof = MAX(vp->v_uvm.u_size, off + len);
+ error = VOP_SIZE(vp, neweof, &neweob);
+ if (error) {
+ return error;
+ }
+
+ error = 0;
+ uobj = &vp->v_uvm.u_obj;
+ /* a non-NULL first slot is used below to mean "pages are being held" */
+ pgs1[0] = pgs2[0] = NULL;
+
+ /*
+ * if the last block in the file is not a full block (ie. it is a
+ * fragment), and this allocation is causing the fragment to change
+ * size (either to expand the fragment or promote it to a full block),
+ * cache the old last block (at its new size).
+ */
+
+ /* old EOF rounded down to both a page and a block boundary */
+ oldpagestart = trunc_page(oldeof) & ~(bsize - 1);
+ if ((oldeob & (bsize - 1)) != 0 && oldeob != neweob) {
+ npages1 = MIN(ppb, (round_page(neweob) - oldpagestart) >>
+ PAGE_SHIFT);
+ memset(pgs1, 0, npages1 * sizeof(struct vm_page *));
+ simple_lock(&uobj->vmobjlock);
+ error = VOP_GETPAGES(vp, oldpagestart, pgs1, &npages1,
+ 0, VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF);
+ if (error) {
+ goto out;
+ }
+ /*
+ * NOTE(review): taking the lock again here implies that
+ * VOP_GETPAGES() returns with the object lock released -- confirm.
+ */
+ simple_lock(&uobj->vmobjlock);
+ uvm_lock_pageq();
+ for (i = 0; i < npages1; i++) {
+ UVMHIST_LOG(ubchist, "got pgs1[%d] %p", i, pgs1[i],0,0);
+ KASSERT((pgs1[i]->flags & PG_RELEASED) == 0);
+ /* the page is no longer flagged clean */
+ pgs1[i]->flags &= ~PG_CLEAN;
+ uvm_pageactivate(pgs1[i]);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ }
+
+ /*
+ * cache the new range as well. this will create zeroed pages
+ * where the new block will be and keep them locked until the
+ * new block is allocated, so there will be no window where
+ * the old contents of the new block is visible to racing threads.
+ */
+
+ /* start of the range rounded down to both a page and a block boundary */
+ pagestart = trunc_page(off) & ~(bsize - 1);
+ /* skipped when pgs1 already holds pages starting at this same offset */
+ if (pagestart != oldpagestart || pgs1[0] == NULL) {
+ npages2 = MIN(ppb, (round_page(neweob) - pagestart) >>
+ PAGE_SHIFT);
+ memset(pgs2, 0, npages2 * sizeof(struct vm_page *));
+ simple_lock(&uobj->vmobjlock);
+ error = VOP_GETPAGES(vp, pagestart, pgs2, &npages2, 0,
+ VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF);
+ if (error) {
+ goto out;
+ }
+ /* NOTE(review): same relock-after-getpages assumption as above -- confirm. */
+ simple_lock(&uobj->vmobjlock);
+ uvm_lock_pageq();
+ for (i = 0; i < npages2; i++) {
+ UVMHIST_LOG(ubchist, "got pgs2[%d] %p", i, pgs2[i],0,0);
+ KASSERT((pgs2[i]->flags & PG_RELEASED) == 0);
+ pgs2[i]->flags &= ~PG_CLEAN;
+ uvm_pageactivate(pgs2[i]);
+ }
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ }
+
+ /*
+ * adjust off to be block-aligned.
+ */
+
+ /* len grows by the same amount so the end of the range is unchanged */
+ delta = off & (bsize - 1);
+ off -= delta;
+ len += delta;
+
+ /*
+ * now allocate the range.
+ */
+
+ /* the allocation itself runs with v_glock held exclusively */
+ lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL, curproc);
+ error = VOP_BALLOCN(vp, off, len, cred, flags);
+ lockmgr(&vp->v_glock, LK_RELEASE, NULL, curproc);
+
+ /*
+ * unbusy any pages we are holding.
+ * if we got an error, free any pages we created past the old eob.
+ */
+
+out:
+ /* reached both on success and from the getpages error paths */
+ simple_lock(&uobj->vmobjlock);
+ if (error) {
+ (void) (uobj->pgops->pgo_flush)(uobj, round_page(oldeob), 0,
+ PGO_FREE);
+ }
+ if (pgs1[0] != NULL) {
+ uvm_page_unbusy(pgs1, npages1);
+
+ /*
+ * The data in the frag might be moving to a new disk location.
+ * We need to flush pages to the new disk locations.
+ */
+
+ /* from the block containing old EOF to the next block boundary, capped at new EOF */
+ (uobj->pgops->pgo_flush)(uobj, oldeof & ~(bsize - 1),
+ MIN((oldeof + bsize) & ~(bsize - 1), neweof),
+ PGO_CLEANIT | ((flags & B_SYNC) ? PGO_SYNCIO : 0));
+ }
+ if (pgs2[0] != NULL) {
+ uvm_page_unbusy(pgs2, npages2);
+ }
+ simple_unlock(&uobj->vmobjlock);
+ return error;
+}
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
index bbf1391dfe5..e0777e4b55f 100644
--- a/sys/ufs/ufs/ufs_readwrite.c
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_readwrite.c,v 1.19 2001/06/27 04:58:49 art Exp $ */
+/* $OpenBSD: ufs_readwrite.c,v 1.20 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_readwrite.c,v 1.9 1996/05/11 18:27:57 mycroft Exp $ */
/*-
@@ -76,21 +76,22 @@ READ(v)
int a_ioflag;
struct ucred *a_cred;
} */ *ap = v;
- register struct vnode *vp;
- register struct inode *ip;
- register struct uio *uio;
- register FS *fs;
+ struct vnode *vp;
+ struct inode *ip;
+ struct uio *uio;
+ FS *fs;
+ void *win;
+ vsize_t bytelen;
struct buf *bp;
daddr_t lbn, nextlbn;
off_t bytesinfile;
long size, xfersize, blkoffset;
int error;
- u_short mode;
vp = ap->a_vp;
ip = VTOI(vp);
- mode = ip->i_ffs_mode;
uio = ap->a_uio;
+ error = 0;
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_READ)
@@ -110,6 +111,24 @@ READ(v)
if (uio->uio_resid == 0)
return (0);
+ if (uio->uio_offset >= ip->i_ffs_size)
+ goto out;
+
+ if (vp->v_type == VREG) {
+ while (uio->uio_resid > 0) {
+ bytelen = min(ip->i_ffs_size - uio->uio_offset,
+ uio->uio_resid);
+ if (bytelen == 0)
+ break;
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset,
+ &bytelen, UBC_READ);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+ if (error)
+ break;
+ }
+ goto out;
+ }
for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
if ((bytesinfile = ip->i_ffs_size - uio->uio_offset) <= 0)
@@ -131,9 +150,6 @@ READ(v)
#else
if (lblktosize(fs, nextlbn) >= ip->i_ffs_size)
error = bread(vp, lbn, size, NOCRED, &bp);
- else if (doclusterread)
- error = cluster_read(vp, &ip->i_ci,
- ip->i_ffs_size, lbn, size, NOCRED, &bp);
else if (lbn - 1 == ip->i_ci.ci_lastr) {
int nextsize = BLKSIZE(fs, ip, nextlbn);
error = breadn(vp, lbn,
@@ -158,7 +174,7 @@ READ(v)
break;
xfersize = size;
}
- error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize,
+ error = uiomove((char *)bp->b_data + blkoffset, xfersize,
uio);
if (error)
break;
@@ -166,6 +182,7 @@ READ(v)
}
if (bp != NULL)
brelse(bp);
+out:
ip->i_flag |= IN_ACCESS;
return (error);
}
@@ -183,15 +200,19 @@ WRITE(v)
int a_ioflag;
struct ucred *a_cred;
} */ *ap = v;
- register struct vnode *vp;
- register struct uio *uio;
- register struct inode *ip;
- register FS *fs;
+ struct vnode *vp;
+ struct uio *uio;
+ struct inode *ip;
+ FS *fs;
struct buf *bp;
struct proc *p;
daddr_t lbn;
off_t osize;
int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
+ void *win;
+ vsize_t bytelen;
+ off_t oldoff;
+ boolean_t rv;
extended = 0;
ioflag = ap->a_ioflag;
@@ -239,9 +260,77 @@ WRITE(v)
resid = uio->uio_resid;
osize = ip->i_ffs_size;
- flags = ioflag & IO_SYNC ? B_SYNC : 0;
+ error = 0;
+
+ if (vp->v_type != VREG)
+ goto bcache;
+
+ while (uio->uio_resid > 0) {
+ oldoff = uio->uio_offset;
+ blkoffset = blkoff(fs, uio->uio_offset);
+ bytelen = min(fs->fs_bsize - blkoffset, uio->uio_resid);
+
+ /*
+ * XXXUBC if file is mapped and this is the last block,
+ * process one page at a time.
+ */
+
+ error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
+ ap->a_cred, ioflag & IO_SYNC ? B_SYNC : 0);
+ if (error) {
+ return error;
+ }
- for (error = 0; uio->uio_resid > 0;) {
+ win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, &bytelen,
+ UBC_WRITE);
+ error = uiomove(win, bytelen, uio);
+ ubc_release(win, 0);
+
+ /*
+ * flush what we just wrote if necessary.
+ * XXXUBC simplistic async flushing.
+ */
+
+ if (ioflag & IO_SYNC) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+#if 1
+ /*
+ * XXX
+ * flush whole blocks in case there are deps.
+ * otherwise we can dirty and flush part of
+ * a block multiple times and the softdep code
+ * will get confused. fixing this the right way
+ * is complicated so we'll work around it for now.
+ */
+
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj,
+ oldoff & ~(fs->fs_bsize - 1),
+ (oldoff + bytelen + fs->fs_bsize - 1) &
+ ~(fs->fs_bsize - 1),
+ PGO_CLEANIT|PGO_SYNCIO);
+#else
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, oldoff, oldoff + bytelen,
+ PGO_CLEANIT|PGO_SYNCIO);
+#endif
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ } else if (oldoff >> 16 != uio->uio_offset >> 16) {
+ simple_lock(&vp->v_uvm.u_obj.vmobjlock);
+ rv = vp->v_uvm.u_obj.pgops->pgo_flush(
+ &vp->v_uvm.u_obj, (oldoff >> 16) << 16,
+ (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
+ simple_unlock(&vp->v_uvm.u_obj.vmobjlock);
+ }
+ if (error) {
+ break;
+ }
+ }
+ goto out;
+
+bcache:
+ flags = ioflag & IO_SYNC ? B_SYNC : 0;
+ while (uio->uio_resid > 0) {
lbn = lblkno(fs, uio->uio_offset);
blkoffset = blkoff(fs, uio->uio_offset);
xfersize = fs->fs_bsize - blkoffset;
@@ -260,14 +349,12 @@ WRITE(v)
uvm_vnp_setsize(vp, ip->i_ffs_size);
extended = 1;
}
- (void)uvm_vnp_uncache(vp);
size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
if (size < xfersize)
xfersize = size;
- error =
- uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
+ error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
if (error != 0)
bzero((char *)bp->b_data + blkoffset, xfersize);
@@ -287,13 +374,14 @@ WRITE(v)
#endif
if (error || xfersize == 0)
break;
- ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
/*
* If we successfully wrote any data, and we are not the superuser
* we clear the setuid and setgid bits as a precaution against
* tampering.
*/
+out:
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
ip->i_ffs_mode &= ~(ISUID | ISGID);
if (resid > uio->uio_resid)
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 4caf0ef78c7..e926ee7aff6 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_vnops.c,v 1.39 2001/11/21 21:23:56 csapuntz Exp $ */
+/* $OpenBSD: ufs_vnops.c,v 1.40 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $ */
/*
@@ -469,8 +469,6 @@ ufs_chmod(vp, mode, cred, p)
ip->i_ffs_mode &= ~ALLPERMS;
ip->i_ffs_mode |= (mode & ALLPERMS);
ip->i_flag |= IN_CHANGE;
- if ((vp->v_flag & VTEXT) && (ip->i_ffs_mode & S_ISTXT) == 0)
- (void) uvm_vnp_uncache(vp);
return (0);
}
diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h
index e9dc71f9855..981eb21474b 100644
--- a/sys/ufs/ufs/ufsmount.h
+++ b/sys/ufs/ufs/ufsmount.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufsmount.h,v 1.5 1999/06/01 01:48:52 millert Exp $ */
+/* $OpenBSD: ufsmount.h,v 1.6 2001/11/27 05:27:12 art Exp $ */
/* $NetBSD: ufsmount.h,v 1.4 1994/12/21 20:00:23 mycroft Exp $ */
/*
@@ -64,6 +64,7 @@ struct ufsmount {
struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */
struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */
u_long um_nindir; /* indirect ptrs per block */
+ u_long um_lognindir; /* log2 of um_nindir */
u_long um_bptrtodb; /* indir ptr to disk block */
u_long um_seqinc; /* inc between seq blocks */
time_t um_btime[MAXQUOTAS]; /* block quota time limit */