diff options
author | Artur Grabowski <art@cvs.openbsd.org> | 2001-11-27 05:27:13 +0000 |
---|---|---|
committer | Artur Grabowski <art@cvs.openbsd.org> | 2001-11-27 05:27:13 +0000 |
commit | 8a1845e49f56720cbfccd4c7f5f80ba5b980fdf4 (patch) | |
tree | d4a522dc41cdc79ba48fe761e94663b795da8cc0 /sys/ufs | |
parent | 0d68e9b5af14f4bfa04d22dbebab5972ac647b26 (diff) |
Merge in the unified buffer cache code as found in NetBSD 2001/03/10. The
code is written mostly by Chuck Silvers <chuq@chuq.com>/<chs@netbsd.org>.
Tested for the past few weeks by many developers, should be in a pretty stable
state, but will require optimizations and additional cleanups.
Diffstat (limited to 'sys/ufs')
-rw-r--r-- | sys/ufs/ext2fs/ext2fs_balloc.c | 230 | ||||
-rw-r--r-- | sys/ufs/ext2fs/ext2fs_extern.h | 7 | ||||
-rw-r--r-- | sys/ufs/ext2fs/ext2fs_inode.c | 58 | ||||
-rw-r--r-- | sys/ufs/ext2fs/ext2fs_readwrite.c | 103 | ||||
-rw-r--r-- | sys/ufs/ext2fs/ext2fs_subr.c | 5 | ||||
-rw-r--r-- | sys/ufs/ext2fs/ext2fs_vfsops.c | 15 | ||||
-rw-r--r-- | sys/ufs/ext2fs/ext2fs_vnops.c | 12 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_alloc.c | 11 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_balloc.c | 60 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_extern.h | 5 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_inode.c | 121 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_softdep.c | 274 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_vfsops.c | 7 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_vnops.c | 59 | ||||
-rw-r--r-- | sys/ufs/ufs/inode.h | 3 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_bmap.c | 16 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_extern.h | 3 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_inode.c | 149 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_readwrite.c | 130 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_vnops.c | 4 | ||||
-rw-r--r-- | sys/ufs/ufs/ufsmount.h | 3 |
21 files changed, 1044 insertions, 231 deletions
diff --git a/sys/ufs/ext2fs/ext2fs_balloc.c b/sys/ufs/ext2fs/ext2fs_balloc.c index 849a8864b2a..78fb0a8371c 100644 --- a/sys/ufs/ext2fs/ext2fs_balloc.c +++ b/sys/ufs/ext2fs/ext2fs_balloc.c @@ -1,5 +1,4 @@ -/* $OpenBSD: ext2fs_balloc.c,v 1.7 2001/11/06 19:53:21 miod Exp $ */ -/* $NetBSD: ext2fs_balloc.c,v 1.10 2001/07/04 21:16:01 chs Exp $ */ +/* $NetBSD: ext2fs_balloc.c,v 1.8 2000/12/10 06:38:31 chs Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -44,8 +43,9 @@ #include <sys/proc.h> #include <sys/file.h> #include <sys/vnode.h> +#include <sys/mount.h> -#include <uvm/uvm_extern.h> +#include <uvm/uvm.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> @@ -73,8 +73,13 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, u_int deallocated; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; int unwindidx = -1; + UVMHIST_FUNC("ext2fs_buf_alloc"); UVMHIST_CALLED(ubchist); - *bpp = NULL; + UVMHIST_LOG(ubchist, "bn 0x%x", bn,0,0,0); + + if (bpp != NULL) { + *bpp = NULL; + } if (bn < 0) return (EFBIG); fs = ip->i_e2fs; @@ -86,20 +91,29 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, if (bn < NDADDR) { nb = fs2h32(ip->i_e2fs_blocks[bn]); if (nb != 0) { - error = bread(vp, bn, fs->e2fs_bsize, NOCRED, &bp); - if (error) { - brelse(bp); - return (error); + + /* + * the block is already allocated, just read it. + */ + + if (bpp != NULL) { + error = bread(vp, bn, fs->e2fs_bsize, NOCRED, + &bp); + if (error) { + brelse(bp); + return (error); + } + *bpp = bp; } - *bpp = bp; return (0); } /* * allocate a new direct block. 
*/ + error = ext2fs_alloc(ip, bn, - ext2fs_blkpref(ip, bn, (int)bn, &ip->i_e2fs_blocks[0]), + ext2fs_blkpref(ip, bn, bn, &ip->i_e2fs_blocks[0]), cred, &newb); if (error) return (error); @@ -107,11 +121,13 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, ip->i_e2fs_last_blk = newb; ip->i_e2fs_blocks[bn] = h2fs32(newb); ip->i_flag |= IN_CHANGE | IN_UPDATE; - bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0); - bp->b_blkno = fsbtodb(fs, newb); - if (flags & B_CLRBUF) - clrbuf(bp); - *bpp = bp; + if (bpp != NULL) { + bp = getblk(vp, bn, fs->e2fs_bsize, 0, 0); + bp->b_blkno = fsbtodb(fs, newb); + if (flags & B_CLRBUF) + clrbuf(bp); + *bpp = bp; + } return (0); } /* @@ -229,26 +245,30 @@ ext2fs_buf_alloc(struct inode *ip, daddr_t bn, int size, struct ucred *cred, } else { bdwrite(bp); } - nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); - if (flags & B_CLRBUF) - clrbuf(nbp); - *bpp = nbp; + if (bpp != NULL) { + nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + if (flags & B_CLRBUF) + clrbuf(nbp); + *bpp = nbp; + } return (0); } brelse(bp); - if (flags & B_CLRBUF) { - error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp); - if (error) { - brelse(nbp); - goto fail; + if (bpp != NULL) { + if (flags & B_CLRBUF) { + error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, + &nbp); + if (error) { + brelse(nbp); + goto fail; + } + } else { + nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); } - } else { - nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0); - nbp->b_blkno = fsbtodb(fs, nb); + *bpp = nbp; } - - *bpp = nbp; return (0); fail: /* @@ -292,3 +312,153 @@ fail: } return error; } + +int +ext2fs_ballocn(v) + void *v; +{ + struct vop_ballocn_args /* { + struct vnode *a_vp; + off_t a_offset; + off_t a_length; + struct ucred *a_cred; + int a_flags; + } */ *ap = v; + off_t off, len; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct m_ext2fs *fs = 
ip->i_e2fs; + int error, delta, bshift, bsize; + UVMHIST_FUNC("ext2fs_ballocn"); UVMHIST_CALLED(ubchist); + + bshift = fs->e2fs_bshift; + bsize = 1 << bshift; + + off = ap->a_offset; + len = ap->a_length; + + delta = off & (bsize - 1); + off -= delta; + len += delta; + + while (len > 0) { + bsize = min(bsize, len); + UVMHIST_LOG(ubchist, "off 0x%x len 0x%x bsize 0x%x", + off, len, bsize, 0); + + error = ext2fs_buf_alloc(ip, lblkno(fs, off), bsize, ap->a_cred, + NULL, ap->a_flags); + if (error) { + UVMHIST_LOG(ubchist, "error %d", error, 0,0,0); + return error; + } + + /* + * increase file size now, VOP_BALLOC() requires that + * EOF be up-to-date before each call. + */ + + if (ip->i_e2fs_size < off + bsize) { + UVMHIST_LOG(ubchist, "old 0x%x new 0x%x", + ip->i_e2fs_size, off + bsize,0,0); + ip->i_e2fs_size = off + bsize; + if (vp->v_uvm.u_size < ip->i_e2fs_size) { + uvm_vnp_setsize(vp, ip->i_e2fs_size); + } + } + + off += bsize; + len -= bsize; + } + return 0; +} + +/* + * allocate a range of blocks in a file. + * after this function returns, any page entirely contained within the range + * will map to invalid data and thus must be overwritten before it is made + * accessible to others. + */ + +int +ext2fs_balloc_range(vp, off, len, cred, flags) + struct vnode *vp; + off_t off, len; + struct ucred *cred; + int flags; +{ + off_t oldeof, eof, pagestart; + struct uvm_object *uobj; + int i, delta, error, npages; + int bshift = vp->v_mount->mnt_fs_bshift; + int bsize = 1 << bshift; + int ppb = max(bsize >> PAGE_SHIFT, 1); + struct vm_page *pgs[ppb]; + UVMHIST_FUNC("ext2fs_balloc_range"); UVMHIST_CALLED(ubchist); + UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x", + vp, off, len, vp->v_uvm.u_size); + + error = 0; + uobj = &vp->v_uvm.u_obj; + oldeof = vp->v_uvm.u_size; + eof = max(oldeof, off + len); + UVMHIST_LOG(ubchist, "new eof 0x%x", eof,0,0,0); + pgs[0] = NULL; + + /* + * cache the new range of the file. 
this will create zeroed pages + * where the new block will be and keep them locked until the + * new block is allocated, so there will be no window where + * the old contents of the new block is visible to racing threads. + */ + + pagestart = trunc_page(off) & ~(bsize - 1); + npages = min(ppb, (round_page(eof) - pagestart) >> PAGE_SHIFT); + memset(pgs, 0, npages); + simple_lock(&uobj->vmobjlock); + error = VOP_GETPAGES(vp, pagestart, pgs, &npages, 0, + VM_PROT_READ, 0, PGO_SYNCIO | PGO_PASTEOF); + if (error) { + UVMHIST_LOG(ubchist, "getpages %d", error,0,0,0); + goto errout; + } + for (i = 0; i < npages; i++) { + UVMHIST_LOG(ubchist, "got pgs[%d] %p", i, pgs[i],0,0); + KASSERT((pgs[i]->flags & PG_RELEASED) == 0); + pgs[i]->flags &= ~PG_CLEAN; + uvm_pageactivate(pgs[i]); + } + + /* + * adjust off to be block-aligned. + */ + + delta = off & (bsize - 1); + off -= delta; + len += delta; + + /* + * now allocate the range. + */ + + lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL, curproc); + error = VOP_BALLOCN(vp, off, len, cred, flags); + UVMHIST_LOG(ubchist, "ballocn %d", error,0,0,0); + lockmgr(&vp->v_glock, LK_RELEASE, NULL, curproc); + + /* + * unbusy any pages we are holding. + */ + +errout: + simple_lock(&uobj->vmobjlock); + if (error) { + (void) (uobj->pgops->pgo_flush)(uobj, oldeof, pagestart + ppb, + PGO_FREE); + } + if (pgs[0] != NULL) { + uvm_page_unbusy(pgs, npages); + } + simple_unlock(&uobj->vmobjlock); + return (error); +} diff --git a/sys/ufs/ext2fs/ext2fs_extern.h b/sys/ufs/ext2fs/ext2fs_extern.h index b7a3f96df38..af23fb6ef2d 100644 --- a/sys/ufs/ext2fs/ext2fs_extern.h +++ b/sys/ufs/ext2fs/ext2fs_extern.h @@ -1,5 +1,5 @@ -/* $OpenBSD: ext2fs_extern.h,v 1.10 2001/09/18 00:39:15 art Exp $ */ -/* $NetBSD: ext2fs_extern.h,v 1.1 1997/06/11 09:33:55 bouyer Exp $ */ +/* $OpenBSD: ext2fs_extern.h,v 1.11 2001/11/27 05:27:12 art Exp $ */ +/* $NetBSD: ext2fs_extern.h,v 1.9 2000/11/27 08:39:53 chs Exp $ */ /*- * Copyright (c) 1997 Manuel Bouyer. 
@@ -74,6 +74,9 @@ int ext2fs_inode_free(struct inode *pip, ino_t ino, int mode); /* ext2fs_balloc.c */ int ext2fs_buf_alloc(struct inode *, daddr_t, int, struct ucred *, struct buf **, int); +int ext2fs_ballocn __P((void *)); +int ext2fs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *, + int)); /* ext2fs_bmap.c */ int ext2fs_bmap __P((void *)); diff --git a/sys/ufs/ext2fs/ext2fs_inode.c b/sys/ufs/ext2fs/ext2fs_inode.c index 4af28d9bf0e..f77c99c47b5 100644 --- a/sys/ufs/ext2fs/ext2fs_inode.c +++ b/sys/ufs/ext2fs/ext2fs_inode.c @@ -1,5 +1,4 @@ -/* $OpenBSD: ext2fs_inode.c,v 1.17 2001/11/06 19:53:21 miod Exp $ */ -/* $NetBSD: ext2fs_inode.c,v 1.24 2001/06/19 12:59:18 wiz Exp $ */ +/* $NetBSD: ext2fs_inode.c,v 1.23 2001/02/18 20:17:04 chs Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -59,8 +58,10 @@ #include <ufs/ext2fs/ext2fs.h> #include <ufs/ext2fs/ext2fs_extern.h> +extern int prtactive; + static int ext2fs_indirtrunc __P((struct inode *, ufs_daddr_t, ufs_daddr_t, - ufs_daddr_t, int, long *)); + ufs_daddr_t, int, long *)); /* * Last reference to an inode. If necessary, write or delete it. 
@@ -78,7 +79,6 @@ ext2fs_inactive(v) struct proc *p = ap->a_p; struct timespec ts; int error = 0; - extern int prtactive; if (prtactive && vp->v_usecount != 0) vprint("ext2fs_inactive: pushing active", vp); @@ -171,14 +171,13 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) { struct vnode *ovp = ITOV(oip); ufs_daddr_t lastblock; - ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; + ufs_daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR]; ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; struct m_ext2fs *fs; - struct buf *bp; int offset, size, level; long count, nblocks, vflags, blocksreleased = 0; int i; - int aflags, error, allerror; + int error, allerror; off_t osize; if (length < 0) @@ -219,22 +218,8 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) if (length > fs->fs_maxfilesize) return (EFBIG); #endif - offset = blkoff(fs, length - 1); - lbn = lblkno(fs, length - 1); - aflags = B_CLRBUF; - if (flags & IO_SYNC) - aflags |= B_SYNC; - error = ext2fs_buf_alloc(oip, lbn, offset + 1, cred, &bp, - aflags); - if (error) - return (error); - oip->i_e2fs_size = length; - uvm_vnp_setsize(ovp, length); - uvm_vnp_uncache(ovp); - if (aflags & B_SYNC) - bwrite(bp); - else - bawrite(bp); + ext2fs_balloc_range(ovp, length - 1, 1, cred, + flags & IO_SYNC ? B_SYNC : 0); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (ext2fs_update(oip, NULL, NULL, 1)); } @@ -246,28 +231,15 @@ ext2fs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) * of subsequent file growth. 
*/ offset = blkoff(fs, length); - if (offset == 0) { - oip->i_e2fs_size = length; - } else { - lbn = lblkno(fs, length); - aflags = B_CLRBUF; - if (flags & IO_SYNC) - aflags |= B_SYNC; - error = ext2fs_buf_alloc(oip, lbn, offset, cred, &bp, - aflags); - if (error) - return (error); - oip->i_e2fs_size = length; + if (offset != 0) { size = fs->e2fs_bsize; - uvm_vnp_setsize(ovp, length); - uvm_vnp_uncache(ovp); - bzero((char *)bp->b_data + offset, (u_int)(size - offset)); - allocbuf(bp, size); - if (aflags & B_SYNC) - bwrite(bp); - else - bawrite(bp); + + /* XXXUBC we should handle more than just VREG */ + uvm_vnp_zerorange(ovp, length, size - offset); } + oip->i_e2fs_size = length; + uvm_vnp_setsize(ovp, length); + /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) diff --git a/sys/ufs/ext2fs/ext2fs_readwrite.c b/sys/ufs/ext2fs/ext2fs_readwrite.c index 9ae4322756f..94424055733 100644 --- a/sys/ufs/ext2fs/ext2fs_readwrite.c +++ b/sys/ufs/ext2fs/ext2fs_readwrite.c @@ -79,6 +79,8 @@ ext2fs_read(v) struct uio *uio; struct m_ext2fs *fs; struct buf *bp; + void *win; + vsize_t bytelen; ufs_daddr_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; @@ -107,6 +109,27 @@ ext2fs_read(v) if (uio->uio_resid == 0) return (0); + if (vp->v_type == VREG) { + error = 0; + while (uio->uio_resid > 0) { + + bytelen = MIN(ip->i_e2fs_size - uio->uio_offset, + uio->uio_resid); + + if (bytelen == 0) { + break; + } + win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, + &bytelen, UBC_READ); + error = uiomove(win, bytelen, uio); + ubc_release(win, 0); + if (error) { + break; + } + } + goto out; + } + for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_e2fs_size - uio->uio_offset) <= 0) break; @@ -156,8 +179,11 @@ ext2fs_read(v) if (bp != NULL) brelse(bp); +out: if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) { ip->i_flag |= IN_ACCESS; + if ((ap->a_ioflag & IO_SYNC) == IO_SYNC) + error = ext2fs_update(ip, NULL, 
NULL, 1); } return (error); } @@ -183,12 +209,17 @@ ext2fs_write(v) struct proc *p; ufs_daddr_t lbn; off_t osize; - int blkoffset, error, flags, ioflag, resid, size, xfersize; + int blkoffset, error, flags, ioflag, resid, xfersize; + vsize_t bytelen; + void *win; + off_t oldoff; + boolean_t rv; ioflag = ap->a_ioflag; uio = ap->a_uio; vp = ap->a_vp; ip = VTOI(vp); + error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) @@ -232,35 +263,65 @@ ext2fs_write(v) resid = uio->uio_resid; osize = ip->i_e2fs_size; - flags = ioflag & IO_SYNC ? B_SYNC : 0; + if (vp->v_type == VREG) { + while (uio->uio_resid > 0) { + oldoff = uio->uio_offset; + blkoffset = blkoff(fs, uio->uio_offset); + bytelen = MIN(fs->e2fs_bsize - blkoffset, + uio->uio_resid); + + /* + * XXXUBC if file is mapped and this is the last block, + * process one page at a time. + */ + + error = ext2fs_balloc_range(vp, uio->uio_offset, + bytelen, ap->a_cred, 0); + if (error) { + break; + } + win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, + &bytelen, UBC_WRITE); + error = uiomove(win, bytelen, uio); + ubc_release(win, 0); + if (error) { + break; + } + + /* + * flush what we just wrote if necessary. + * XXXUBC simplistic async flushing. + */ + + if (oldoff >> 16 != uio->uio_offset >> 16) { + simple_lock(&vp->v_uvm.u_obj.vmobjlock); + rv = vp->v_uvm.u_obj.pgops->pgo_flush( + &vp->v_uvm.u_obj, (oldoff >> 16) << 16, + (uio->uio_offset >> 16) << 16, PGO_CLEANIT); + simple_unlock(&vp->v_uvm.u_obj.vmobjlock); + } + } + goto out; + } + + flags = ioflag & IO_SYNC ? 
B_SYNC : 0; for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); - xfersize = fs->e2fs_bsize - blkoffset; - if (uio->uio_resid < xfersize) - xfersize = uio->uio_resid; - if (fs->e2fs_bsize > xfersize) + xfersize = MIN(fs->e2fs_bsize - blkoffset, uio->uio_resid); + if (xfersize < fs->e2fs_bsize) flags |= B_CLRBUF; else flags &= ~B_CLRBUF; - error = ext2fs_buf_alloc(ip, - lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); + lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); if (error) break; - if (uio->uio_offset + xfersize > ip->i_e2fs_size) { + if (ip->i_e2fs_size < uio->uio_offset + xfersize) { ip->i_e2fs_size = uio->uio_offset + xfersize; - uvm_vnp_setsize(vp, ip->i_e2fs_size); } - uvm_vnp_uncache(vp); - - size = fs->e2fs_bsize - bp->b_resid; - if (size < xfersize) - xfersize = size; - - error = - uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); + error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); if (ioflag & IO_SYNC) (void)bwrite(bp); else if (xfersize + blkoffset == fs->e2fs_bsize) { @@ -272,13 +333,14 @@ ext2fs_write(v) bdwrite(bp); if (error || xfersize == 0) break; - ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. 
*/ +out: + ip->i_flag |= IN_CHANGE | IN_UPDATE; if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) ip->i_e2fs_mode &= ~(ISUID | ISGID); if (error) { @@ -288,8 +350,7 @@ ext2fs_write(v) uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } - } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { + } else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC) error = ext2fs_update(ip, NULL, NULL, 1); - } return (error); } diff --git a/sys/ufs/ext2fs/ext2fs_subr.c b/sys/ufs/ext2fs/ext2fs_subr.c index 82165b8f242..3263f7e5391 100644 --- a/sys/ufs/ext2fs/ext2fs_subr.c +++ b/sys/ufs/ext2fs/ext2fs_subr.c @@ -1,5 +1,4 @@ -/* $OpenBSD: ext2fs_subr.c,v 1.6 2001/09/18 01:39:13 art Exp $ */ -/* $NetBSD: ext2fs_subr.c,v 1.1 1997/06/11 09:34:03 bouyer Exp $ */ +/* $NetBSD: ext2fs_subr.c,v 1.4 2000/03/30 12:41:11 augustss Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -96,7 +95,7 @@ ext2fs_checkoverlap(bp, ip) if (ep == bp || (ep->b_flags & B_INVAL) || ep->b_vp == NULLVP) continue; - if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL)) + if (VOP_BMAP(ep->b_vp, (ufs_daddr_t)0, &vp, (ufs_daddr_t)0, NULL)) continue; if (vp != ip->i_devvp) continue; diff --git a/sys/ufs/ext2fs/ext2fs_vfsops.c b/sys/ufs/ext2fs/ext2fs_vfsops.c index 6991cf9d650..e438268acbc 100644 --- a/sys/ufs/ext2fs/ext2fs_vfsops.c +++ b/sys/ufs/ext2fs/ext2fs_vfsops.c @@ -1,5 +1,5 @@ -/* $OpenBSD: ext2fs_vfsops.c,v 1.16 2001/11/21 22:21:48 csapuntz Exp $ */ -/* $NetBSD: ext2fs_vfsops.c,v 1.1 1997/06/11 09:34:07 bouyer Exp $ */ +/* $OpenBSD: ext2fs_vfsops.c,v 1.17 2001/11/27 05:27:12 art Exp $ */ +/* $NetBSD: ext2fs_vfsops.c,v 1.40 2000/11/27 08:39:53 chs Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. @@ -402,9 +402,11 @@ ext2fs_reload(mountp, cred, p) * Step 1: invalidate all cached meta-data. 
*/ devvp = VFSTOUFS(mountp)->um_devvp; - if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, 0, cred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) panic("ext2fs_reload: dirty1"); - /* * Step 2: re-read superblock from disk. */ @@ -583,14 +585,18 @@ ext2fs_mountfs(devvp, mp, p) mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; mp->mnt_flag |= MNT_LOCAL; + mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */ + mp->mnt_fs_bshift = m_fs->e2fs_bshift; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = NINDIR(m_fs); + ump->um_lognindir = ffs(NINDIR(m_fs)) - 1; ump->um_bptrtodb = m_fs->e2fs_fsbtodb; ump->um_seqinc = 1; /* no frags */ devvp->v_specmountpoint = mp; return (0); + out: if (bp) brelse(bp); @@ -924,6 +930,7 @@ ext2fs_vget(mp, ino, vpp) ip->i_flag |= IN_MODIFIED; } + vp->v_uvm.u_size = ip->i_e2fs_size; *vpp = vp; return (0); } diff --git a/sys/ufs/ext2fs/ext2fs_vnops.c b/sys/ufs/ext2fs/ext2fs_vnops.c index 0faba75ffd2..fffdd494d5a 100644 --- a/sys/ufs/ext2fs/ext2fs_vnops.c +++ b/sys/ufs/ext2fs/ext2fs_vnops.c @@ -1,5 +1,5 @@ -/* $OpenBSD: ext2fs_vnops.c,v 1.17 2001/11/06 19:53:21 miod Exp $ */ -/* $NetBSD: ext2fs_vnops.c,v 1.1 1997/06/11 09:34:09 bouyer Exp $ */ +/* $OpenBSD: ext2fs_vnops.c,v 1.18 2001/11/27 05:27:12 art Exp $ */ +/* $NetBSD: ext2fs_vnops.c,v 1.30 2000/11/27 08:39:53 chs Exp $ */ /* * Copyright (c) 1997 Manuel Bouyer. 
@@ -402,8 +402,6 @@ ext2fs_chmod(vp, mode, cred, p) ip->i_e2fs_mode &= ~ALLPERMS; ip->i_e2fs_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; - if ((vp->v_flag & VTEXT) && (ip->i_e2fs_mode & S_ISTXT) == 0) - (void) uvm_vnp_uncache(vp); return (0); } @@ -1469,7 +1467,11 @@ struct vnodeopv_entry_desc ext2fs_vnodeop_entries[] = { { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ { &vop_advlock_desc, ext2fs_advlock }, /* advlock */ { &vop_bwrite_desc, vop_generic_bwrite }, /* bwrite */ - { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL } + { &vop_ballocn_desc, ext2fs_ballocn }, + { &vop_getpages_desc, genfs_getpages }, + { &vop_putpages_desc, genfs_putpages }, + { &vop_size_desc, genfs_size }, + { NULL, NULL } }; struct vnodeopv_desc ext2fs_vnodeop_opv_desc = { &ext2fs_vnodeop_p, ext2fs_vnodeop_entries }; diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 8ddf99405fc..a53d87828c3 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_alloc.c,v 1.35 2001/11/21 21:23:56 csapuntz Exp $ */ +/* $OpenBSD: ffs_alloc.c,v 1.36 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $ */ /* @@ -169,14 +169,15 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop) struct buf **bpp; ufs_daddr_t *blknop; { - register struct fs *fs; - struct buf *bp = NULL; + struct fs *fs; + struct buf *bp; ufs_daddr_t quota_updated = 0; int cg, request, error; daddr_t bprev, bno; if (bpp != NULL) *bpp = NULL; + fs = ip->i_fs; #ifdef DIAGNOSTIC if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || @@ -282,7 +283,6 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp, blknop) if (bno <= 0) goto nospace; - (void) uvm_vnp_uncache(ITOV(ip)); if (!DOINGSOFTDEP(ITOV(ip))) ffs_blkfree(ip, bprev, (long)osize); if (nsize < request) @@ -362,7 +362,8 @@ ffs_reallocblks(v) struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; int i, len, start_lvl, end_lvl, 
pref, ssize; - if (doreallocblks == 0) + /* XXXUBC - don't reallocblks for now */ + if (1 || doreallocblks == 0) return (ENOSPC); vp = ap->a_vp; diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index 009adc91ff9..5f6ddc3d94e 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_balloc.c,v 1.18 2001/11/21 21:23:56 csapuntz Exp $ */ +/* $OpenBSD: ffs_balloc.c,v 1.19 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $ */ /* @@ -402,3 +402,61 @@ fail: return (error); } + +int +ffs_ballocn(v) + void *v; +{ + struct vop_ballocn_args /* { + struct vnode *a_vp; + off_t a_offset; + off_t a_length; + struct ucred *a_cred; + int a_flags; + } */ *ap = v; + + off_t off, len; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct fs *fs = ip->i_fs; + int error, delta, bshift, bsize; + + error = 0; + bshift = fs->fs_bshift; + bsize = 1 << bshift; + + off = ap->a_offset; + len = ap->a_length; + + delta = off & (bsize - 1); + off -= delta; + len += delta; + + while (len > 0) { + bsize = min(bsize, len); + + error = ffs_balloc(ip, off, bsize, ap->a_cred, ap->a_flags, + NULL); + if (error) { + goto out; + } + + /* + * increase file size now, VOP_BALLOC() requires that + * EOF be up-to-date before each call. 
+ */ + + if (ip->i_ffs_size < off + bsize) { + ip->i_ffs_size = off + bsize; + if (vp->v_uvm.u_size < ip->i_ffs_size) { + uvm_vnp_setsize(vp, ip->i_ffs_size); + } + } + + off += bsize; + len -= bsize; + } + +out: + return error; + } diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index eeeba209c69..2875a332a57 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_extern.h,v 1.14 2001/11/13 00:10:56 art Exp $ */ +/* $OpenBSD: ffs_extern.h,v 1.15 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */ /*- @@ -87,6 +87,7 @@ void ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int)); /* ffs_balloc.c */ int ffs_balloc(struct inode *, off_t, int, struct ucred *, int, struct buf **); +int ffs_ballocn(void *); /* ffs_inode.c */ int ffs_init __P((struct vfsconf *)); @@ -128,7 +129,7 @@ int ffs_read __P((void *)); int ffs_write __P((void *)); int ffs_fsync __P((void *)); int ffs_reclaim __P((void *)); - +int ffs_size __P((void *)); /* * Soft dependency function prototypes. 
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index c81c795b2ac..cddf6a368ca 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_inode.c,v 1.25 2001/11/21 21:23:56 csapuntz Exp $ */ +/* $OpenBSD: ffs_inode.c,v 1.26 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */ /* @@ -150,14 +150,14 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) { struct vnode *ovp; daddr_t lastblock; - daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; + daddr_t bn, lastiblock[NIADDR], indir_lbn[NIADDR]; daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; struct fs *fs; - struct buf *bp; + struct proc *p = curproc; int offset, size, level; long count, nblocks, vflags, blocksreleased = 0; register int i; - int aflags, error, allerror; + int error, allerror; off_t osize; if (length < 0) @@ -188,10 +188,55 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) if ((error = getinoquota(oip)) != 0) return (error); - uvm_vnp_setsize(ovp, length); + fs = oip->i_fs; + if (length > fs->fs_maxfilesize) + return (EFBIG); + osize = oip->i_ffs_size; oip->i_ci.ci_lasta = oip->i_ci.ci_clen = oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0; + /* + * Lengthen the size of the file. We must ensure that the + * last byte of the file is allocated. Since the smallest + * value of osize is 0, length will be at least 1. + */ + + if (osize < length) { + ufs_balloc_range(ovp, length - 1, 1, cred, + flags & IO_SYNC ? B_SYNC : 0); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (UFS_UPDATE(oip, 1)); + } + + /* + * When truncating a regular file down to a non-block-aligned size, + * we must zero the part of last block which is past the new EOF. + * We must synchronously flush the zeroed pages to disk + * since the new pages will be invalidated as soon as we + * inform the VM system of the new, smaller size. 
+ * We must do this before acquiring the GLOCK, since fetching + * the pages will acquire the GLOCK internally. + * So there is a window where another thread could see a whole + * zeroed page past EOF, but that's life. + */ + + offset = blkoff(fs, length); + if (ovp->v_type == VREG && length < osize && offset != 0) { + struct uvm_object *uobj; + voff_t eoz; + + size = blksize(fs, oip, lblkno(fs, length)); + eoz = min(lblktosize(fs, lblkno(fs, length)) + size, osize); + uvm_vnp_zerorange(ovp, length, eoz - length); + uobj = &ovp->v_uvm.u_obj; + simple_lock(&uobj->vmobjlock); + uobj->pgops->pgo_flush(uobj, length, eoz, + PGO_CLEANIT|PGO_DEACTIVATE|PGO_SYNCIO); + simple_unlock(&ovp->v_uvm.u_obj.vmobjlock); + } + + lockmgr(&ovp->v_glock, LK_EXCLUSIVE, NULL, p); + if (DOINGSOFTDEP(ovp)) { if (length > 0 || softdep_slowdown(ovp)) { /* @@ -204,80 +249,29 @@ ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) * so that it will have no data structures left. */ if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, - curproc)) != 0) + curproc)) != 0) { + lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p); return (error); + } } else { + uvm_vnp_setsize(ovp, length); (void)ufs_quota_free_blocks(oip, oip->i_ffs_blocks, NOCRED); softdep_setup_freeblocks(oip, length); (void) vinvalbuf(ovp, 0, cred, curproc, 0, 0); + lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(oip, 0)); } } - fs = oip->i_fs; - osize = oip->i_ffs_size; /* - * Lengthen the size of the file. We must ensure that the - * last byte of the file is allocated. Since the smallest - * value of osize is 0, length will be at least 1. + * Reduce the size of the file. 
*/ - if (osize < length) { - if (length > fs->fs_maxfilesize) - return (EFBIG); - aflags = B_CLRBUF; - if (flags & IO_SYNC) - aflags |= B_SYNC; - error = UFS_BUF_ALLOC(oip, length - 1, 1, - cred, aflags, &bp); - if (error) - return (error); - oip->i_ffs_size = length; - uvm_vnp_setsize(ovp, length); - (void) uvm_vnp_uncache(ovp); - if (aflags & B_SYNC) - bwrite(bp); - else - bawrite(bp); - oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (UFS_UPDATE(oip, MNT_WAIT)); - } + oip->i_ffs_size = length; uvm_vnp_setsize(ovp, length); /* - * Shorten the size of the file. If the file is not being - * truncated to a block boundary, the contents of the - * partial block following the end of the file must be - * zero'ed in case it ever becomes accessible again because - * of subsequent file growth. Directories however are not - * zero'ed as they should grow back initialized to empty. - */ - offset = blkoff(fs, length); - if (offset == 0) { - oip->i_ffs_size = length; - } else { - lbn = lblkno(fs, length); - aflags = B_CLRBUF; - if (flags & IO_SYNC) - aflags |= B_SYNC; - error = UFS_BUF_ALLOC(oip, length - 1, 1, - cred, aflags, &bp); - if (error) - return (error); - oip->i_ffs_size = length; - size = blksize(fs, oip, lbn); - (void) uvm_vnp_uncache(ovp); - if (ovp->v_type != VDIR) - bzero((char *)bp->b_data + offset, - (u_int)(size - offset)); - allocbuf(bp, size); - if (aflags & B_SYNC) - bwrite(bp); - else - bawrite(bp); - } - /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. 
Lastblock is -1 when @@ -402,6 +396,7 @@ done: oip->i_ffs_blocks -= blocksreleased; if (oip->i_ffs_blocks < 0) /* sanity */ oip->i_ffs_blocks = 0; + lockmgr(&ovp->v_glock, LK_RELEASE, NULL, p); oip->i_flag |= IN_CHANGE; (void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED); return (allerror); diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 1d66094cc06..7a66eed4d8b 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_softdep.c,v 1.25 2001/11/13 14:19:24 art Exp $ */ +/* $OpenBSD: ffs_softdep.c,v 1.26 2001/11/27 05:27:12 art Exp $ */ /* * Copyright 1998, 2000 Marshall Kirk McKusick. All Rights Reserved. * @@ -56,6 +56,7 @@ #include <sys/malloc.h> #include <sys/mount.h> #include <sys/proc.h> +#include <sys/pool.h> #include <sys/syslog.h> #include <sys/systm.h> #include <sys/vnode.h> @@ -69,6 +70,10 @@ #include <ufs/ffs/ffs_extern.h> #include <ufs/ufs/ufs_extern.h> +#include <uvm/uvm.h> +struct pool sdpcpool; +int softdep_lockedbufs; + #define STATIC /* @@ -109,6 +114,13 @@ extern char *memname[]; */ /* + * Definitions for page cache info hashtable. + */ +#define PCBPHASHSIZE 1024 +LIST_HEAD(, buf) pcbphashhead[PCBPHASHSIZE]; +#define PCBPHASH(vp, lbn) ((((vaddr_t)(vp) >> 8) ^ (lbn)) & (PCBPHASHSIZE - 1)) + +/* * Internal function prototypes. 
*/ STATIC void softdep_error __P((char *, int)); @@ -160,6 +172,13 @@ STATIC void pause_timer __P((void *)); STATIC int request_cleanup __P((int, int)); STATIC int process_worklist_item __P((struct mount *, int)); STATIC void add_to_worklist __P((struct worklist *)); +STATIC struct buf *softdep_setup_pagecache __P((struct inode *, ufs_lbn_t, + long)); +STATIC void softdep_collect_pagecache __P((struct inode *)); +STATIC void softdep_free_pagecache __P((struct inode *)); +STATIC struct vnode *softdep_lookupvp(struct fs *, ino_t); +STATIC struct buf *softdep_lookup_pcbp __P((struct vnode *, ufs_lbn_t)); +void softdep_pageiodone __P((struct buf *)); /* * Exported softdep operations. @@ -176,6 +195,7 @@ struct bio_ops bioops = { softdep_deallocate_dependencies, /* io_deallocate */ softdep_move_dependencies, /* io_movedeps */ softdep_count_dependencies, /* io_countdeps */ + softdep_pageiodone, /* io_pagedone */ }; /* @@ -1055,6 +1075,7 @@ top: void softdep_initialize() { + int i; LIST_INIT(&mkdirlisthd); LIST_INIT(&softdep_workitem_pending); @@ -1073,6 +1094,11 @@ softdep_initialize() newblk_hashtbl = hashinit(64, M_NEWBLK, M_WAITOK, &newblk_hash); sema_init(&newblk_in_progress, "newblk", PRIBIO, 0); timeout_set(&proc_waiting_timeout, pause_timer, 0); + pool_init(&sdpcpool, sizeof(struct buf), 0, 0, 0, "sdpcpool", + 0, pool_page_alloc_nointr, pool_page_free_nointr, M_TEMP); + for (i = 0; i < PCBPHASHSIZE; i++) { + LIST_INIT(&pcbphashhead[i]); + } } /* @@ -1325,11 +1351,16 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) LIST_REMOVE(newblk, nb_hash); FREE(newblk, M_NEWBLK); + /* + * If we were not passed a bp to attach the dep to, + * then this must be for a regular file. + * Allocate a buffer to represent the page cache pages + * that are the real dependency. The pages themselves + * cannot refer to the dependency since we don't want to + * add a field to struct vm_page for this. 
+ */ if (bp == NULL) { - /* - * XXXUBC - Yes, I know how to fix this, but not right now. - */ - panic("softdep_setup_allocdirect: Bonk art in the head\n"); + bp = softdep_setup_pagecache(ip, lbn, newsize); } WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); if (lbn >= NDADDR) { @@ -1563,10 +1594,7 @@ softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); if (nbp == NULL) { - /* - * XXXUBC - Yes, I know how to fix this, but not right now. - */ - panic("softdep_setup_allocindir_page: Bonk art in the head\n"); + nbp = softdep_setup_pagecache(ip, lbn, ip->i_fs->fs_bsize); } WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); FREE_LOCK(&lk); @@ -1745,6 +1773,7 @@ softdep_setup_freeblocks(ip, length) int i, delay, error; fs = ip->i_fs; + vp = ITOV(ip); if (length != 0) panic("softdep_setup_freeblocks: non-zero length"); MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks), @@ -1804,9 +1833,15 @@ softdep_setup_freeblocks(ip, length) * with this inode are obsolete and can simply be de-allocated. * We must first merge the two dependency lists to get rid of * any duplicate freefrag structures, then purge the merged list. + * We must remove any pagecache markers from the pagecache + * hashtable first because any I/Os in flight will want to see + * dependencies attached to their pagecache markers. We cannot + * free the pagecache markers until after we've freed all the + * dependencies that reference them later. * If we still have a bitmap dependency, then the inode has never * been written to disk, so we can free any fragments without delay. */ + softdep_collect_pagecache(ip); merge_inode_lists(inodedep); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) free_allocdirect(&inodedep->id_inoupdt, adp, delay); @@ -1818,7 +1853,6 @@ softdep_setup_freeblocks(ip, length) * Once they are all there, walk the list and get rid of * any dependencies. 
*/ - vp = ITOV(ip); ACQUIRE_LOCK(&lk); drain_output(vp, 1); while (getdirtybuf(&LIST_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) { @@ -1830,6 +1864,7 @@ softdep_setup_freeblocks(ip, length) brelse(bp); ACQUIRE_LOCK(&lk); } + softdep_free_pagecache(ip); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) != 0) (void) free_inodedep(inodedep); FREE_LOCK(&lk); @@ -2898,7 +2933,6 @@ handle_workitem_freefile(freefile) struct freefile *freefile; { struct fs *fs; - struct vnode vp; struct inode tip; struct inodedep *idp; int error; @@ -2914,8 +2948,7 @@ handle_workitem_freefile(freefile) tip.i_devvp = freefile->fx_devvp; tip.i_dev = freefile->fx_devvp->v_rdev; tip.i_fs = fs; - tip.i_vnode = &vp; - vp.v_data = &tip; + tip.i_vnode = NULL; if ((error = ffs_freefile(&tip, freefile->fx_oldinum, freefile->fx_mode)) != 0) { @@ -4313,6 +4346,7 @@ flush_inodedep_deps(fs, ino) struct allocdirect *adp; int error, waitfor; struct buf *bp; + struct vnode *vp; /* * This work is done in two passes. The first pass grabs most @@ -4332,6 +4366,27 @@ flush_inodedep_deps(fs, ino) ACQUIRE_LOCK(&lk); if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) return (0); + + /* + * When file data was in the buffer cache, + * softdep_sync_metadata() would start i/o on + * file data buffers itself. But now that + * we're using the page cache to hold file data, + * we need something else to trigger those flushes. + * let's just do it here. + */ + + vp = softdep_lookupvp(fs, ino); + if (vp) { + struct uvm_object *uobj = &vp->v_uvm.u_obj; + + simple_lock(&uobj->vmobjlock); + (uobj->pgops->pgo_flush)(uobj, 0, 0, + PGO_ALLPAGES|PGO_CLEANIT| + (waitfor == MNT_NOWAIT ? 0: PGO_SYNCIO)); + simple_unlock(&uobj->vmobjlock); + } + TAILQ_FOREACH(adp, &inodedep->id_inoupdt, ad_next) { if (adp->ad_state & DEPCOMPLETE) continue; @@ -4944,3 +4999,196 @@ softdep_error(func, error) /* XXX should do something better! 
*/ printf("%s: got error %d while accessing filesystem\n", func, error); } + +/* + * Allocate a buffer on which to attach a dependency. + */ +STATIC struct buf * +softdep_setup_pagecache(ip, lbn, size) + struct inode *ip; + ufs_lbn_t lbn; + long size; +{ + struct vnode *vp = ITOV(ip); + struct buf *bp; + int s; + + /* + * Enter pagecache dependency buf in hash. + */ + + bp = softdep_lookup_pcbp(vp, lbn); + if (bp == NULL) { + s = splbio(); + bp = pool_get(&sdpcpool, PR_WAITOK); + splx(s); + + bp->b_vp = vp; + bp->b_lblkno = lbn; + bp->b_bcount = bp->b_resid = size; + LIST_INIT(&bp->b_dep); + LIST_INSERT_HEAD(&pcbphashhead[PCBPHASH(vp, lbn)], bp, b_hash); + LIST_INSERT_HEAD(&ip->i_pcbufhd, bp, b_vnbufs); + } else { + KASSERT(size >= bp->b_bcount); + bp->b_resid += size - bp->b_bcount; + bp->b_bcount = size; + } + return bp; +} + +/* + * softdep_collect_pagecache() and softdep_free_pagecache() + * are used to remove page cache dependency buffers when + * a file is being truncated to 0. + */ + +STATIC void +softdep_collect_pagecache(ip) + struct inode *ip; +{ + struct buf *bp; + + LIST_FOREACH(bp, &ip->i_pcbufhd, b_vnbufs) { + LIST_REMOVE(bp, b_hash); + } +} + +STATIC void +softdep_free_pagecache(ip) + struct inode *ip; +{ + struct buf *bp, *nextbp; + + for (bp = LIST_FIRST(&ip->i_pcbufhd); bp != NULL; bp = nextbp) { + nextbp = LIST_NEXT(bp, b_vnbufs); + LIST_REMOVE(bp, b_vnbufs); + KASSERT(LIST_FIRST(&bp->b_dep) == NULL); + pool_put(&sdpcpool, bp); + } +} + +STATIC struct vnode * +softdep_lookupvp(fs, ino) + struct fs *fs; + ino_t ino; +{ + struct mount *mp; + extern struct vfsops ffs_vfsops; + + CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { + if (mp->mnt_op == &ffs_vfsops && + VFSTOUFS(mp)->um_fs == fs) { + break; + } + } + if (mp == NULL) { + return NULL; + } + return ufs_ihashlookup(VFSTOUFS(mp)->um_dev, ino); +} + +STATIC struct buf * +softdep_lookup_pcbp(vp, lbn) + struct vnode *vp; + ufs_lbn_t lbn; +{ + struct buf *bp; + + LIST_FOREACH(bp, 
&pcbphashhead[PCBPHASH(vp, lbn)], b_hash) { + if (bp->b_vp == vp && bp->b_lblkno == lbn) { + break; + } + } + return bp; +} + +/* + * Do softdep i/o completion processing for page cache writes. + */ + +void +softdep_pageiodone(bp) + struct buf *bp; +{ + int npages = bp->b_bufsize >> PAGE_SHIFT; + struct vnode *vp = bp->b_vp; + struct vm_page *pg; + struct buf *pcbp = NULL; + struct allocdirect *adp; + struct allocindir *aip; + struct worklist *wk; + ufs_lbn_t lbn; + voff_t off; + long iosize = bp->b_bcount; + int size, asize, bshift, bsize; + int i; + + KASSERT(!(bp->b_flags & B_READ)); + bshift = vp->v_mount->mnt_fs_bshift; + bsize = 1 << bshift; + asize = min(PAGE_SIZE, bsize); + ACQUIRE_LOCK(&lk); + for (i = 0; i < npages; i++) { + pg = uvm_pageratop((vaddr_t)bp->b_data + (i << PAGE_SHIFT)); + if (pg == NULL) { + continue; + } + + for (off = pg->offset; + off < pg->offset + PAGE_SIZE; + off += bsize) { + size = min(asize, iosize); + iosize -= size; + lbn = off >> bshift; + if (pcbp == NULL || pcbp->b_lblkno != lbn) { + pcbp = softdep_lookup_pcbp(vp, lbn); + } + if (pcbp == NULL) { + continue; + } + pcbp->b_resid -= size; + if (pcbp->b_resid < 0) { + panic("softdep_pageiodone: " + "resid < 0, vp %p lbn 0x%lx pcbp %p", + vp, lbn, pcbp); + } + if (pcbp->b_resid > 0) { + continue; + } + + /* + * We've completed all the i/o for this block. + * mark the dep complete. 
+ */ + + KASSERT(LIST_FIRST(&pcbp->b_dep) != NULL); + while ((wk = LIST_FIRST(&pcbp->b_dep))) { + WORKLIST_REMOVE(wk); + switch (wk->wk_type) { + case D_ALLOCDIRECT: + adp = WK_ALLOCDIRECT(wk); + adp->ad_state |= COMPLETE; + handle_allocdirect_partdone(adp); + break; + + case D_ALLOCINDIR: + aip = WK_ALLOCINDIR(wk); + aip->ai_state |= COMPLETE; + handle_allocindir_partdone(aip); + break; + + default: + panic("softdep_pageiodone: " + "bad type %d, pcbp %p wk %p", + wk->wk_type, pcbp, wk); + } + } + LIST_REMOVE(pcbp, b_hash); + LIST_REMOVE(pcbp, b_vnbufs); + pool_put(&sdpcpool, pcbp); + pcbp = NULL; + } + } + FREE_LOCK(&lk); +} diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index b1dee123893..19c77726fa8 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_vfsops.c,v 1.45 2001/11/21 22:21:48 csapuntz Exp $ */ +/* $OpenBSD: ffs_vfsops.c,v 1.46 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $ */ /* @@ -737,11 +737,14 @@ ffs_mountfs(devvp, mp, p) else mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; + mp->mnt_fs_bshift = fs->fs_bshift; + mp->mnt_dev_bshift = DEV_BSHIFT; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; + ump->um_lognindir = ffs(fs->fs_nindir) - 1; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) @@ -1119,6 +1122,7 @@ retry: ip->i_fs = fs = ump->um_fs; ip->i_dev = dev; ip->i_number = ino; + LIST_INIT(&ip->i_pcbufhd); ip->i_vtbl = &ffs_vtbl; /* @@ -1199,6 +1203,7 @@ retry: ip->i_ffs_uid = ip->i_din.ffs_din.di_ouid; /* XXX */ ip->i_ffs_gid = ip->i_din.ffs_din.di_ogid; /* XXX */ } /* XXX */ + uvm_vnp_setsize(vp, ip->i_ffs_size); *vpp = vp; return (0); diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 26e9bbaf9da..8190ef82eb3 100644 --- 
a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_vnops.c,v 1.20 2001/11/06 19:53:21 miod Exp $ */ +/* $OpenBSD: ffs_vnops.c,v 1.21 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */ /* @@ -107,8 +107,13 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_advlock_desc, ufs_advlock }, /* advlock */ { &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */ { &vop_bwrite_desc, vop_generic_bwrite }, - { (struct vnodeop_desc*)NULL, (int(*) __P((void*)))NULL } + { &vop_ballocn_desc, ffs_ballocn }, + { &vop_getpages_desc, genfs_getpages }, + { &vop_putpages_desc, genfs_putpages }, + { &vop_size_desc, ffs_size }, + { NULL, NULL } }; + struct vnodeopv_desc ffs_vnodeop_opv_desc = { &ffs_vnodeop_p, ffs_vnodeop_entries }; @@ -229,6 +234,7 @@ ffs_fsync(v) struct vnode *vp = ap->a_vp; struct buf *bp, *nbp; int s, error, passes, skipmeta; + struct uvm_object *uobj; if (vp->v_type == VBLK && vp->v_specmountpoint != NULL && @@ -236,13 +242,22 @@ ffs_fsync(v) softdep_fsync_mountdev(vp); /* - * Flush all dirty buffers associated with a vnode. + * Flush all dirty data associated with a vnode. */ passes = NIADDR + 1; skipmeta = 0; if (ap->a_waitfor == MNT_WAIT) skipmeta = 1; s = splbio(); + + if (vp->v_type == VREG) { + uobj = &vp->v_uvm.u_obj; + simple_lock(&uobj->vmobjlock); + (uobj->pgops->pgo_flush)(uobj, 0, 0, PGO_ALLPAGES|PGO_CLEANIT| + ((ap->a_waitfor == MNT_WAIT) ? 
PGO_SYNCIO : 0)); + simple_unlock(&uobj->vmobjlock); + } + loop: for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = LIST_NEXT(bp, b_vnbufs)) @@ -281,8 +296,10 @@ loop: */ if (passes > 0 || ap->a_waitfor != MNT_WAIT) (void) bawrite(bp); - else if ((error = bwrite(bp)) != 0) + else if ((error = bwrite(bp)) != 0) { + printf("ffs_fsync: bwrite failed %d\n", error); return (error); + } s = splbio(); /* * Since we may have slept during the I/O, we need @@ -325,7 +342,11 @@ loop: } } splx(s); - return (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT)); + + error = (UFS_UPDATE(VTOI(vp), ap->a_waitfor == MNT_WAIT)); + if (error) + printf("ffs_fsync: UFS_UPDATE failed. %d\n", error); + return (error); } /* @@ -349,3 +370,31 @@ ffs_reclaim(v) vp->v_data = NULL; return (0); } + +/* + * Return the last logical file offset that should be written for this file + * if we're doing a write that ends at "size". + */ +int +ffs_size(v) + void *v; +{ + struct vop_size_args /* { + struct vnode *a_vp; + off_t a_size; + off_t *a_eobp; + } */ *ap = v; + struct inode *ip = VTOI(ap->a_vp); + struct fs *fs = ip->i_fs; + ufs_lbn_t olbn, nlbn; + + olbn = lblkno(fs, ip->i_ffs_size); + nlbn = lblkno(fs, ap->a_size); + + if (nlbn < NDADDR && olbn <= nlbn) { + *ap->a_eobp = fragroundup(fs, ap->a_size); + } else { + *ap->a_eobp = blkroundup(fs, ap->a_size); + } + return 0; +} diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index 5665b276a0f..98c73de5579 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: inode.h,v 1.16 2001/07/04 06:10:50 angelos Exp $ */ +/* $OpenBSD: inode.h,v 1.17 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $ */ /* @@ -84,6 +84,7 @@ struct inode { #define i_e2fs inode_u.e2fs struct cluster_info i_ci; + LIST_HEAD(,buf) i_pcbufhd; struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ struct lockf *i_lockf;/* Head of byte-level lock list. 
*/ diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c index add641e15ce..fdf5c1be055 100644 --- a/sys/ufs/ufs/ufs_bmap.c +++ b/sys/ufs/ufs/ufs_bmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_bmap.c,v 1.10 2001/11/21 22:24:24 csapuntz Exp $ */ +/* $OpenBSD: ufs_bmap.c,v 1.11 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ufs_bmap.c,v 1.3 1996/02/09 22:36:00 christos Exp $ */ /* @@ -233,6 +233,7 @@ ufs_getlbns(vp, bn, ap, nump) long metalbn, realbn; struct ufsmount *ump; int64_t blockcnt; + int lbc; int i, numlevels, off; ump = VFSTOUFS(vp->v_mount); @@ -260,10 +261,14 @@ ufs_getlbns(vp, bn, ap, nump) * at the given level of indirection, and NIADDR - i is the number * of levels of indirection needed to locate the requested block. */ - for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { + bn -= NDADDR; + for (lbc = 0, i = NIADDR;; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); - blockcnt *= MNINDIR(ump); + + lbc += ump->um_lognindir; + blockcnt = (int64_t)1 << lbc; + if (bn < blockcnt) break; } @@ -289,8 +294,9 @@ ufs_getlbns(vp, bn, ap, nump) if (metalbn == realbn) break; - blockcnt /= MNINDIR(ump); - off = (bn / blockcnt) % MNINDIR(ump); + lbc -= ump->um_lognindir; + blockcnt = (int64_t)1 << lbc; + off = (bn >> lbc) & (MNINDIR(ump) - 1); ++numlevels; ap->in_lbn = metalbn; diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index 50175a0ec86..fc39e16b45e 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_extern.h,v 1.12 2001/11/21 21:23:56 csapuntz Exp $ */ +/* $OpenBSD: ufs_extern.h,v 1.13 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $ */ /*- @@ -121,6 +121,7 @@ void ufs_ihashrem __P((struct inode *)); /* ufs_inode.c */ int ufs_init __P((struct vfsconf *)); int ufs_reclaim __P((struct vnode *, struct proc *)); +int ufs_balloc_range __P((struct vnode *, off_t, off_t, struct ucred *, int)); /* ufs_lookup.c */ void ufs_dirbad 
__P((struct inode *, doff_t, char *)); diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index 8a3935632fb..3865342fde0 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_inode.c,v 1.10 2001/11/21 21:23:56 csapuntz Exp $ */ +/* $OpenBSD: ufs_inode.c,v 1.11 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ufs_inode.c,v 1.7 1996/05/11 18:27:52 mycroft Exp $ */ /* @@ -151,3 +151,150 @@ ufs_reclaim(vp, p) ufs_quota_delete(ip); return (0); } + +/* + * allocate a range of blocks in a file. + * after this function returns, any page entirely contained within the range + * will map to invalid data and thus must be overwritten before it is made + * accessible to others. + */ + +int +ufs_balloc_range(vp, off, len, cred, flags) + struct vnode *vp; + off_t off, len; + struct ucred *cred; + int flags; +{ + off_t oldeof, neweof, oldeob, neweob, oldpagestart, pagestart; + struct uvm_object *uobj; + int i, delta, error, npages1, npages2; + int bshift = vp->v_mount->mnt_fs_bshift; + int bsize = 1 << bshift; + int ppb = MAX(bsize >> PAGE_SHIFT, 1); + struct vm_page *pgs1[ppb], *pgs2[ppb]; + UVMHIST_FUNC("ufs_balloc_range"); UVMHIST_CALLED(ubchist); + UVMHIST_LOG(ubchist, "vp %p off 0x%x len 0x%x u_size 0x%x", + vp, off, len, vp->v_uvm.u_size); + + oldeof = vp->v_uvm.u_size; + error = VOP_SIZE(vp, oldeof, &oldeob); + if (error) { + return error; + } + + neweof = MAX(vp->v_uvm.u_size, off + len); + error = VOP_SIZE(vp, neweof, &neweob); + if (error) { + return error; + } + + error = 0; + uobj = &vp->v_uvm.u_obj; + pgs1[0] = pgs2[0] = NULL; + + /* + * if the last block in the file is not a full block (ie. it is a + * fragment), and this allocation is causing the fragment to change + * size (either to expand the fragment or promote it to a full block), + * cache the old last block (at its new size). 
+ */ + + oldpagestart = trunc_page(oldeof) & ~(bsize - 1); + if ((oldeob & (bsize - 1)) != 0 && oldeob != neweob) { + npages1 = MIN(ppb, (round_page(neweob) - oldpagestart) >> + PAGE_SHIFT); + memset(pgs1, 0, npages1 * sizeof(struct vm_page *)); + simple_lock(&uobj->vmobjlock); + error = VOP_GETPAGES(vp, oldpagestart, pgs1, &npages1, + 0, VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF); + if (error) { + goto out; + } + simple_lock(&uobj->vmobjlock); + uvm_lock_pageq(); + for (i = 0; i < npages1; i++) { + UVMHIST_LOG(ubchist, "got pgs1[%d] %p", i, pgs1[i],0,0); + KASSERT((pgs1[i]->flags & PG_RELEASED) == 0); + pgs1[i]->flags &= ~PG_CLEAN; + uvm_pageactivate(pgs1[i]); + } + uvm_unlock_pageq(); + simple_unlock(&uobj->vmobjlock); + } + + /* + * cache the new range as well. this will create zeroed pages + * where the new block will be and keep them locked until the + * new block is allocated, so there will be no window where + * the old contents of the new block is visible to racing threads. + */ + + pagestart = trunc_page(off) & ~(bsize - 1); + if (pagestart != oldpagestart || pgs1[0] == NULL) { + npages2 = MIN(ppb, (round_page(neweob) - pagestart) >> + PAGE_SHIFT); + memset(pgs2, 0, npages2 * sizeof(struct vm_page *)); + simple_lock(&uobj->vmobjlock); + error = VOP_GETPAGES(vp, pagestart, pgs2, &npages2, 0, + VM_PROT_READ, 0, PGO_SYNCIO|PGO_PASTEOF); + if (error) { + goto out; + } + simple_lock(&uobj->vmobjlock); + uvm_lock_pageq(); + for (i = 0; i < npages2; i++) { + UVMHIST_LOG(ubchist, "got pgs2[%d] %p", i, pgs2[i],0,0); + KASSERT((pgs2[i]->flags & PG_RELEASED) == 0); + pgs2[i]->flags &= ~PG_CLEAN; + uvm_pageactivate(pgs2[i]); + } + uvm_unlock_pageq(); + simple_unlock(&uobj->vmobjlock); + } + + /* + * adjust off to be block-aligned. + */ + + delta = off & (bsize - 1); + off -= delta; + len += delta; + + /* + * now allocate the range. 
+ */ + + lockmgr(&vp->v_glock, LK_EXCLUSIVE, NULL, curproc); + error = VOP_BALLOCN(vp, off, len, cred, flags); + lockmgr(&vp->v_glock, LK_RELEASE, NULL, curproc); + + /* + * unbusy any pages we are holding. + * if we got an error, free any pages we created past the old eob. + */ + +out: + simple_lock(&uobj->vmobjlock); + if (error) { + (void) (uobj->pgops->pgo_flush)(uobj, round_page(oldeob), 0, + PGO_FREE); + } + if (pgs1[0] != NULL) { + uvm_page_unbusy(pgs1, npages1); + + /* + * The data in the frag might be moving to a new disk location. + * We need to flush pages to the new disk locations. + */ + + (uobj->pgops->pgo_flush)(uobj, oldeof & ~(bsize - 1), + MIN((oldeof + bsize) & ~(bsize - 1), neweof), + PGO_CLEANIT | ((flags & B_SYNC) ? PGO_SYNCIO : 0)); + } + if (pgs2[0] != NULL) { + uvm_page_unbusy(pgs2, npages2); + } + simple_unlock(&uobj->vmobjlock); + return error; +} diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index bbf1391dfe5..e0777e4b55f 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_readwrite.c,v 1.19 2001/06/27 04:58:49 art Exp $ */ +/* $OpenBSD: ufs_readwrite.c,v 1.20 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ufs_readwrite.c,v 1.9 1996/05/11 18:27:57 mycroft Exp $ */ /*- @@ -76,21 +76,22 @@ READ(v) int a_ioflag; struct ucred *a_cred; } */ *ap = v; - register struct vnode *vp; - register struct inode *ip; - register struct uio *uio; - register FS *fs; + struct vnode *vp; + struct inode *ip; + struct uio *uio; + FS *fs; + void *win; + vsize_t bytelen; struct buf *bp; daddr_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; int error; - u_short mode; vp = ap->a_vp; ip = VTOI(vp); - mode = ip->i_ffs_mode; uio = ap->a_uio; + error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) @@ -110,6 +111,24 @@ READ(v) if (uio->uio_resid == 0) return (0); + if (uio->uio_offset >= ip->i_ffs_size) + goto out; + + if (vp->v_type == VREG) { + while (uio->uio_resid 
> 0) { + bytelen = min(ip->i_ffs_size - uio->uio_offset, + uio->uio_resid); + if (bytelen == 0) + break; + win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, + &bytelen, UBC_READ); + error = uiomove(win, bytelen, uio); + ubc_release(win, 0); + if (error) + break; + } + goto out; + } for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_ffs_size - uio->uio_offset) <= 0) @@ -131,9 +150,6 @@ READ(v) #else if (lblktosize(fs, nextlbn) >= ip->i_ffs_size) error = bread(vp, lbn, size, NOCRED, &bp); - else if (doclusterread) - error = cluster_read(vp, &ip->i_ci, - ip->i_ffs_size, lbn, size, NOCRED, &bp); else if (lbn - 1 == ip->i_ci.ci_lastr) { int nextsize = BLKSIZE(fs, ip, nextlbn); error = breadn(vp, lbn, @@ -158,7 +174,7 @@ READ(v) break; xfersize = size; } - error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, + error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); if (error) break; @@ -166,6 +182,7 @@ READ(v) } if (bp != NULL) brelse(bp); +out: ip->i_flag |= IN_ACCESS; return (error); } @@ -183,15 +200,19 @@ WRITE(v) int a_ioflag; struct ucred *a_cred; } */ *ap = v; - register struct vnode *vp; - register struct uio *uio; - register struct inode *ip; - register FS *fs; + struct vnode *vp; + struct uio *uio; + struct inode *ip; + FS *fs; struct buf *bp; struct proc *p; daddr_t lbn; off_t osize; int blkoffset, error, extended, flags, ioflag, resid, size, xfersize; + void *win; + vsize_t bytelen; + off_t oldoff; + boolean_t rv; extended = 0; ioflag = ap->a_ioflag; @@ -239,9 +260,77 @@ WRITE(v) resid = uio->uio_resid; osize = ip->i_ffs_size; - flags = ioflag & IO_SYNC ? B_SYNC : 0; + error = 0; + + if (vp->v_type != VREG) + goto bcache; + + while (uio->uio_resid > 0) { + oldoff = uio->uio_offset; + blkoffset = blkoff(fs, uio->uio_offset); + bytelen = min(fs->fs_bsize - blkoffset, uio->uio_resid); + + /* + * XXXUBC if file is mapped and this is the last block, + * process one page at a time. 
+ */ + + error = ufs_balloc_range(vp, uio->uio_offset, bytelen, + ap->a_cred, ioflag & IO_SYNC ? B_SYNC : 0); + if (error) { + return error; + } - for (error = 0; uio->uio_resid > 0;) { + win = ubc_alloc(&vp->v_uvm.u_obj, uio->uio_offset, &bytelen, + UBC_WRITE); + error = uiomove(win, bytelen, uio); + ubc_release(win, 0); + + /* + * flush what we just wrote if necessary. + * XXXUBC simplistic async flushing. + */ + + if (ioflag & IO_SYNC) { + simple_lock(&vp->v_uvm.u_obj.vmobjlock); +#if 1 + /* + * XXX + * flush whole blocks in case there are deps. + * otherwise we can dirty and flush part of + * a block multiple times and the softdep code + * will get confused. fixing this the right way + * is complicated so we'll work around it for now. + */ + + rv = vp->v_uvm.u_obj.pgops->pgo_flush( + &vp->v_uvm.u_obj, + oldoff & ~(fs->fs_bsize - 1), + (oldoff + bytelen + fs->fs_bsize - 1) & + ~(fs->fs_bsize - 1), + PGO_CLEANIT|PGO_SYNCIO); +#else + rv = vp->v_uvm.u_obj.pgops->pgo_flush( + &vp->v_uvm.u_obj, oldoff, oldoff + bytelen, + PGO_CLEANIT|PGO_SYNCIO); +#endif + simple_unlock(&vp->v_uvm.u_obj.vmobjlock); + } else if (oldoff >> 16 != uio->uio_offset >> 16) { + simple_lock(&vp->v_uvm.u_obj.vmobjlock); + rv = vp->v_uvm.u_obj.pgops->pgo_flush( + &vp->v_uvm.u_obj, (oldoff >> 16) << 16, + (uio->uio_offset >> 16) << 16, PGO_CLEANIT); + simple_unlock(&vp->v_uvm.u_obj.vmobjlock); + } + if (error) { + break; + } + } + goto out; + +bcache: + flags = ioflag & IO_SYNC ? 
B_SYNC : 0; + while (uio->uio_resid > 0) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; @@ -260,14 +349,12 @@ WRITE(v) uvm_vnp_setsize(vp, ip->i_ffs_size); extended = 1; } - (void)uvm_vnp_uncache(vp); size = BLKSIZE(fs, ip, lbn) - bp->b_resid; if (size < xfersize) xfersize = size; - error = - uiomove((char *)bp->b_data + blkoffset, xfersize, uio); + error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio); if (error != 0) bzero((char *)bp->b_data + blkoffset, xfersize); @@ -287,13 +374,14 @@ WRITE(v) #endif if (error || xfersize == 0) break; - ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. */ +out: + ip->i_flag |= IN_CHANGE | IN_UPDATE; if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) ip->i_ffs_mode &= ~(ISUID | ISGID); if (resid > uio->uio_resid) diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 4caf0ef78c7..e926ee7aff6 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_vnops.c,v 1.39 2001/11/21 21:23:56 csapuntz Exp $ */ +/* $OpenBSD: ufs_vnops.c,v 1.40 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $ */ /* @@ -469,8 +469,6 @@ ufs_chmod(vp, mode, cred, p) ip->i_ffs_mode &= ~ALLPERMS; ip->i_ffs_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; - if ((vp->v_flag & VTEXT) && (ip->i_ffs_mode & S_ISTXT) == 0) - (void) uvm_vnp_uncache(vp); return (0); } diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h index e9dc71f9855..981eb21474b 100644 --- a/sys/ufs/ufs/ufsmount.h +++ b/sys/ufs/ufs/ufsmount.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ufsmount.h,v 1.5 1999/06/01 01:48:52 millert Exp $ */ +/* $OpenBSD: ufsmount.h,v 1.6 2001/11/27 05:27:12 art Exp $ */ /* $NetBSD: ufsmount.h,v 1.4 1994/12/21 20:00:23 mycroft Exp $ */ /* @@ 
-64,6 +64,7 @@ struct ufsmount { struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */ struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */ u_long um_nindir; /* indirect ptrs per block */ + u_long um_lognindir; /* log2 of um_nindir */ u_long um_bptrtodb; /* indir ptr to disk block */ u_long um_seqinc; /* inc between seq blocks */ time_t um_btime[MAXQUOTAS]; /* block quota time limit */ |