diff options
author | Stefan Fritsch <sf@cvs.openbsd.org> | 2016-08-30 19:47:24 +0000 |
---|---|---|
committer | Stefan Fritsch <sf@cvs.openbsd.org> | 2016-08-30 19:47:24 +0000 |
commit | a49143a4cc8878d212168f24f0d9888de4db62d9 (patch) | |
tree | 302091159353f30685df5e382f3ca657a59509fe | |
parent | d93491f093d0cc48fce8977fc9cb8c78b55cd504 (diff) |
Revert "Implement VFS read clustering for MSDOSFS"
This caused garbage to be written instead of blocks of 0-bytes if a
file is extended by seeking past the end. This happens for example
when extracting files containing lots of 0-bytes with tar.
ok mpi@
Details of original commit:
msdosfs_vnops.c revision 1.105
denode.h revision 1.28
date: 2016/01/13 10:00:55; author: mpi; commitid: ru9jHQwQX09BC5Bw;
Implement VFS read clustering for MSDOSFS.
The logic used in msdosfs_bmap() to loop calling pcbmap() comes from
FreeBSD and is not really efficient but it is good enough since it is
only called when generating I/O.
With this diff I get a 100% improvement when reading big files from a
crappy USB stick.
With this and bread_cluster(9) modified to not re-fetch B_CACHED buffers,
reading large contiguous files with chunk sizes of MAXPHYS is almost as
fast as physio(9) on the same device.
For a 'real world' example, when copying music files from a USB stick I
see a speed jump from 15MB/s on -current to 24Mb/s with this diff.
While here rename some 'lbn' variables into 'cn' to better reflect what
we're dealing with.
Tested by Mathieu, with support from deraadt@
-rw-r--r-- | sys/msdosfs/denode.h | 3 | ||||
-rw-r--r-- | sys/msdosfs/msdosfs_vnops.c | 118 |
2 files changed, 51 insertions, 70 deletions
diff --git a/sys/msdosfs/denode.h b/sys/msdosfs/denode.h index e9a142f8834..cdca90500ab 100644 --- a/sys/msdosfs/denode.h +++ b/sys/msdosfs/denode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: denode.h,v 1.29 2016/06/19 11:54:33 natano Exp $ */ +/* $OpenBSD: denode.h,v 1.30 2016/08/30 19:47:23 sf Exp $ */ /* $NetBSD: denode.h,v 1.24 1997/10/17 11:23:39 ws Exp $ */ /*- @@ -142,6 +142,7 @@ struct denode { struct vnode *de_devvp; /* vnode of blk dev we live on */ uint32_t de_flag; /* flag bits */ dev_t de_dev; /* device where direntry lives */ + daddr_t de_lastr; uint32_t de_dirclust; /* cluster of the directory file containing this entry */ uint32_t de_diroffset; /* offset of this entry in the directory cluster */ uint32_t de_fndoffset; /* offset of found dir entry */ diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c index 45a479fb646..39c60044df5 100644 --- a/sys/msdosfs/msdosfs_vnops.c +++ b/sys/msdosfs/msdosfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: msdosfs_vnops.c,v 1.112 2016/06/19 11:54:33 natano Exp $ */ +/* $OpenBSD: msdosfs_vnops.c,v 1.113 2016/08/30 19:47:23 sf Exp $ */ /* $NetBSD: msdosfs_vnops.c,v 1.63 1997/10/17 11:24:19 ws Exp $ */ /*- @@ -77,13 +77,13 @@ static uint32_t fileidhash(uint64_t); -int msdosfs_bmaparray(struct vnode *, daddr_t, daddr_t *, int *); int msdosfs_kqfilter(void *); int filt_msdosfsread(struct knote *, long); int filt_msdosfswrite(struct knote *, long); int filt_msdosfsvnode(struct knote *, long); void filt_msdosfsdetach(struct knote *); + /* * Some general notes: * @@ -509,14 +509,18 @@ int msdosfs_read(void *v) { struct vop_read_args *ap = v; + int error = 0; + uint32_t diff; + int blsize; + int isadir; + uint32_t n; + long on; + daddr_t lbn, rablock, rablkno; + struct buf *bp; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct uio *uio = ap->a_uio; - int isadir, error = 0; - uint32_t n, diff, size, on; - struct buf *bp; - daddr_t cn; /* * If they didn't ask for any data, then we are done. @@ -531,8 +535,7 @@ msdosfs_read(void *v) if (uio->uio_offset >= dep->de_FileSize) return (0); - cn = de_cluster(pmp, uio->uio_offset); - size = pmp->pm_bpcluster; + lbn = de_cluster(pmp, uio->uio_offset); on = uio->uio_offset & pmp->pm_crbomask; n = ulmin(pmp->pm_bpcluster - on, uio->uio_resid); @@ -545,21 +548,31 @@ msdosfs_read(void *v) if (diff < n) n = diff; + /* convert cluster # to block # if a directory */ + if (isadir) { + error = pcbmap(dep, lbn, &lbn, 0, &blsize); + if (error) + return (error); + } /* * If we are operating on a directory file then be sure to * do i/o with the vnode for the filesystem instead of the * vnode for the directory. */ if (isadir) { - error = pcbmap(dep, cn, &cn, 0, &size); - if (error) - return (error); - error = bread(pmp->pm_devvp, cn, size, &bp); + error = bread(pmp->pm_devvp, lbn, blsize, &bp); } else { - if (de_cn2off(pmp, cn + 1) >= dep->de_FileSize) - error = bread(vp, cn, size, &bp); + rablock = lbn + 1; + rablkno = de_cn2bn(pmp, rablock); + if (dep->de_lastr + 1 == lbn && + de_cn2off(pmp, rablock) < dep->de_FileSize) + error = breadn(vp, de_cn2bn(pmp, lbn), + pmp->pm_bpcluster, &rablkno, + &pmp->pm_bpcluster, 1, &bp); else - error = bread_cluster(vp, cn, size, &bp); + error = bread(vp, de_cn2bn(pmp, lbn), + pmp->pm_bpcluster, &bp); + dep->de_lastr = lbn; } n = min(n, pmp->pm_bpcluster - bp->b_resid); if (error) { @@ -588,7 +601,7 @@ msdosfs_write(void *v) uint32_t osize; int error = 0; uint32_t count, lastcn; - daddr_t cn; + daddr_t bn; struct buf *bp; int ioflag = ap->a_ioflag; struct uio *uio = ap->a_uio; @@ -665,34 +678,32 @@ msdosfs_write(void *v) lastcn = de_clcount(pmp, osize) - 1; do { - croffset = uio->uio_offset & pmp->pm_crbomask; - cn = de_cluster(pmp, uio->uio_offset); - - if (cn > lastcn) { + if (de_cluster(pmp, uio->uio_offset) > lastcn) { error = ENOSPC; break; } - if (croffset == 0 && - (de_cluster(pmp, uio->uio_offset + uio->uio_resid) > cn || - (uio->uio_offset + uio->uio_resid) >= dep->de_FileSize)) { + bn = de_blk(pmp, uio->uio_offset); + if ((uio->uio_offset & pmp->pm_crbomask) == 0 + && (de_blk(pmp, uio->uio_offset + uio->uio_resid) > de_blk(pmp, uio->uio_offset) + || uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) { /* * If either the whole cluster gets written, * or we write the cluster from its start beyond EOF, * then no need to read data from disk. */ - bp = getblk(thisvp, cn, pmp->pm_bpcluster, 0, 0); + bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0); clrbuf(bp); /* * Do the bmap now, since pcbmap needs buffers * for the fat table. (see msdosfs_strategy) */ if (bp->b_blkno == bp->b_lblkno) { - error = pcbmap(dep, bp->b_lblkno, &cn, 0, 0); + error = pcbmap(dep, + de_bn2cn(pmp, bp->b_lblkno), + &bp->b_blkno, 0, 0); if (error) bp->b_blkno = -1; - else - bp->b_blkno = cn; } if (bp->b_blkno == -1) { brelse(bp); @@ -702,16 +713,16 @@ msdosfs_write(void *v) } } else { /* - * The block we need to write into exists, so - * read it in. + * The block we need to write into exists, so read it in. */ - error = bread(thisvp, cn, pmp->pm_bpcluster, &bp); + error = bread(thisvp, bn, pmp->pm_bpcluster, &bp); if (error) { brelse(bp); break; } } + croffset = uio->uio_offset & pmp->pm_crbomask; n = ulmin(uio->uio_resid, pmp->pm_bpcluster - croffset); if (uio->uio_offset + n > dep->de_FileSize) { dep->de_FileSize = uio->uio_offset + n; @@ -1744,7 +1755,7 @@ msdosfs_islocked(void *v) /* * vp - address of vnode file the file - * bn - which cluster we are interested in mapping to a filesystem block number + * bn - which cluster we are interested in mapping to a filesystem block number. * vpp - returns the vnode for the block special file holding the filesystem * containing the file of interest * bnp - address of where to return the filesystem relative block number @@ -1754,51 +1765,19 @@ msdosfs_bmap(void *v) { struct vop_bmap_args *ap = v; struct denode *dep = VTODE(ap->a_vp); + struct msdosfsmount *pmp = dep->de_pmp; if (ap->a_vpp != NULL) *ap->a_vpp = dep->de_devvp; if (ap->a_bnp == NULL) return (0); - - return (msdosfs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, ap->a_runp)); -} - -int -msdosfs_bmaparray(struct vnode *vp, daddr_t cn, daddr_t *bnp, int *runp) -{ - struct denode *dep = VTODE(vp); - struct msdosfsmount *pmp = dep->de_pmp; - struct mount *mp; - int error, maxrun = 0, run; - daddr_t daddr; - - mp = vp->v_mount; - - if (runp) { + if (ap->a_runp) { /* - * XXX - * If MAXBSIZE is the largest transfer the disks can handle, - * we probably want maxrun to be 1 block less so that we - * don't create a block larger than the device can handle. + * Sequential clusters should be counted here. */ - *runp = 0; - maxrun = min(MAXBSIZE / mp->mnt_stat.f_iosize - 1, - pmp->pm_maxcluster - cn); + *ap->a_runp = 0; } - - if ((error = pcbmap(dep, cn, bnp, 0, 0)) != 0) - return (error); - - for (run = 1; run <= maxrun; run++) { - error = pcbmap(dep, cn + run, &daddr, 0, 0); - if (error != 0 || (daddr != *bnp + de_cn2bn(pmp, run))) - break; - } - - if (runp) - *runp = run - 1; - - return (0); + return (pcbmap(dep, de_bn2cn(pmp, ap->a_bn), ap->a_bnp, 0, 0)); } int @@ -1820,7 +1799,8 @@ msdosfs_strategy(void *v) * don't allow files with holes, so we shouldn't ever see this. */ if (bp->b_blkno == bp->b_lblkno) { - error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); + error = pcbmap(dep, de_bn2cn(dep->de_pmp, bp->b_lblkno), + &bp->b_blkno, 0, 0); if (error) bp->b_blkno = -1; if (bp->b_blkno == -1) |