summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Fritsch <sf@cvs.openbsd.org>2016-08-30 19:47:24 +0000
committerStefan Fritsch <sf@cvs.openbsd.org>2016-08-30 19:47:24 +0000
commita49143a4cc8878d212168f24f0d9888de4db62d9 (patch)
tree302091159353f30685df5e382f3ca657a59509fe
parentd93491f093d0cc48fce8977fc9cb8c78b55cd504 (diff)
Revert "Implement VFS read clustering for MSDOSFS"
This caused garbage to be written instead of blocks of 0-bytes if a file is extended by seeking past the end. This happens for example when extracting files containing lots of 0-bytes with tar. ok mpi@ Details of original commit: msdosfs_vnops.c revision 1.105 denode.h revision 1.28 date: 2016/01/13 10:00:55; author: mpi; commitid: ru9jHQwQX09BC5Bw; Implement VFS read clustering for MSDOSFS. The logic used in msdosfs_bmap() to loop calling pcbmap() comes from FreeBSD and is not really efficient but it is good enough since it is only called when generating I/O. With this diff I get a 100% improvement when reading big files from a crappy USB stick. With this and bread_cluster(9) modified to not re-fetch B_CACHED buffers, reading large contiguous files with chunk sizes of MAXPHYS is almost as fast as physio(9) on the same device. For a 'real world' example, when copying music files from a USB stick I see a speed jump from 15MB/s on -current to 24Mb/s with this diff. While here rename some 'lbn' variables into 'cn' to better reflect what we're dealing with. Tested by Mathieu, with support from deraadt@
-rw-r--r--sys/msdosfs/denode.h3
-rw-r--r--sys/msdosfs/msdosfs_vnops.c118
2 files changed, 51 insertions, 70 deletions
diff --git a/sys/msdosfs/denode.h b/sys/msdosfs/denode.h
index e9a142f8834..cdca90500ab 100644
--- a/sys/msdosfs/denode.h
+++ b/sys/msdosfs/denode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: denode.h,v 1.29 2016/06/19 11:54:33 natano Exp $ */
+/* $OpenBSD: denode.h,v 1.30 2016/08/30 19:47:23 sf Exp $ */
/* $NetBSD: denode.h,v 1.24 1997/10/17 11:23:39 ws Exp $ */
/*-
@@ -142,6 +142,7 @@ struct denode {
struct vnode *de_devvp; /* vnode of blk dev we live on */
uint32_t de_flag; /* flag bits */
dev_t de_dev; /* device where direntry lives */
+ daddr_t de_lastr;
uint32_t de_dirclust; /* cluster of the directory file containing this entry */
uint32_t de_diroffset; /* offset of this entry in the directory cluster */
uint32_t de_fndoffset; /* offset of found dir entry */
diff --git a/sys/msdosfs/msdosfs_vnops.c b/sys/msdosfs/msdosfs_vnops.c
index 45a479fb646..39c60044df5 100644
--- a/sys/msdosfs/msdosfs_vnops.c
+++ b/sys/msdosfs/msdosfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: msdosfs_vnops.c,v 1.112 2016/06/19 11:54:33 natano Exp $ */
+/* $OpenBSD: msdosfs_vnops.c,v 1.113 2016/08/30 19:47:23 sf Exp $ */
/* $NetBSD: msdosfs_vnops.c,v 1.63 1997/10/17 11:24:19 ws Exp $ */
/*-
@@ -77,13 +77,13 @@
static uint32_t fileidhash(uint64_t);
-int msdosfs_bmaparray(struct vnode *, daddr_t, daddr_t *, int *);
int msdosfs_kqfilter(void *);
int filt_msdosfsread(struct knote *, long);
int filt_msdosfswrite(struct knote *, long);
int filt_msdosfsvnode(struct knote *, long);
void filt_msdosfsdetach(struct knote *);
+
/*
* Some general notes:
*
@@ -509,14 +509,18 @@ int
msdosfs_read(void *v)
{
struct vop_read_args *ap = v;
+ int error = 0;
+ uint32_t diff;
+ int blsize;
+ int isadir;
+ uint32_t n;
+ long on;
+ daddr_t lbn, rablock, rablkno;
+ struct buf *bp;
struct vnode *vp = ap->a_vp;
struct denode *dep = VTODE(vp);
struct msdosfsmount *pmp = dep->de_pmp;
struct uio *uio = ap->a_uio;
- int isadir, error = 0;
- uint32_t n, diff, size, on;
- struct buf *bp;
- daddr_t cn;
/*
* If they didn't ask for any data, then we are done.
@@ -531,8 +535,7 @@ msdosfs_read(void *v)
if (uio->uio_offset >= dep->de_FileSize)
return (0);
- cn = de_cluster(pmp, uio->uio_offset);
- size = pmp->pm_bpcluster;
+ lbn = de_cluster(pmp, uio->uio_offset);
on = uio->uio_offset & pmp->pm_crbomask;
n = ulmin(pmp->pm_bpcluster - on, uio->uio_resid);
@@ -545,21 +548,31 @@ msdosfs_read(void *v)
if (diff < n)
n = diff;
+ /* convert cluster # to block # if a directory */
+ if (isadir) {
+ error = pcbmap(dep, lbn, &lbn, 0, &blsize);
+ if (error)
+ return (error);
+ }
/*
* If we are operating on a directory file then be sure to
* do i/o with the vnode for the filesystem instead of the
* vnode for the directory.
*/
if (isadir) {
- error = pcbmap(dep, cn, &cn, 0, &size);
- if (error)
- return (error);
- error = bread(pmp->pm_devvp, cn, size, &bp);
+ error = bread(pmp->pm_devvp, lbn, blsize, &bp);
} else {
- if (de_cn2off(pmp, cn + 1) >= dep->de_FileSize)
- error = bread(vp, cn, size, &bp);
+ rablock = lbn + 1;
+ rablkno = de_cn2bn(pmp, rablock);
+ if (dep->de_lastr + 1 == lbn &&
+ de_cn2off(pmp, rablock) < dep->de_FileSize)
+ error = breadn(vp, de_cn2bn(pmp, lbn),
+ pmp->pm_bpcluster, &rablkno,
+ &pmp->pm_bpcluster, 1, &bp);
else
- error = bread_cluster(vp, cn, size, &bp);
+ error = bread(vp, de_cn2bn(pmp, lbn),
+ pmp->pm_bpcluster, &bp);
+ dep->de_lastr = lbn;
}
n = min(n, pmp->pm_bpcluster - bp->b_resid);
if (error) {
@@ -588,7 +601,7 @@ msdosfs_write(void *v)
uint32_t osize;
int error = 0;
uint32_t count, lastcn;
- daddr_t cn;
+ daddr_t bn;
struct buf *bp;
int ioflag = ap->a_ioflag;
struct uio *uio = ap->a_uio;
@@ -665,34 +678,32 @@ msdosfs_write(void *v)
lastcn = de_clcount(pmp, osize) - 1;
do {
- croffset = uio->uio_offset & pmp->pm_crbomask;
- cn = de_cluster(pmp, uio->uio_offset);
-
- if (cn > lastcn) {
+ if (de_cluster(pmp, uio->uio_offset) > lastcn) {
error = ENOSPC;
break;
}
- if (croffset == 0 &&
- (de_cluster(pmp, uio->uio_offset + uio->uio_resid) > cn ||
- (uio->uio_offset + uio->uio_resid) >= dep->de_FileSize)) {
+ bn = de_blk(pmp, uio->uio_offset);
+ if ((uio->uio_offset & pmp->pm_crbomask) == 0
+ && (de_blk(pmp, uio->uio_offset + uio->uio_resid) > de_blk(pmp, uio->uio_offset)
+ || uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) {
/*
* If either the whole cluster gets written,
* or we write the cluster from its start beyond EOF,
* then no need to read data from disk.
*/
- bp = getblk(thisvp, cn, pmp->pm_bpcluster, 0, 0);
+ bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0);
clrbuf(bp);
/*
* Do the bmap now, since pcbmap needs buffers
* for the fat table. (see msdosfs_strategy)
*/
if (bp->b_blkno == bp->b_lblkno) {
- error = pcbmap(dep, bp->b_lblkno, &cn, 0, 0);
+ error = pcbmap(dep,
+ de_bn2cn(pmp, bp->b_lblkno),
+ &bp->b_blkno, 0, 0);
if (error)
bp->b_blkno = -1;
- else
- bp->b_blkno = cn;
}
if (bp->b_blkno == -1) {
brelse(bp);
@@ -702,16 +713,16 @@ msdosfs_write(void *v)
}
} else {
/*
- * The block we need to write into exists, so
- * read it in.
+ * The block we need to write into exists, so read it in.
*/
- error = bread(thisvp, cn, pmp->pm_bpcluster, &bp);
+ error = bread(thisvp, bn, pmp->pm_bpcluster, &bp);
if (error) {
brelse(bp);
break;
}
}
+ croffset = uio->uio_offset & pmp->pm_crbomask;
n = ulmin(uio->uio_resid, pmp->pm_bpcluster - croffset);
if (uio->uio_offset + n > dep->de_FileSize) {
dep->de_FileSize = uio->uio_offset + n;
@@ -1744,7 +1755,7 @@ msdosfs_islocked(void *v)
/*
* vp - address of vnode file the file
- * bn - which cluster we are interested in mapping to a filesystem block number
+ * bn - which cluster we are interested in mapping to a filesystem block number.
* vpp - returns the vnode for the block special file holding the filesystem
* containing the file of interest
* bnp - address of where to return the filesystem relative block number
@@ -1754,51 +1765,19 @@ msdosfs_bmap(void *v)
{
struct vop_bmap_args *ap = v;
struct denode *dep = VTODE(ap->a_vp);
+ struct msdosfsmount *pmp = dep->de_pmp;
if (ap->a_vpp != NULL)
*ap->a_vpp = dep->de_devvp;
if (ap->a_bnp == NULL)
return (0);
-
- return (msdosfs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, ap->a_runp));
-}
-
-int
-msdosfs_bmaparray(struct vnode *vp, daddr_t cn, daddr_t *bnp, int *runp)
-{
- struct denode *dep = VTODE(vp);
- struct msdosfsmount *pmp = dep->de_pmp;
- struct mount *mp;
- int error, maxrun = 0, run;
- daddr_t daddr;
-
- mp = vp->v_mount;
-
- if (runp) {
+ if (ap->a_runp) {
/*
- * XXX
- * If MAXBSIZE is the largest transfer the disks can handle,
- * we probably want maxrun to be 1 block less so that we
- * don't create a block larger than the device can handle.
+ * Sequential clusters should be counted here.
*/
- *runp = 0;
- maxrun = min(MAXBSIZE / mp->mnt_stat.f_iosize - 1,
- pmp->pm_maxcluster - cn);
+ *ap->a_runp = 0;
}
-
- if ((error = pcbmap(dep, cn, bnp, 0, 0)) != 0)
- return (error);
-
- for (run = 1; run <= maxrun; run++) {
- error = pcbmap(dep, cn + run, &daddr, 0, 0);
- if (error != 0 || (daddr != *bnp + de_cn2bn(pmp, run)))
- break;
- }
-
- if (runp)
- *runp = run - 1;
-
- return (0);
+ return (pcbmap(dep, de_bn2cn(pmp, ap->a_bn), ap->a_bnp, 0, 0));
}
int
@@ -1820,7 +1799,8 @@ msdosfs_strategy(void *v)
* don't allow files with holes, so we shouldn't ever see this.
*/
if (bp->b_blkno == bp->b_lblkno) {
- error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0);
+ error = pcbmap(dep, de_bn2cn(dep->de_pmp, bp->b_lblkno),
+ &bp->b_blkno, 0, 0);
if (error)
bp->b_blkno = -1;
if (bp->b_blkno == -1)