-rw-r--r--sys/sys/buf.h25
-rw-r--r--sys/sys/lock.h167
-rw-r--r--sys/sys/malloc.h47
-rw-r--r--sys/sys/mount.h83
-rw-r--r--sys/sys/param.h3
-rw-r--r--sys/sys/queue.h113
-rw-r--r--sys/sys/simplelock.h86
-rw-r--r--sys/sys/sysctl.h13
-rw-r--r--sys/sys/systm.h6
-rw-r--r--sys/sys/vnode.h95
-rw-r--r--sys/sys/vnode_if.h147
-rw-r--r--sys/ufs/ffs/ffs_alloc.c210
-rw-r--r--sys/ufs/ffs/ffs_balloc.c188
-rw-r--r--sys/ufs/ffs/ffs_extern.h59
-rw-r--r--sys/ufs/ffs/ffs_inode.c63
-rw-r--r--sys/ufs/ffs/ffs_subr.c31
-rw-r--r--sys/ufs/ffs/ffs_vfsops.c298
-rw-r--r--sys/ufs/ffs/ffs_vnops.c96
-rw-r--r--sys/ufs/ffs/fs.h16
-rw-r--r--sys/ufs/mfs/mfs_extern.h6
-rw-r--r--sys/ufs/mfs/mfs_vfsops.c65
-rw-r--r--sys/ufs/mfs/mfs_vnops.c15
-rw-r--r--sys/ufs/mfs/mfsnode.h8
-rw-r--r--sys/ufs/ufs/inode.h28
-rw-r--r--sys/ufs/ufs/ufs_extern.h35
-rw-r--r--sys/ufs/ufs/ufs_ihash.c108
-rw-r--r--sys/ufs/ufs/ufs_inode.c47
-rw-r--r--sys/ufs/ufs/ufs_lookup.c341
-rw-r--r--sys/ufs/ufs/ufs_quota.c63
-rw-r--r--sys/ufs/ufs/ufs_readwrite.c12
-rw-r--r--sys/ufs/ufs/ufs_vfsops.c55
-rw-r--r--sys/ufs/ufs/ufs_vnops.c702
32 files changed, 2194 insertions, 1037 deletions
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 34587d51cc8..4b2582677a0 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: buf.h,v 1.7 1997/07/28 09:13:14 deraadt Exp $ */
+/* $OpenBSD: buf.h,v 1.8 1997/10/06 15:25:32 csapuntz Exp $ */
/* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */
/*
@@ -48,6 +48,27 @@
#define NOLIST ((struct buf *)0x87654321)
/*
+ * To avoid including <ufs/ffs/softdep.h>
+ */
+
+LIST_HEAD(workhead, worklist);
+
+/*
+ * These are currently used only by the soft dependency code, hence
+ * are stored once in a global variable. If other subsystems wanted
+ * to use these hooks, a pointer to a set of bio_ops could be added
+ * to each buffer.
+ */
+struct mount;
+extern struct bio_ops {
+ void (*io_start) __P((struct buf *));
+ void (*io_complete) __P((struct buf *));
+ void (*io_deallocate) __P((struct buf *));
+ int (*io_sync) __P((struct mount *));
+} bioops;
+
+
+/*
* The buffer header describes an I/O operation in the kernel.
*/
struct buf {
@@ -79,6 +100,7 @@ struct buf {
struct ucred *b_wcred; /* Write credentials reference. */
int b_validoff; /* Offset in buffer of valid region. */
int b_validend; /* Offset of end of valid region. */
+ struct workhead b_dep; /* List of filesystem dependencies. */
};
/*
@@ -177,6 +199,7 @@ int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int,
void brelse __P((struct buf *));
void bremfree __P((struct buf *));
void bufinit __P((void));
+void bdirty __P((struct buf *));
int bwrite __P((struct buf *));
void cluster_callback __P((struct buf *));
int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long,
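
The bio_ops vector added above gives the buffer cache one global set of callbacks that fire around buffer I/O; the comment notes that per-buffer hooks could come later if other subsystems need them. As a rough illustration only (none of this is in the diff; the routine and handler names are placeholders), a subsystem such as the soft-dependency code would wire itself in once at initialization:

	#include <sys/param.h>
	#include <sys/buf.h>

	/* Illustrative handlers matching the bio_ops signatures. */
	static void softdep_io_start __P((struct buf *));
	static void softdep_io_complete __P((struct buf *));
	static void softdep_io_deallocate __P((struct buf *));
	static int  softdep_io_sync __P((struct mount *));

	void
	softdep_attach_bioops()
	{
		/* Point the global hooks at subsystem-specific handlers. */
		bioops.io_start = softdep_io_start;
		bioops.io_complete = softdep_io_complete;
		bioops.io_deallocate = softdep_io_deallocate;
		bioops.io_sync = softdep_io_sync;
	}
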
diff --git a/sys/sys/lock.h b/sys/sys/lock.h
new file mode 100644
index 00000000000..f4491b09520
--- /dev/null
+++ b/sys/sys/lock.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code contains ideas from software contributed to Berkeley by
+ * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
+ * System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)lock.h 8.12 (Berkeley) 5/19/95
+ */
+
+#ifndef _LOCK_H_
+#define _LOCK_H_
+
+#include <sys/simplelock.h>
+
+/*
+ * The general lock structure. Provides for multiple shared locks,
+ * upgrading from shared to exclusive, and sleeping until the lock
+ * can be gained. The simple locks are defined in <sys/simplelock.h>.
+ */
+struct lock {
+ struct simplelock lk_interlock; /* lock on remaining fields */
+ u_int lk_flags; /* see below */
+ int lk_sharecount; /* # of accepted shared locks */
+ int lk_waitcount; /* # of processes sleeping for lock */
+ short lk_exclusivecount; /* # of recursive exclusive locks */
+ short lk_prio; /* priority at which to sleep */
+ char *lk_wmesg; /* resource sleeping (for tsleep) */
+ int lk_timo; /* maximum sleep time (for tsleep) */
+ pid_t lk_lockholder; /* pid of exclusive lock holder */
+};
+/*
+ * Lock request types:
+ * LK_SHARED - get one of many possible shared locks. If a process
+ * holding an exclusive lock requests a shared lock, the exclusive
+ * lock(s) will be downgraded to shared locks.
+ * LK_EXCLUSIVE - stop further shared locks, when they are cleared,
+ * grant a pending upgrade if it exists, then grant an exclusive
+ * lock. Only one exclusive lock may exist at a time, except that
+ * a process holding an exclusive lock may get additional exclusive
+ * locks if it explicitly sets the LK_CANRECURSE flag in the lock
+ * request, or if the LK_CANRECURSE flag was set when the lock was
+ * initialized.
+ * LK_UPGRADE - the process must hold a shared lock that it wants to
+ * have upgraded to an exclusive lock. Other processes may get
+ * exclusive access to the resource between the time that the upgrade
+ * is requested and the time that it is granted.
+ * LK_EXCLUPGRADE - the process must hold a shared lock that it wants to
+ * have upgraded to an exclusive lock. If the request succeeds, no
+ * other processes will have gotten exclusive access to the resource
+ * between the time that the upgrade is requested and the time that
+ * it is granted. However, if another process has already requested
+ * an upgrade, the request will fail (see error returns below).
+ * LK_DOWNGRADE - the process must hold an exclusive lock that it wants
+ * to have downgraded to a shared lock. If the process holds multiple
+ * (recursive) exclusive locks, they will all be downgraded to shared
+ * locks.
+ * LK_RELEASE - release one instance of a lock.
+ * LK_DRAIN - wait for all activity on the lock to end, then mark it
+ * decommissioned. This feature is used before freeing a lock that
+ * is part of a piece of memory that is about to be freed.
+ *
+ * These are flags that are passed to the lockmgr routine.
+ */
+#define LK_TYPE_MASK 0x0000000f /* type of lock sought */
+#define LK_SHARED 0x00000001 /* shared lock */
+#define LK_EXCLUSIVE 0x00000002 /* exclusive lock */
+#define LK_UPGRADE 0x00000003 /* shared-to-exclusive upgrade */
+#define LK_EXCLUPGRADE 0x00000004 /* first shared-to-exclusive upgrade */
+#define LK_DOWNGRADE 0x00000005 /* exclusive-to-shared downgrade */
+#define LK_RELEASE 0x00000006 /* release any type of lock */
+#define LK_DRAIN 0x00000007 /* wait for all lock activity to end */
+/*
+ * External lock flags.
+ *
+ * The first three flags may be set in lock_init to set their mode permanently,
+ * or passed in as arguments to the lock manager. The LK_REENABLE flag may be
+ * set only at the release of a lock obtained by drain.
+ */
+#define LK_EXTFLG_MASK 0x00000070 /* mask of external flags */
+#define LK_NOWAIT 0x00000010 /* do not sleep to await lock */
+#define LK_SLEEPFAIL 0x00000020 /* sleep, then return failure */
+#define LK_CANRECURSE 0x00000040 /* allow recursive exclusive lock */
+#define LK_REENABLE 0x00000080 /* lock is to be reenabled after drain */
+/*
+ * Internal lock flags.
+ *
+ * These flags are used internally to the lock manager.
+ */
+#define LK_WANT_UPGRADE 0x00000100 /* waiting for share-to-excl upgrade */
+#define LK_WANT_EXCL 0x00000200 /* exclusive lock sought */
+#define LK_HAVE_EXCL 0x00000400 /* exclusive lock obtained */
+#define LK_WAITDRAIN 0x00000800 /* process waiting for lock to drain */
+#define LK_DRAINING 0x00004000 /* lock is being drained */
+#define LK_DRAINED 0x00008000 /* lock has been decommissioned */
+/*
+ * Control flags
+ *
+ * Non-persistent external flags.
+ */
+#define LK_INTERLOCK 0x00010000 /* unlock passed simple lock after
+ getting lk_interlock */
+#define LK_RETRY 0x00020000 /* vn_lock: retry until locked */
+
+/*
+ * Lock return status.
+ *
+ * Successfully obtained locks return 0. Locks will always succeed
+ * unless one of the following is true:
+ * LK_EXCLUPGRADE is requested and some other process has already
+ * requested a lock upgrade (returns EBUSY).
+ * LK_NOWAIT is set and a sleep would be required (returns EBUSY).
+ * LK_SLEEPFAIL is set and a sleep was done (returns ENOLCK).
+ * PCATCH is set in lock priority and a signal arrives (returns
+ * either EINTR or ERESTART if the system call is to be restarted).
+ * A non-zero lock timeout expires (returns EWOULDBLOCK).
+ * A failed lock attempt always returns a non-zero error value. No lock
+ * is held after an error return (in particular, a failed LK_UPGRADE
+ * or LK_EXCLUPGRADE will have released its shared access lock).
+ */
+
+/*
+ * Indicator that no process holds exclusive lock
+ */
+#define LK_KERNPROC ((pid_t) -2)
+#define LK_NOPROC ((pid_t) -1)
+
+struct proc;
+
+void lockinit __P((struct lock *, int prio, char *wmesg, int timo,
+ int flags));
+int lockmgr __P((__volatile struct lock *, u_int flags,
+ struct simplelock *, struct proc *p));
+void lockmgr_printinfo __P((struct lock *));
+int lockstatus __P((struct lock *));
+
+#endif /* !_LOCK_H_ */
+
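
lockinit() fixes the sleep priority, wait message, timeout and permanent flags once; lockmgr() then takes one of the LK_* request types plus optional external flags, an optional simple lock to drop atomically (LK_INTERLOCK), and the current process. A minimal usage sketch, assuming a lock embedded in some structure and a process pointer p (illustrative only, not part of this commit):

	struct lock mylock;

	/* One-time setup: sleep at PINOD, wait message "mylock",
	 * no timeout, no permanent flags. */
	lockinit(&mylock, PINOD, "mylock", 0, 0);

	/* Exclusive acquire; may sleep.  Returns 0 on success. */
	if (lockmgr(&mylock, LK_EXCLUSIVE, NULL, p) == 0) {
		/* ... modify the object protected by mylock ... */
		(void) lockmgr(&mylock, LK_RELEASE, NULL, p);
	}
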
diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h
index 4b87be6fa20..3e380f50dfd 100644
--- a/sys/sys/malloc.h
+++ b/sys/sys/malloc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: malloc.h,v 1.10 1997/03/01 21:24:46 kstailey Exp $ */
+/* $OpenBSD: malloc.h,v 1.11 1997/10/06 15:25:33 csapuntz Exp $ */
/* $NetBSD: malloc.h,v 1.23 1996/04/05 04:52:52 mhitch Exp $ */
/*
@@ -128,8 +128,25 @@
#define M_PFIL 73 /* packet filter */
#define M_TDB 75 /* Transforms database */
#define M_XDATA 76 /* IPsec data */
-#define M_TEMP 84 /* misc temporary data buffers */
-#define M_LAST 85 /* Must be last type + 1 */
+#define M_VFS 77 /* VFS file systems */
+
+#define M_PAGEDEP 78 /* File page dependencies */
+#define M_INODEDEP 79 /* Inode dependencies */
+#define M_NEWBLK 80 /* New block allocation */
+#define M_BMSAFEMAP 81 /* Block or frag allocated from cyl group map */
+#define M_ALLOCDIRECT 82 /* Block or frag dependency for an inode */
+#define M_INDIRDEP 83 /* Indirect block dependencies */
+#define M_ALLOCINDIR 84 /* Block dependency for an indirect block */
+#define M_FREEFRAG 85 /* Previously used frag for an inode */
+#define M_FREEBLKS 86 /* Blocks freed from an inode */
+#define M_FREEFILE 87 /* Inode deallocated */
+#define M_DIRADD 88 /* New directory entry */
+#define M_MKDIR 89 /* New directory */
+#define M_DIRREM 90 /* Directory entry deleted */
+
+#define M_TEMP 127 /* misc temporary data buffers */
+#define M_LAST 128 /* Must be last type + 1 */
+
#define INITKMEMNAMES { \
"free", /* 0 M_FREE */ \
@@ -209,9 +226,29 @@
NULL, \
"tdb", /* 75 M_TDB */ \
"xform_data", /* 76 M_XDATA */ \
- NULL, NULL, \
+ "vfs", /* 77 M_VFS */ \
+ "pagedep", /* 78 M_PAGEDEP */ \
+ "inodedep", /* 79 M_INODEDEP */ \
+ "newblk", /* 80 M_NEWBLK */ \
+ "bmsafemap", /* 81 M_BMSAFEMAP */ \
+ "allocdirect", /* 82 M_ALLOCDIRECT */ \
+ "indirdep", /* 83 M_INDIRDEP */ \
+ "allocindir", /* 84 M_ALLOCINDIR */ \
+ "freefrag", /* 85 M_FREEFRAG */ \
+ "freeblks", /* 86 M_FREEBLKS */ \
+ "freefile", /* 87 M_FREEFILE */ \
+ "diradd", /* 88 M_DIRADD */ \
+ "mkdir", /* 89 M_MKDIR */ \
+ "dirrem", /* 90 M_DIRREM */ \
+ NULL, NULL, NULL, NULL, NULL, \
+ NULL, NULL, NULL, NULL, NULL, \
+ NULL, NULL, NULL, NULL, NULL, \
+ NULL, NULL, NULL, NULL, NULL, \
+ NULL, NULL, NULL, NULL, NULL, \
+ NULL, NULL, NULL, NULL, NULL, \
NULL, NULL, NULL, NULL, NULL, \
- "temp", /* 84 M_TEMP */ \
+ NULL, \
+ "temp", /* 127 M_TEMP */ \
}
struct kmemstats {
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 2ad19911a6a..776740078d0 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: mount.h,v 1.18 1997/04/16 09:49:00 downsj Exp $ */
+/* $OpenBSD: mount.h,v 1.19 1997/10/06 15:25:33 csapuntz Exp $ */
/* $NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $ */
/*
@@ -43,6 +43,7 @@
#include <sys/ucred.h>
#endif
#include <sys/queue.h>
+#include <sys/lock.h>
typedef struct { int32_t val[2]; } fsid_t; /* file system id type */
@@ -55,7 +56,7 @@ typedef struct { int32_t val[2]; } fsid_t; /* file system id type */
struct fid {
u_short fid_len; /* length of data in bytes */
u_short fid_reserved; /* force longword alignment */
- char fid_data[MAXFIDSZ]; /* data (variable length) */
+ char fid_data[MAXFIDSZ]; /* data (variable length) */
};
/*
@@ -77,7 +78,9 @@ struct statfs {
long f_ffree; /* free file nodes in fs */
fsid_t f_fsid; /* file system id */
uid_t f_owner; /* user that mounted the file system */
- long f_spare[4]; /* spare for later */
+ long f_syncwrites; /* count of sync writes since mount */
+ long f_asyncwrites; /* count of async writes since mount */
+ long f_spare[2]; /* spare for later */
char f_fstypename[MFSNAMELEN]; /* fs type name */
char f_mntonname[MNAMELEN]; /* directory on which mounted */
char f_mntfromname[MNAMELEN]; /* mounted file system */
@@ -116,8 +119,11 @@ LIST_HEAD(vnodelst, vnode);
struct mount {
CIRCLEQ_ENTRY(mount) mnt_list; /* mount list */
struct vfsops *mnt_op; /* operations on fs */
+ struct vfsconf *mnt_vfc; /* configuration info */
struct vnode *mnt_vnodecovered; /* vnode we mounted on */
+ struct vnode *mnt_syncer; /* syncer vnode */
struct vnodelst mnt_vnodelist; /* list of vnodes this mount */
+ struct lock mnt_lock; /* mount structure lock */
int mnt_flag; /* flags */
int mnt_maxsymlinklen; /* max size of short symlink */
struct statfs mnt_stat; /* cache of filesystem stats */
@@ -161,7 +167,7 @@ struct mount {
/*
* Mask of flags that are visible to statfs()
*/
-#define MNT_VISFLAGMASK 0x0000ffff
+#define MNT_VISFLAGMASK 0x0400ffff
/*
* filesystem control flags.
@@ -180,6 +186,37 @@ struct mount {
#define MNT_MPWANT 0x00800000 /* waiting for mount point */
#define MNT_UNMOUNT 0x01000000 /* unmount in progress */
#define MNT_WANTRDWR 0x02000000 /* want upgrade to read/write */
+#define MNT_SOFTDEP 0x04000000 /* soft dependencies being done */
+/*
+ * Sysctl CTL_VFS definitions.
+ *
+ * Second level identifier specifies which filesystem. Second level
+ * identifier VFS_GENERIC returns information about all filesystems.
+ */
+#define VFS_GENERIC 0 /* generic filesystem information */
+/*
+ * Third level identifiers for VFS_GENERIC are given below; third
+ * level identifiers for specific filesystems are given in their
+ * mount specific header files.
+ */
+#define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */
+#define VFS_CONF 2 /* struct: vfsconf for filesystem given
+ as next argument */
+
+/*
+ * Filesystem configuration information. One of these exists for each
+ * type of filesystem supported by the kernel. These are searched at
+ * mount time to identify the requested filesystem.
+ */
+struct vfsconf {
+ struct vfsops *vfc_vfsops; /* filesystem operations vector */
+ char vfc_name[MFSNAMELEN]; /* filesystem type name */
+ int vfc_typenum; /* historic filesystem type number */
+ int vfc_refcount; /* number mounted of this type */
+ int vfc_flags; /* permanent flags */
+ int (*vfc_mountroot)(void); /* if != NULL, routine to mount root */
+ struct vfsconf *vfc_next; /* next in list */
+};
/*
* Operations supported on mounted file system.
@@ -190,8 +227,10 @@ struct nameidata;
struct mbuf;
#endif
+extern int maxvfsconf; /* highest defined filesystem type */
+extern struct vfsconf *vfsconf; /* head of list of filesystem types */
+
struct vfsops {
- char *vfs_name;
int (*vfs_mount) __P((struct mount *mp, char *path, caddr_t data,
struct nameidata *ndp, struct proc *p));
int (*vfs_start) __P((struct mount *mp, int flags,
@@ -211,8 +250,9 @@ struct vfsops {
struct mbuf *nam, struct vnode **vpp,
int *exflagsp, struct ucred **credanonp));
int (*vfs_vptofh) __P((struct vnode *vp, struct fid *fhp));
- void (*vfs_init) __P((void));
- int vfs_refcount;
+ int (*vfs_init) __P((struct vfsconf *));
+ int (*vfs_sysctl) __P((int *, u_int, void *, size_t *, void *,
+ size_t, struct proc *));
};
#define VFS_MOUNT(MP, PATH, DATA, NDP, P) \
@@ -234,8 +274,9 @@ struct vfsops {
*
* waitfor flags to vfs_sync() and getfsstat()
*/
-#define MNT_WAIT 1
-#define MNT_NOWAIT 2
+#define MNT_WAIT 1 /* synchronously wait for I/O to complete */
+#define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */
+#define MNT_LAZY 3 /* push data not written by filesystem syncer */
/*
* Generic file handle
@@ -446,21 +487,25 @@ struct adosfs_args {
/*
* exported vnode operations
*/
+int vfs_busy __P((struct mount *, int, struct simplelock *, struct proc *));
+void vfs_getnewfsid __P((struct mount *));
+struct mount *vfs_getvfs __P((fsid_t *));
+int vfs_mountedon __P((struct vnode *));
+int vfs_mountroot __P((void));
+int vfs_rootmountalloc __P((char *, char *, struct mount **));
+void vfs_unbusy __P((struct mount *, struct proc *));
+void vfs_unmountall __P((void));
+extern CIRCLEQ_HEAD(mntlist, mount) mountlist;
+extern struct simplelock mountlist_slock;
+
struct mount *getvfs __P((fsid_t *)); /* return vfs given fsid */
int vfs_export /* process mount export info */
__P((struct mount *, struct netexport *, struct export_args *));
struct netcred *vfs_export_lookup /* lookup host in fs export list */
__P((struct mount *, struct netexport *, struct mbuf *));
-int vfs_lock __P((struct mount *)); /* lock a vfs */
-int vfs_mountedon __P((struct vnode *));/* is a vfs mounted on vp */
+int vfs_allocate_syncvnode __P((struct mount *));
+
void vfs_shutdown __P((void)); /* unmount and sync file systems */
-void vfs_unlock __P((struct mount *)); /* unlock a vfs */
-void vfs_unmountall __P((void)); /* unmount file systems */
-int vfs_busy __P((struct mount *));
-void vfs_unbusy __P((struct mount *));
-extern CIRCLEQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */
-extern struct vfsops *vfssw[]; /* filesystem type table */
-extern int nvfssw;
long makefstype __P((char *));
int dounmount __P((struct mount *, int, struct proc *));
void vfsinit __P((void));
@@ -479,6 +524,8 @@ int getmntinfo __P((struct statfs **, int));
int mount __P((const char *, const char *, int, void *));
int statfs __P((const char *, struct statfs *));
int unmount __P((const char *, int));
+
+
__END_DECLS
#endif /* _KERNEL */
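
With vfs_name removed from struct vfsops, filesystem types are now located through the vfsconf linked list declared above. A sketch of the by-name lookup a mount path would perform (fstypename stands in for the user-supplied type string):

	struct vfsconf *vfsp;

	/* Walk the configured filesystem types looking for a name match. */
	for (vfsp = vfsconf; vfsp != NULL; vfsp = vfsp->vfc_next)
		if (strcmp(fstypename, vfsp->vfc_name) == 0)
			break;
	if (vfsp == NULL)
		return (ENODEV);	/* no such filesystem type configured */
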
diff --git a/sys/sys/param.h b/sys/sys/param.h
index a7d227ee2da..d9b459abc71 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: param.h,v 1.15 1997/10/01 21:53:36 deraadt Exp $ */
+/* $OpenBSD: param.h,v 1.16 1997/10/06 15:25:34 csapuntz Exp $ */
/* $NetBSD: param.h,v 1.23 1996/03/17 01:02:29 thorpej Exp $ */
/*-
@@ -54,6 +54,7 @@
#ifndef _LOCORE
#include <sys/types.h>
+#include <sys/simplelock.h>
#endif
/*
diff --git a/sys/sys/queue.h b/sys/sys/queue.h
index 962009c90d4..e617d3c4052 100644
--- a/sys/sys/queue.h
+++ b/sys/sys/queue.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: queue.h,v 1.4 1996/05/22 12:07:15 deraadt Exp $ */
+/* $OpenBSD: queue.h,v 1.5 1997/10/06 15:25:34 csapuntz Exp $ */
/* $NetBSD: queue.h,v 1.11 1996/05/16 05:17:14 mycroft Exp $ */
/*
@@ -62,7 +62,7 @@
* linked so that an arbitrary element can be removed without a need to
* traverse the list. New elements can be added to the list before or
* after an existing element, at the head of the list, or at the end of
- * the list. A tail queue may only be traversed in the forward direction.
+ * the list. A tail queue may be traversed in either direction.
*
* A circle queue is headed by a pair of pointers, one to the head of the
* list and the other to the tail of the list. The elements are doubly
@@ -81,7 +81,7 @@
#define LIST_HEAD(name, type) \
struct name { \
struct type *lh_first; /* first element */ \
-}
+}
#define LIST_ENTRY(type) \
struct { \
@@ -89,41 +89,45 @@ struct { \
struct type **le_prev; /* address of previous next element */ \
}
+#define LIST_FIRST(head) ((head)->lh_first)
+#define LIST_NEXT(elm, field) ((elm)->field.le_next)
+#define LIST_END(head) NULL
+
/*
* List functions.
*/
-#define LIST_INIT(head) { \
+#define LIST_INIT(head) do { \
(head)->lh_first = NULL; \
-}
+} while (0)
-#define LIST_INSERT_AFTER(listelm, elm, field) { \
+#define LIST_INSERT_AFTER(listelm, elm, field) do { \
if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
(listelm)->field.le_next->field.le_prev = \
&(elm)->field.le_next; \
(listelm)->field.le_next = (elm); \
(elm)->field.le_prev = &(listelm)->field.le_next; \
-}
+} while (0)
-#define LIST_INSERT_BEFORE(listelm, elm, field) { \
+#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
(elm)->field.le_prev = (listelm)->field.le_prev; \
(elm)->field.le_next = (listelm); \
*(listelm)->field.le_prev = (elm); \
(listelm)->field.le_prev = &(elm)->field.le_next; \
-}
+} while (0)
-#define LIST_INSERT_HEAD(head, elm, field) { \
+#define LIST_INSERT_HEAD(head, elm, field) do { \
if (((elm)->field.le_next = (head)->lh_first) != NULL) \
(head)->lh_first->field.le_prev = &(elm)->field.le_next;\
(head)->lh_first = (elm); \
(elm)->field.le_prev = &(head)->lh_first; \
-}
+} while (0)
-#define LIST_REMOVE(elm, field) { \
+#define LIST_REMOVE(elm, field) do { \
if ((elm)->field.le_next != NULL) \
(elm)->field.le_next->field.le_prev = \
(elm)->field.le_prev; \
*(elm)->field.le_prev = (elm)->field.le_next; \
-}
+} while (0)
/*
* Simple queue definitions.
@@ -142,33 +146,33 @@ struct { \
/*
* Simple queue functions.
*/
-#define SIMPLEQ_INIT(head) { \
+#define SIMPLEQ_INIT(head) do { \
(head)->sqh_first = NULL; \
(head)->sqh_last = &(head)->sqh_first; \
-}
+} while (0)
-#define SIMPLEQ_INSERT_HEAD(head, elm, field) { \
+#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \
if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \
(head)->sqh_last = &(elm)->field.sqe_next; \
(head)->sqh_first = (elm); \
-}
+} while (0)
-#define SIMPLEQ_INSERT_TAIL(head, elm, field) { \
+#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \
(elm)->field.sqe_next = NULL; \
*(head)->sqh_last = (elm); \
(head)->sqh_last = &(elm)->field.sqe_next; \
-}
+} while (0)
-#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) { \
+#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\
(head)->sqh_last = &(elm)->field.sqe_next; \
(listelm)->field.sqe_next = (elm); \
-}
+} while (0)
-#define SIMPLEQ_REMOVE_HEAD(head, elm, field) { \
+#define SIMPLEQ_REMOVE_HEAD(head, elm, field) do { \
if (((head)->sqh_first = (elm)->field.sqe_next) == NULL) \
(head)->sqh_last = &(head)->sqh_first; \
-}
+} while (0)
/*
* Tail queue definitions.
@@ -185,15 +189,24 @@ struct { \
struct type **tqe_prev; /* address of previous next element */ \
}
+
+#define TAILQ_FIRST(head) ((head)->tqh_first)
+#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+#define TAILQ_END(head) NULL
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+#define TAILQ_PREV(elm, headname, field) \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
/*
* Tail queue functions.
*/
-#define TAILQ_INIT(head) { \
+#define TAILQ_INIT(head) do { \
(head)->tqh_first = NULL; \
(head)->tqh_last = &(head)->tqh_first; \
-}
+} while (0)
-#define TAILQ_INSERT_HEAD(head, elm, field) { \
+#define TAILQ_INSERT_HEAD(head, elm, field) do { \
if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \
(head)->tqh_first->field.tqe_prev = \
&(elm)->field.tqe_next; \
@@ -201,16 +214,16 @@ struct { \
(head)->tqh_last = &(elm)->field.tqe_next; \
(head)->tqh_first = (elm); \
(elm)->field.tqe_prev = &(head)->tqh_first; \
-}
+} while (0)
-#define TAILQ_INSERT_TAIL(head, elm, field) { \
+#define TAILQ_INSERT_TAIL(head, elm, field) do { \
(elm)->field.tqe_next = NULL; \
(elm)->field.tqe_prev = (head)->tqh_last; \
*(head)->tqh_last = (elm); \
(head)->tqh_last = &(elm)->field.tqe_next; \
-}
+} while (0)
-#define TAILQ_INSERT_AFTER(head, listelm, elm, field) { \
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
(elm)->field.tqe_next->field.tqe_prev = \
&(elm)->field.tqe_next; \
@@ -218,23 +231,23 @@ struct { \
(head)->tqh_last = &(elm)->field.tqe_next; \
(listelm)->field.tqe_next = (elm); \
(elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
-}
+} while (0)
-#define TAILQ_INSERT_BEFORE(listelm, elm, field) { \
+#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
(elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
(elm)->field.tqe_next = (listelm); \
*(listelm)->field.tqe_prev = (elm); \
(listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
-}
+} while (0)
-#define TAILQ_REMOVE(head, elm, field) { \
+#define TAILQ_REMOVE(head, elm, field) do { \
if (((elm)->field.tqe_next) != NULL) \
(elm)->field.tqe_next->field.tqe_prev = \
(elm)->field.tqe_prev; \
else \
(head)->tqh_last = (elm)->field.tqe_prev; \
*(elm)->field.tqe_prev = (elm)->field.tqe_next; \
-}
+} while (0)
/*
* Circular queue definitions.
@@ -251,15 +264,21 @@ struct { \
struct type *cqe_prev; /* previous element */ \
}
+#define CIRCLEQ_FIRST(head) ((head)->cqh_first)
+#define CIRCLEQ_LAST(head) ((head)->cqh_last)
+#define CIRCLEQ_END(head) ((void *)(head))
+#define CIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next)
+#define CIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev)
+
/*
* Circular queue functions.
*/
-#define CIRCLEQ_INIT(head) { \
+#define CIRCLEQ_INIT(head) do { \
(head)->cqh_first = (void *)(head); \
(head)->cqh_last = (void *)(head); \
-}
+} while (0)
-#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) { \
+#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
(elm)->field.cqe_next = (listelm)->field.cqe_next; \
(elm)->field.cqe_prev = (listelm); \
if ((listelm)->field.cqe_next == (void *)(head)) \
@@ -267,9 +286,9 @@ struct { \
else \
(listelm)->field.cqe_next->field.cqe_prev = (elm); \
(listelm)->field.cqe_next = (elm); \
-}
+} while (0)
-#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) { \
+#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \
(elm)->field.cqe_next = (listelm); \
(elm)->field.cqe_prev = (listelm)->field.cqe_prev; \
if ((listelm)->field.cqe_prev == (void *)(head)) \
@@ -277,9 +296,9 @@ struct { \
else \
(listelm)->field.cqe_prev->field.cqe_next = (elm); \
(listelm)->field.cqe_prev = (elm); \
-}
+} while (0)
-#define CIRCLEQ_INSERT_HEAD(head, elm, field) { \
+#define CIRCLEQ_INSERT_HEAD(head, elm, field) do { \
(elm)->field.cqe_next = (head)->cqh_first; \
(elm)->field.cqe_prev = (void *)(head); \
if ((head)->cqh_last == (void *)(head)) \
@@ -287,9 +306,9 @@ struct { \
else \
(head)->cqh_first->field.cqe_prev = (elm); \
(head)->cqh_first = (elm); \
-}
+} while (0)
-#define CIRCLEQ_INSERT_TAIL(head, elm, field) { \
+#define CIRCLEQ_INSERT_TAIL(head, elm, field) do { \
(elm)->field.cqe_next = (void *)(head); \
(elm)->field.cqe_prev = (head)->cqh_last; \
if ((head)->cqh_first == (void *)(head)) \
@@ -297,9 +316,9 @@ struct { \
else \
(head)->cqh_last->field.cqe_next = (elm); \
(head)->cqh_last = (elm); \
-}
+} while (0)
-#define CIRCLEQ_REMOVE(head, elm, field) { \
+#define CIRCLEQ_REMOVE(head, elm, field) do { \
if ((elm)->field.cqe_next == (void *)(head)) \
(head)->cqh_last = (elm)->field.cqe_prev; \
else \
@@ -310,5 +329,5 @@ struct { \
else \
(elm)->field.cqe_prev->field.cqe_next = \
(elm)->field.cqe_next; \
-}
+} while (0)
#endif /* !_SYS_QUEUE_H_ */
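
Rewriting the queue macros as do { ... } while (0) (and adding the LIST_FIRST/TAILQ_NEXT-style accessors) makes each macro expand to a single statement, so it can sit in an unbraced if/else where the old bare-brace expansion would break on the trailing semicolon. For example, with placeholder names:

	/* Safe with the do/while form; with the old { ... } expansion the
	 * semicolon after the first macro would end the if before the else. */
	if (at_head)
		LIST_INSERT_HEAD(&head, elm, entries);
	else
		LIST_INSERT_AFTER(prev, elm, entries);
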
diff --git a/sys/sys/simplelock.h b/sys/sys/simplelock.h
new file mode 100644
index 00000000000..c979f157b08
--- /dev/null
+++ b/sys/sys/simplelock.h
@@ -0,0 +1,86 @@
+#ifndef _SIMPLELOCK_H_
+#define _SIMPLELOCK_H_
+/*
+ * A simple spin lock.
+ *
+ * This structure only sets one bit of data, but is sized based on the
+ * minimum word size that can be operated on by the hardware test-and-set
+ * instruction. It is only needed for multiprocessors, as uniprocessors
+ * will always run to completion or a sleep. It is an error to hold one
+ * of these locks while a process is sleeping.
+ */
+struct simplelock {
+ int lock_data;
+};
+
+#ifndef NCPUS
+#define NCPUS 1
+#endif
+
+#if NCPUS == 1
+
+#if !defined(DEBUG)
+#define simple_lock(alp)
+#define simple_lock_try(alp) (1) /* always succeeds */
+#define simple_unlock(alp)
+
+static __inline void simple_lock_init __P((struct simplelock *));
+
+static __inline void
+simple_lock_init(lkp)
+ struct simplelock *lkp;
+{
+
+ lkp->lock_data = 0;
+}
+
+#else
+
+void _simple_unlock __P((__volatile struct simplelock *alp, const char *, int));
+#define simple_unlock(alp) _simple_unlock(alp, __FILE__, __LINE__)
+int _simple_lock_try __P((__volatile struct simplelock *alp, const char *, int));
+#define simple_lock_try(alp) _simple_lock_try(alp, __FILE__, __LINE__)
+void _simple_lock __P((__volatile struct simplelock *alp, const char *, int));
+#define simple_lock(alp) _simple_lock(alp, __FILE__, __LINE__)
+void simple_lock_init __P((struct simplelock *alp));
+
+#endif /* !defined(DEBUG) */
+
+#else /* NCPUS > 1 */
+
+/*
+ * The simple-lock routines are the primitives out of which the lock
+ * package is built. The machine-dependent code must implement an
+ * atomic test_and_set operation that indivisibly sets the simple lock
+ * to non-zero and returns its old value. It also assumes that the
+ * setting of the lock to zero below is indivisible. Simple locks may
+ * only be used for exclusive locks.
+ */
+
+static __inline void
+simple_lock(lkp)
+ __volatile struct simplelock *lkp;
+{
+
+ while (test_and_set(&lkp->lock_data))
+ continue;
+}
+
+static __inline int
+simple_lock_try(lkp)
+ __volatile struct simplelock *lkp;
+{
+
+ return (!test_and_set(&lkp->lock_data));
+}
+
+static __inline void
+simple_unlock(lkp)
+ __volatile struct simplelock *lkp;
+{
+
+ lkp->lock_data = 0;
+}
+#endif /* NCPUS > 1 */
+
+#endif /* !_SIMPLELOCK_H_ */
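
On a uniprocessor the spin-lock calls compile away (or to debugging stubs); on a multiprocessor they busy-wait on the test-and-set word. The rule from the comment above is that a simple lock brackets only short, non-sleeping critical sections. An illustrative sketch with a hypothetical counter:

	struct simplelock counter_slock;
	int counter;

	simple_lock_init(&counter_slock);	/* once, at setup */

	simple_lock(&counter_slock);
	counter++;		/* short critical section; never sleep here */
	simple_unlock(&counter_slock);
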
diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h
index 9eb21269f2c..3cc255b0b1c 100644
--- a/sys/sys/sysctl.h
+++ b/sys/sys/sysctl.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: sysctl.h,v 1.18 1997/09/08 17:28:18 kstailey Exp $ */
+/* $OpenBSD: sysctl.h,v 1.19 1997/10/06 15:25:35 csapuntz Exp $ */
/* $NetBSD: sysctl.h,v 1.16 1996/04/09 20:55:36 cgd Exp $ */
/*
@@ -49,9 +49,10 @@
#include <sys/time.h>
#include <sys/ucred.h>
#include <sys/proc.h>
-#include <vm/vm.h>
#endif
+#include <vm/vm.h>
+
/*
* Definitions for sysctl call. The sysctl call uses a hierarchical name
* for objects that can be examined or modified. The name is expressed as
@@ -93,7 +94,8 @@ struct ctlname {
#define CTL_MACHDEP 7 /* machine dependent */
#define CTL_USER 8 /* user-level */
#define CTL_DDB 9 /* DDB user interface, see ddb_var.h */
-#define CTL_MAXID 10 /* number of valid top-level ids */
+#define CTL_VFS 10 /* VFS sysctl's */
+#define CTL_MAXID 11 /* number of valid top-level ids */
#define CTL_NAMES { \
{ 0, 0 }, \
@@ -106,6 +108,7 @@ struct ctlname {
{ "machdep", CTLTYPE_NODE }, \
{ "user", CTLTYPE_NODE }, \
{ "ddb", CTLTYPE_NODE }, \
+ { "vfs", CTLTYPE_NODE }, \
}
/*
@@ -383,7 +386,7 @@ int sysctl_rtable __P((int *, u_int, void *, size_t *, void *, size_t));
int sysctl_clockrate __P((char *, size_t *));
int sysctl_rdstring __P((void *, size_t *, void *, char *));
int sysctl_rdstruct __P((void *, size_t *, void *, void *, int));
-int sysctl_vnode __P((char *, size_t *));
+int sysctl_vnode __P((char *, size_t *, struct proc *));
int sysctl_ntptime __P((char *, size_t *));
#ifdef GPROF
int sysctl_doprof __P((int *, u_int, void *, size_t *, void *, size_t));
@@ -409,6 +412,8 @@ int net_sysctl __P((int *, u_int, void *, size_t *, void *, size_t,
struct proc *));
int cpu_sysctl __P((int *, u_int, void *, size_t *, void *, size_t,
struct proc *));
+int vfs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t,
+ struct proc *));
#else /* !_KERNEL */
#include <sys/cdefs.h>
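
The new CTL_VFS top-level node pairs with the VFS_GENERIC/VFS_MAXTYPENUM/VFS_CONF identifiers added to mount.h, letting userland enumerate the configured filesystem types. A sketch of the sysctl(3) call for the highest defined type number (needs <sys/param.h>, <sys/sysctl.h>, <sys/mount.h> and <err.h>):

	int mib[3], maxtypenum;
	size_t len;

	mib[0] = CTL_VFS;
	mib[1] = VFS_GENERIC;
	mib[2] = VFS_MAXTYPENUM;
	len = sizeof(maxtypenum);
	if (sysctl(mib, 3, &maxtypenum, &len, NULL, 0) == -1)
		err(1, "sysctl");
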
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 757aa464ec2..1dbd1ed3c57 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: systm.h,v 1.20 1997/03/06 07:05:54 tholo Exp $ */
+/* $OpenBSD: systm.h,v 1.21 1997/10/06 15:25:35 csapuntz Exp $ */
/* $NetBSD: systm.h,v 1.50 1996/06/09 04:55:09 briggs Exp $ */
/*-
@@ -128,7 +128,7 @@ int enodev __P((void));
int enosys __P((void));
int enoioctl __P((void));
int enxio __P((void));
-int eopnotsupp __P((void));
+int eopnotsupp __P((void *));
int lkmenodev __P((void));
@@ -240,7 +240,7 @@ void kmstartup __P((void));
int nfs_mountroot __P((void));
int dk_mountroot __P((void));
-int (*mountroot) __P((void));
+int (*mountroot)__P((void));
#include <lib/libkern/libkern.h>
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index ed2fbcebca7..ebb93d38447 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: vnode.h,v 1.8 1996/07/14 08:54:05 downsj Exp $ */
+/* $OpenBSD: vnode.h,v 1.9 1997/10/06 15:25:36 csapuntz Exp $ */
/* $NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $ */
/*
@@ -37,6 +37,7 @@
*/
#include <sys/queue.h>
+#include <sys/lock.h>
/*
* The vnode is the focus of all file activity in UNIX. There is a
@@ -60,7 +61,7 @@ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD };
enum vtagtype {
VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_MSDOSFS, VT_LFS, VT_LOFS, VT_FDESC,
VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS,
- VT_UNION, VT_ADOSFS, VT_EXT2FS, VT_NCPFS
+ VT_UNION, VT_ADOSFS, VT_EXT2FS, VT_NCPFS, VT_VFS
};
/*
@@ -69,6 +70,14 @@ enum vtagtype {
*/
LIST_HEAD(buflists, buf);
+/*
+ * Reading or writing any of these items requires holding the appropriate lock.
+ * v_freelist is locked by the global vnode_free_list simple lock.
+ * v_mntvnodes is locked by the global mntvnodes simple lock.
+ * v_flag, v_usecount, v_holdcount and v_writecount are
+ * locked by the v_interlock simple lock.
+ */
+
struct vnode {
u_long v_flag; /* vnode flags (see below) */
short v_usecount; /* reference count of users */
@@ -83,6 +92,7 @@ struct vnode {
struct buflists v_cleanblkhd; /* clean blocklist head */
struct buflists v_dirtyblkhd; /* dirty blocklist head */
long v_numoutput; /* num of writes in progress */
+ LIST_ENTRY(vnode) v_synclist; /* vnode with dirty buffers */
enum vtype v_type; /* vnode type */
union {
struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */
@@ -98,7 +108,9 @@ struct vnode {
int v_clen; /* length of current cluster */
int v_ralen; /* Read-ahead length */
daddr_t v_maxra; /* last readahead block */
- long v_spare[7]; /* round to 128 bytes */
+ struct simplelock v_interlock; /* lock on usecount and flag */
+ struct lock *v_vnlock; /* used for non-locking fs's */
+ long v_spare[3]; /* round to 128 bytes */
enum vtagtype v_tag; /* type of underlying data */
void *v_data; /* private data for fs */
};
@@ -120,6 +132,7 @@ struct vnode {
#define VBWAIT 0x0400 /* waiting for output to complete */
#define VALIASED 0x0800 /* vnode has an alias */
#define VDIROP 0x1000 /* LFS: vnode is involved in a directory op */
+#define VGONEHACK 0x2000 /* vgone: don't put me on the head of the free list */
/*
* Vnode attributes. A field value of VNOVAL represents a field whose value
@@ -151,7 +164,7 @@ struct vattr {
* Flags for va_cflags.
*/
#define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */
-
+#define VA_EXCLUSIVE 0x02 /* exclusive create request */
/*
* Flags for ioflag.
*/
@@ -197,6 +210,14 @@ extern int vttoif_tab[];
#define V_SAVE 0x0001 /* vinvalbuf: sync file first */
#define V_SAVEMETA 0x0002 /* vinvalbuf: leave indirect blocks */
+#define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */
+
+
+TAILQ_HEAD(freelst, vnode);
+extern struct freelst vnode_hold_list; /* free vnodes referencing buffers */
+extern struct freelst vnode_free_list; /* vnode free list */
+extern struct simplelock vnode_free_list_slock;
+
#ifdef DIAGNOSTIC
#define HOLDRELE(vp) holdrele(vp)
#define VATTR_NULL(vap) vattr_null(vap)
@@ -208,11 +229,47 @@ void vattr_null __P((struct vattr *));
void vhold __P((struct vnode *));
void vref __P((struct vnode *));
#else
-#define HOLDRELE(vp) (vp)->v_holdcnt-- /* decrease buf or page ref */
+#define HOLDRELE(vp) holdrele(vp); /* decrease buf or page ref */
#define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */
-#define VHOLD(vp) (vp)->v_holdcnt++ /* increase buf or page ref */
-#define VREF(vp) (vp)->v_usecount++ /* increase reference */
-#endif
+
+static __inline holdrele(vp)
+ struct vnode *vp;
+{
+ simple_lock(&vp->v_interlock);
+ vp->v_holdcnt--;
+ if (!(vp->v_flag & VGONEHACK) &&
+ vp->v_holdcnt == 0 && vp->v_usecount == 0) {
+ simple_lock(&vnode_free_list_slock);
+ TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+ simple_unlock(&vnode_free_list_slock);
+ }
+ simple_unlock(&vp->v_interlock);
+}
+#define VHOLD(vp) vhold(vp) /* increase buf or page ref */
+static __inline vhold(vp)
+ struct vnode *vp;
+{
+ simple_lock(&vp->v_interlock);
+ if (!(vp->v_flag & VGONEHACK) &&
+ vp->v_holdcnt == 0 && vp->v_usecount == 0) {
+ simple_lock(&vnode_free_list_slock);
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
+ simple_unlock(&vnode_free_list_slock);
+ }
+ vp->v_holdcnt++;
+ simple_unlock(&vp->v_interlock);
+}
+#define VREF(vp) vref(vp) /* increase reference */
+static __inline vref(vp)
+ struct vnode *vp;
+{
+ simple_lock(&vp->v_interlock);
+ vp->v_usecount++;
+ simple_unlock(&vp->v_interlock);
+}
+#endif /* DIAGNOSTIC */
#define NULLVP ((struct vnode *)NULL)
@@ -220,6 +277,7 @@ void vref __P((struct vnode *));
* Global vnode data.
*/
extern struct vnode *rootvnode; /* root (i.e. "/") vnode */
+extern time_t syncdelay; /* time to delay syncing vnodes */
extern int desiredvnodes; /* number of vnodes desired */
extern struct vattr va_null; /* predefined null vattr structure */
@@ -289,6 +347,11 @@ extern struct vnodeop_desc *vnodeop_descs[];
/*
+ * Interlock for scanning list of vnodes attached to a mountpoint
+ */
+struct simplelock mntvnode_slock;
+
+/*
* This macro is very helpful in defining those offsets in the vdesc struct.
*
 * This is stolen from X11R4. I ignored all the fancy stuff for
@@ -371,14 +434,15 @@ int getvnode __P((struct filedesc *fdp, int fd, struct file **fpp));
void getnewfsid __P((struct mount *, int));
void vattr_null __P((struct vattr *vap));
int vcount __P((struct vnode *vp));
-void vclean __P((struct vnode *, int));
+void vclean __P((struct vnode *, int, struct proc *));
int vfinddev __P((dev_t, enum vtype, struct vnode **));
void vflushbuf __P((struct vnode *vp, int sync));
int vflush __P((struct mount *mp, struct vnode *vp, int flags));
void vntblinit __P((void));
void vwakeup __P((struct buf *));
-int vget __P((struct vnode *vp, int lockflag));
+int vget __P((struct vnode *vp, int lockflag, struct proc *p));
void vgone __P((struct vnode *vp));
+void vgonel __P((struct vnode *, struct proc *));
void vgoneall __P((struct vnode *vp));
int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred,
struct proc *p, int slpflag, int slptimeo));
@@ -391,14 +455,25 @@ int vn_closefile __P((struct file *fp, struct proc *p));
int vn_ioctl __P((struct file *fp, u_long com, caddr_t data,
struct proc *p));
int vn_open __P((struct nameidata *ndp, int fmode, int cmode));
+int vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp,
+ struct proc *p));
int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
int len, off_t offset, enum uio_seg segflg, int ioflg,
struct ucred *cred, int *aresid, struct proc *p));
+int vn_lock __P((struct vnode *vp, int flags, struct proc *p));
+int vop_noislocked __P((void *));
+int vop_nolock __P((void *));
+int vop_nounlock __P((void *));
+int vop_revoke __P((void *));
+
int vn_read __P((struct file *fp, struct uio *uio, struct ucred *cred));
int vn_select __P((struct file *fp, int which, struct proc *p));
int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
int vn_write __P((struct file *fp, struct uio *uio, struct ucred *cred));
int vn_writechk __P((struct vnode *vp));
+void vn_syncer_add_to_worklist __P((struct vnode *vp, int delay));
+void sched_sync __P((struct proc *));
+
struct vnode *
checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp));
void vput __P((struct vnode *vp));
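
With locking folded into the vnode layer, vget() and vn_lock() now take lock flags and the current process; the usual pattern is to take the lock together with the reference and drop both with vput(). A sketch only (error handling elided), for a vnode pointer vp obtained elsewhere and process p:

	/* Reference and lock in one step; vput() unlocks and releases. */
	if (vget(vp, LK_EXCLUSIVE, p) == 0) {
		/* ... operate on the locked vnode ... */
		vput(vp);
	}

	/* Or lock an already-referenced vnode, retrying if it is being
	 * reclaimed; VOP_UNLOCK() drops just the lock. */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	/* ... */
	VOP_UNLOCK(vp, 0, p);
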
diff --git a/sys/sys/vnode_if.h b/sys/sys/vnode_if.h
index abf129f1126..43b56b5dc76 100644
--- a/sys/sys/vnode_if.h
+++ b/sys/sys/vnode_if.h
@@ -291,6 +291,31 @@ static __inline int VOP_WRITE(vp, uio, ioflag, cred)
return (VCALL(vp, VOFFSET(vop_write), &a));
}
+struct vop_lease_args {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ struct proc *a_p;
+ struct ucred *a_cred;
+ int a_flag;
+};
+extern struct vnodeop_desc vop_lease_desc;
+static __inline int VOP_LEASE __P((struct vnode *, struct proc *,
+ struct ucred *, int));
+static __inline int VOP_LEASE(vp, p, cred, flag)
+ struct vnode *vp;
+ struct proc *p;
+ struct ucred *cred;
+ int flag;
+{
+ struct vop_lease_args a;
+ a.a_desc = VDESC(vop_lease);
+ a.a_vp = vp;
+ a.a_p = p;
+ a.a_cred = cred;
+ a.a_flag = flag;
+ return (VCALL(vp, VOFFSET(vop_lease), &a));
+}
+
struct vop_ioctl_args {
struct vnodeop_desc *a_desc;
struct vnode *a_vp;
@@ -350,6 +375,24 @@ static __inline int VOP_SELECT(vp, which, fflags, cred, p)
return (VCALL(vp, VOFFSET(vop_select), &a));
}
+struct vop_revoke_args {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ int a_flags;
+};
+extern struct vnodeop_desc vop_revoke_desc;
+static __inline int VOP_REVOKE __P((struct vnode *, int));
+static __inline int VOP_REVOKE(vp, flags)
+ struct vnode *vp;
+ int flags;
+{
+ struct vop_revoke_args a;
+ a.a_desc = VDESC(vop_revoke);
+ a.a_vp = vp;
+ a.a_flags = flags;
+ return (VCALL(vp, VOFFSET(vop_revoke), &a));
+}
+
struct vop_mmap_args {
struct vnodeop_desc *a_desc;
struct vnode *a_vp;
@@ -582,19 +625,19 @@ struct vop_readdir_args {
struct uio *a_uio;
struct ucred *a_cred;
int *a_eofflag;
- u_long *a_cookies;
- int a_ncookies;
+ int *a_ncookies;
+ u_long **a_cookies;
};
extern struct vnodeop_desc vop_readdir_desc;
static __inline int VOP_READDIR __P((struct vnode *, struct uio *,
- struct ucred *, int *, u_long *, int));
-static __inline int VOP_READDIR(vp, uio, cred, eofflag, cookies, ncookies)
+ struct ucred *, int *, int *, u_long **));
+static __inline int VOP_READDIR(vp, uio, cred, eofflag, ncookies, cookies)
struct vnode *vp;
struct uio *uio;
struct ucred *cred;
int *eofflag;
- u_long *cookies;
- int ncookies;
+ int *ncookies;
+ u_long **cookies;
{
struct vop_readdir_args a;
a.a_desc = VDESC(vop_readdir);
@@ -602,8 +645,8 @@ static __inline int VOP_READDIR(vp, uio, cred, eofflag, cookies, ncookies)
a.a_uio = uio;
a.a_cred = cred;
a.a_eofflag = eofflag;
- a.a_cookies = cookies;
a.a_ncookies = ncookies;
+ a.a_cookies = cookies;
return (VCALL(vp, VOFFSET(vop_readdir), &a));
}
@@ -650,60 +693,78 @@ static __inline int VOP_ABORTOP(dvp, cnp)
struct vop_inactive_args {
struct vnodeop_desc *a_desc;
struct vnode *a_vp;
+ struct proc *a_p;
};
extern struct vnodeop_desc vop_inactive_desc;
-static __inline int VOP_INACTIVE __P((struct vnode *));
-static __inline int VOP_INACTIVE(vp)
+static __inline int VOP_INACTIVE __P((struct vnode *, struct proc *));
+static __inline int VOP_INACTIVE(vp, p)
struct vnode *vp;
+ struct proc *p;
{
struct vop_inactive_args a;
a.a_desc = VDESC(vop_inactive);
a.a_vp = vp;
+ a.a_p = p;
return (VCALL(vp, VOFFSET(vop_inactive), &a));
}
struct vop_reclaim_args {
struct vnodeop_desc *a_desc;
struct vnode *a_vp;
+ struct proc *a_p;
};
extern struct vnodeop_desc vop_reclaim_desc;
-static __inline int VOP_RECLAIM __P((struct vnode *));
-static __inline int VOP_RECLAIM(vp)
+static __inline int VOP_RECLAIM __P((struct vnode *, struct proc *));
+static __inline int VOP_RECLAIM(vp, p)
struct vnode *vp;
+ struct proc *p;
{
struct vop_reclaim_args a;
a.a_desc = VDESC(vop_reclaim);
a.a_vp = vp;
+ a.a_p = p;
return (VCALL(vp, VOFFSET(vop_reclaim), &a));
}
struct vop_lock_args {
struct vnodeop_desc *a_desc;
struct vnode *a_vp;
+ int a_flags;
+ struct proc *a_p;
};
extern struct vnodeop_desc vop_lock_desc;
-static __inline int VOP_LOCK __P((struct vnode *));
-static __inline int VOP_LOCK(vp)
+static __inline int VOP_LOCK __P((struct vnode *, int, struct proc *));
+static __inline int VOP_LOCK(vp, flags, p)
struct vnode *vp;
+ int flags;
+ struct proc *p;
{
struct vop_lock_args a;
a.a_desc = VDESC(vop_lock);
a.a_vp = vp;
+ a.a_flags = flags;
+ a.a_p = p;
return (VCALL(vp, VOFFSET(vop_lock), &a));
}
struct vop_unlock_args {
struct vnodeop_desc *a_desc;
struct vnode *a_vp;
+ int a_flags;
+ struct proc *a_p;
};
extern struct vnodeop_desc vop_unlock_desc;
-static __inline int VOP_UNLOCK __P((struct vnode *));
-static __inline int VOP_UNLOCK(vp)
+static __inline int VOP_UNLOCK __P((struct vnode *, int, struct proc *));
+static __inline int VOP_UNLOCK(vp, flags, p)
struct vnode *vp;
+ int flags;
+ struct proc *p;
{
struct vop_unlock_args a;
a.a_desc = VDESC(vop_unlock);
a.a_vp = vp;
+ a.a_flags = flags;
+ a.a_p = p;
return (VCALL(vp, VOFFSET(vop_unlock), &a));
}
@@ -864,6 +925,37 @@ static __inline int VOP_VALLOC(pvp, mode, cred, vpp)
return (VCALL(pvp, VOFFSET(vop_valloc), &a));
}
+struct vop_balloc_args {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ off_t a_startoffset;
+ int a_size;
+ struct ucred *a_cred;
+ int a_flags;
+ struct buf **a_bpp;
+};
+extern struct vnodeop_desc vop_balloc_desc;
+static __inline int VOP_BALLOC __P((struct vnode *, off_t, int,
+ struct ucred *, int, struct buf **));
+static __inline int VOP_BALLOC(vp, startoffset, size, cred, flags, bpp)
+ struct vnode *vp;
+ off_t startoffset;
+ int size;
+ struct ucred *cred;
+ int flags;
+ struct buf **bpp;
+{
+ struct vop_balloc_args a;
+ a.a_desc = VDESC(vop_balloc);
+ a.a_vp = vp;
+ a.a_startoffset = startoffset;
+ a.a_size = size;
+ a.a_cred = cred;
+ a.a_flags = flags;
+ a.a_bpp = bpp;
+ return (VCALL(vp, VOFFSET(vop_balloc), &a));
+}
+
struct vop_reallocblks_args {
struct vnodeop_desc *a_desc;
struct vnode *a_vp;
@@ -957,31 +1049,6 @@ static __inline int VOP_UPDATE(vp, access, modify, waitfor)
return (VCALL(vp, VOFFSET(vop_update), &a));
}
-struct vop_lease_args {
- struct vnodeop_desc *a_desc;
- struct vnode *a_vp;
- struct proc *a_p;
- struct ucred *a_cred;
- int a_flag;
-};
-extern struct vnodeop_desc vop_lease_desc;
-static __inline int VOP_LEASE __P((struct vnode *, struct proc *,
- struct ucred *, int));
-static __inline int VOP_LEASE(vp, p, cred, flag)
- struct vnode *vp;
- struct proc *p;
- struct ucred *cred;
- int flag;
-{
- struct vop_lease_args a;
- a.a_desc = VDESC(vop_lease);
- a.a_vp = vp;
- a.a_p = p;
- a.a_cred = cred;
- a.a_flag = flag;
- return (VCALL(vp, VOFFSET(vop_lease), &a));
-}
-
struct vop_whiteout_args {
struct vnodeop_desc *a_desc;
struct vnode *a_dvp;
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index e25073d6715..e4bf9e3d285 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_alloc.c,v 1.7 1997/07/22 10:31:50 deraadt Exp $ */
+/* $OpenBSD: ffs_alloc.c,v 1.8 1997/10/06 15:26:28 csapuntz Exp $ */
/* $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $ */
/*
@@ -59,7 +59,7 @@
extern u_long nextgennumber;
static daddr_t ffs_alloccg __P((struct inode *, int, daddr_t, int));
-static daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t));
+static daddr_t ffs_alloccgblk __P((struct inode *, struct buf *, daddr_t));
static daddr_t ffs_clusteralloc __P((struct inode *, int, daddr_t, int));
static ino_t ffs_dirpref __P((struct fs *));
static daddr_t ffs_fragextend __P((struct inode *, int, long, int, int));
@@ -70,6 +70,11 @@ static u_long ffs_hashalloc __P((struct inode *, int, long, int,
static daddr_t ffs_nodealloccg __P((struct inode *, int, daddr_t, int));
static daddr_t ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int));
+#ifdef DIAGNOSTIC
+static int ffs_checkblk __P((struct inode *, daddr_t, long));
+#endif
+int ffs_freefile __P((struct vop_vfree_args *));
+
/*
* Allocate a block in the file system.
*
@@ -272,7 +277,8 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
if (bno > 0) {
bp->b_blkno = fsbtodb(fs, bno);
(void) vnode_pager_uncache(ITOV(ip));
- ffs_blkfree(ip, bprev, (long)osize);
+ if (!DOINGSOFTDEP(ITOV(ip)))
+ ffs_blkfree(ip, bprev, (long)osize);
if (nsize < request)
ffs_blkfree(ip, bno + numfrags(fs, nsize),
(long)(request - nsize));
@@ -314,15 +320,10 @@ nospace:
* Note that the error return is not reflected back to the user. Rather
* the previous block allocation will be used.
*/
-#ifdef DEBUG
-#include <sys/sysctl.h>
+
int doasyncfree = 1;
-struct ctldebug debug14 = { "doasyncfree", &doasyncfree };
+int doreallocblks = 1;
int prtrealloc = 0;
-struct ctldebug debug15 = { "prtrealloc", &prtrealloc };
-#else
-#define doasyncfree 1
-#endif
int
ffs_reallocblks(v)
@@ -343,6 +344,9 @@ ffs_reallocblks(v)
int i, len, start_lvl, end_lvl, pref, ssize;
struct timespec ts;
+ if (doreallocblks == 0)
+ return (ENOSPC);
+
vp = ap->a_vp;
ip = VTOI(vp);
fs = ip->i_fs;
@@ -352,10 +356,22 @@ ffs_reallocblks(v)
len = buflist->bs_nchildren;
start_lbn = buflist->bs_children[0]->b_lblkno;
end_lbn = start_lbn + len - 1;
+
#ifdef DIAGNOSTIC
+ for (i = 0; i < len; i++)
+ if (!ffs_checkblk(ip,
+ dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
+ panic("ffs_reallocblks: unallocated block 1");
+
for (i = 1; i < len; i++)
if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
- panic("ffs_reallocblks: non-cluster");
+ panic("ffs_reallocblks: non-logical cluster");
+
+ blkno = buflist->bs_children[0]->b_blkno;
+ ssize = fsbtodb(fs, fs->fs_frag);
+ for (i = 1; i < len - 1; i++)
+ if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
+ panic("ffs_reallocblks: non-physical cluster %d", i);
#endif
/*
* If the latest allocation is in a new cylinder group, assume that
@@ -422,9 +438,14 @@ ffs_reallocblks(v)
#endif
blkno = newblk;
for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
- if (i == ssize)
+ if (i == ssize) {
bap = ebap;
+ soff = -i;
+ }
#ifdef DIAGNOSTIC
+ if (!ffs_checkblk(ip,
+ dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
+ panic("ffs_reallocblks: unallocated block 2");
if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
panic("ffs_reallocblks: alloc mismatch");
#endif
@@ -432,6 +453,17 @@ ffs_reallocblks(v)
if (prtrealloc)
printf(" %d,", *bap);
#endif
+ if (DOINGSOFTDEP(vp)) {
+ if (sbap == &ip->i_ffs_db[0] && i < ssize)
+ softdep_setup_allocdirect(ip, start_lbn + i,
+ blkno, *bap, fs->fs_bsize, fs->fs_bsize,
+ buflist->bs_children[i]);
+ else
+ softdep_setup_allocindir_page(ip, start_lbn + i,
+ i < ssize ? sbp : ebp, soff + i, blkno,
+ *bap, buflist->bs_children[i]);
+ }
+
*bap++ = blkno;
}
/*
@@ -473,10 +505,15 @@ ffs_reallocblks(v)
printf("\n\tnew:");
#endif
for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
- ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
- fs->fs_bsize);
+ if (!DOINGSOFTDEP(vp))
+ ffs_blkfree(ip,
+ dbtofsb(fs, buflist->bs_children[i]->b_blkno),
+ fs->fs_bsize);
buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
#ifdef DEBUG
+ if (!ffs_checkblk(ip,
+ dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
+ panic("ffs_reallocblks: unallocated block 3");
if (prtrealloc)
printf(" %d,", blkno);
#endif
@@ -815,6 +852,9 @@ ffs_fragextend(ip, cg, bprev, osize, nsize)
fs->fs_cs(fs, cg).cs_nffree--;
}
fs->fs_fmod = 1;
+ if (DOINGSOFTDEP(ITOV(ip)))
+ softdep_setup_blkmapdep(bp, fs, bprev);
+
bdwrite(bp);
return (bprev);
}
@@ -835,8 +875,8 @@ ffs_alloccg(ip, cg, bpref, size)
register struct fs *fs;
register struct cg *cgp;
struct buf *bp;
- register int i;
- int error, bno, frags, allocsiz;
+ daddr_t bno, blkno;
+ int error, i, frags, allocsiz;
fs = ip->i_fs;
if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
@@ -855,7 +895,7 @@ ffs_alloccg(ip, cg, bpref, size)
}
cgp->cg_time = time.tv_sec;
if (size == fs->fs_bsize) {
- bno = ffs_alloccgblk(fs, cgp, bpref);
+ bno = ffs_alloccgblk(ip, bp, bpref);
bdwrite(bp);
return (bno);
}
@@ -877,7 +917,7 @@ ffs_alloccg(ip, cg, bpref, size)
brelse(bp);
return (NULL);
}
- bno = ffs_alloccgblk(fs, cgp, bpref);
+ bno = ffs_alloccgblk(ip, bp, bpref);
bpref = dtogd(fs, bno);
for (i = frags; i < fs->fs_frag; i++)
setbit(cg_blksfree(cgp), bpref + i);
@@ -904,8 +944,12 @@ ffs_alloccg(ip, cg, bpref, size)
cgp->cg_frsum[allocsiz]--;
if (frags != allocsiz)
cgp->cg_frsum[allocsiz - frags]++;
- bdwrite(bp);
- return (cg * fs->fs_fpg + bno);
+
+ blkno = cg * fs->fs_fpg + bno;
+ if (DOINGSOFTDEP(ITOV(ip)))
+ softdep_setup_blkmapdep(bp, fs, blkno);
+ bdwrite(bp);
+ return ((u_long)blkno);
}
/*
@@ -920,16 +964,20 @@ ffs_alloccg(ip, cg, bpref, size)
* blocks may be fragmented by the routine that allocates them.
*/
static daddr_t
-ffs_alloccgblk(fs, cgp, bpref)
- register struct fs *fs;
- register struct cg *cgp;
+ffs_alloccgblk(ip, bp, bpref)
+ struct inode *ip;
+ struct buf *bp;
daddr_t bpref;
{
+ struct fs *fs;
+ struct cg *cgp;
daddr_t bno, blkno;
int cylno, pos, delta;
short *cylbp;
register int i;
+ fs = ip->i_fs;
+ cgp = (struct cg *)bp->b_data;
if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
bpref = cgp->cg_rotor;
goto norot;
@@ -1020,7 +1068,10 @@ gotit:
cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
cg_blktot(cgp)[cylno]--;
fs->fs_fmod = 1;
- return (cgp->cg_cgx * fs->fs_fpg + bno);
+ blkno = cgp->cg_cgx * fs->fs_fpg + bno;
+ if (DOINGSOFTDEP(ITOV(ip)))
+ softdep_setup_blkmapdep(bp, fs, blkno);
+ return (blkno);
}
/*
@@ -1040,7 +1091,7 @@ ffs_clusteralloc(ip, cg, bpref, len)
register struct fs *fs;
register struct cg *cgp;
struct buf *bp;
- int i, run, bno, bit, map;
+ int i, got, run, bno, bit, map;
u_char *mapp;
int32_t *lp;
@@ -1094,7 +1145,7 @@ ffs_clusteralloc(ip, cg, bpref, len)
mapp = &cg_clustersfree(cgp)[bpref / NBBY];
map = *mapp++;
bit = 1 << (bpref % NBBY);
- for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) {
+ for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
if ((map & bit) == 0) {
run = 0;
} else {
@@ -1102,22 +1153,32 @@ ffs_clusteralloc(ip, cg, bpref, len)
if (run == len)
break;
}
- if ((i & (NBBY - 1)) != (NBBY - 1)) {
+ if ((got & (NBBY - 1)) != (NBBY - 1)) {
bit <<= 1;
} else {
map = *mapp++;
bit = 1;
}
}
- if (i >= cgp->cg_nclusterblks)
+ if (got >= cgp->cg_nclusterblks)
goto fail;
/*
* Allocate the cluster that we have found.
*/
- bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1);
+#ifdef DIAGNOSTIC
+ for (i = 1; i <= len; i++)
+ if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i))
+ panic("ffs_clusteralloc: map mismatch");
+#endif
+ bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1);
+#ifdef DIAGNOSTIC
+ if (dtog(fs, bno) != cg)
+ panic("ffs_clusteralloc: allocated out of group");
+#endif
+
len = blkstofrags(fs, len);
for (i = 0; i < len; i += fs->fs_frag)
- if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i)
+ if (ffs_alloccgblk(ip, bp, bno + i) != bno + i)
panic("ffs_clusteralloc: lost block");
bdwrite(bp);
return (bno);
@@ -1195,6 +1256,9 @@ ffs_nodealloccg(ip, cg, ipref, mode)
panic("ffs_nodealloccg: block not in map");
/* NOTREACHED */
gotit:
+ if (DOINGSOFTDEP(ITOV(ip)))
+ softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
+
setbit(cg_inosused(cgp), ipref);
cgp->cg_cs.cs_nifree--;
fs->fs_cstotal.cs_nifree--;
@@ -1229,7 +1293,8 @@ ffs_blkfree(ip, bno, size)
int i, error, cg, blk, frags, bbase;
fs = ip->i_fs;
- if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
+ if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
+ fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
printf("dev = 0x%x, bsize = %d, size = %ld, fs = %s\n",
ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
panic("blkfree: bad size");
@@ -1255,7 +1320,7 @@ ffs_blkfree(ip, bno, size)
bno = dtogd(fs, bno);
if (size == fs->fs_bsize) {
blkno = fragstoblks(fs, bno);
- if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) {
+ if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) {
printf("dev = 0x%x, block = %d, fs = %s\n",
ip->i_dev, bno, fs->fs_fsmnt);
panic("blkfree: freeing free block");
@@ -1318,8 +1383,6 @@ ffs_blkfree(ip, bno, size)
/*
* Free an inode.
- *
- * The specified inode is placed back in the free map.
*/
int
ffs_vfree(v)
@@ -1330,6 +1393,28 @@ ffs_vfree(v)
ino_t a_ino;
int a_mode;
} */ *ap = v;
+
+
+ if (DOINGSOFTDEP(ap->a_pvp)) {
+ softdep_freefile(ap);
+ return (0);
+ }
+
+ return (ffs_freefile(ap));
+}
+
+/*
+ * Do the actual free operation.
+ * The specified inode is placed back in the free map.
+ */
+int
+ffs_freefile(ap)
+ struct vop_vfree_args /* {
+ struct vnode *a_pvp;
+ ino_t a_ino;
+ int a_mode;
+ } */ *ap;
+{
register struct fs *fs;
register struct cg *cgp;
register struct inode *pip;
@@ -1347,7 +1432,7 @@ ffs_vfree(v)
(int)fs->fs_cgsize, NOCRED, &bp);
if (error) {
brelse(bp);
- return (0);
+ return (error);
}
cgp = (struct cg *)bp->b_data;
if (!cg_chkmagic(cgp)) {
@@ -1378,6 +1463,60 @@ ffs_vfree(v)
return (0);
}
+#ifdef DIAGNOSTIC
+/*
+ * Verify allocation of a block or fragment. Returns true if block or
+ * fragment is allocated, false if it is free.
+ */
+int
+ffs_checkblk(ip, bno, size)
+ struct inode *ip;
+ daddr_t bno;
+ long size;
+{
+ struct fs *fs;
+ struct cg *cgp;
+ struct buf *bp;
+ int i, error, frags, free;
+
+ fs = ip->i_fs;
+ if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
+ printf("bsize = %d, size = %d, fs = %s\n",
+ fs->fs_bsize, size, fs->fs_fsmnt);
+ panic("checkblk: bad size");
+ }
+ if ((u_int)bno >= fs->fs_size)
+ panic("checkblk: bad block %d", bno);
+ error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))),
+ (int)fs->fs_cgsize, NOCRED, &bp);
+ if (error) {
+ /* XXX - probably should panic here */
+ brelse(bp);
+ return (-1);
+ }
+ cgp = (struct cg *)bp->b_data;
+ if (!cg_chkmagic(cgp)) {
+ /* XXX - probably should panic here */
+ brelse(bp);
+ return (-1);
+ }
+ bno = dtogd(fs, bno);
+ if (size == fs->fs_bsize) {
+ free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
+ } else {
+ frags = numfrags(fs, size);
+ for (free = 0, i = 0; i < frags; i++)
+ if (isset(cg_blksfree(cgp), bno + i))
+ free++;
+ if (free != 0 && free != frags)
+ panic("checkblk: partially free fragment");
+ }
+ brelse(bp);
+ return (!free);
+}
+#endif /* DIAGNOSTIC */
+
+
/*
* Find a block of the specified size in the specified cylinder group.
*
@@ -1550,3 +1689,4 @@ ffs_fserr(fs, uid, cp)
log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
}
+
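The new range check added to ffs_blkfree() above rejects a free request whose fragments would run past the end of their block. A stand-alone sketch of that arithmetic (plain user-space C, not part of the patch; the 8-fragments-per-block geometry and the helper name are illustrative stand-ins for fragnum()/numfrags()):

#include <stdio.h>

#define FS_FRAG	8	/* assumed fragments per block */

/* models fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag */
static int
crosses_block(int fragno, int nfrags)
{
	return (fragno + nfrags > FS_FRAG);
}

int
main(void)
{
	printf("start 4, 4 frags -> %d\n", crosses_block(4, 4));	/* 0: fits */
	printf("start 6, 4 frags -> %d\n", crosses_block(6, 4));	/* 1: ffs_blkfree would panic */
	return (0);
}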
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 5a7dc3afcc2..285ca5f2ca7 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_balloc.c,v 1.3 1997/05/30 08:34:19 downsj Exp $ */
+/* $OpenBSD: ffs_balloc.c,v 1.4 1997/10/06 15:26:29 csapuntz Exp $ */
/* $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $ */
/*
@@ -41,6 +41,7 @@
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/file.h>
+#include <sys/mount.h>
#include <sys/vnode.h>
#include <vm/vm.h>
@@ -58,27 +59,44 @@
* the inode and the logical block number in a file.
*/
int
-ffs_balloc(ip, bn, size, cred, bpp, flags)
- register struct inode *ip;
- register daddr_t bn;
+ffs_balloc(v)
+ void *v;
+{
+ struct vop_balloc_args /* {
+ struct vnode *a_vp;
+ off_t a_startoffset;
+ int a_size;
+ struct ucred *a_cred;
+ int a_flags;
+ struct buf **a_bpp;
+ } */ *ap = v;
+
+ struct inode *ip;
+ daddr_t lbn;
int size;
struct ucred *cred;
- struct buf **bpp;
int flags;
-{
- register struct fs *fs;
- register daddr_t nb;
+ struct fs *fs;
+ daddr_t nb;
struct buf *bp, *nbp;
- struct vnode *vp = ITOV(ip);
+ struct vnode *vp;
struct indir indirs[NIADDR + 2];
- daddr_t newb, lbn, *bap, pref;
- int osize, nsize, num, i, error;
+ daddr_t newb, *bap, pref;
+ int deallocated, osize, nsize, num, i, error;
+ daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR+1];
- *bpp = NULL;
- if (bn < 0)
- return (EFBIG);
+ vp = ap->a_vp;
+ ip = VTOI(vp);
fs = ip->i_fs;
- lbn = bn;
+ lbn = lblkno(fs, ap->a_startoffset);
+ size = blkoff(fs, ap->a_startoffset) + ap->a_size;
+ if (size > fs->fs_bsize)
+ panic("ffs_balloc; blk too big");
+ *ap->a_bpp = NULL;
+ if (lbn < 0)
+ return (EFBIG);
+ cred = ap->a_cred;
+ flags = ap->a_flags;
/*
* If the next write will extend the file into a new block,
@@ -86,7 +104,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
* this fragment has to be extended to be a full block.
*/
nb = lblkno(fs, ip->i_ffs_size);
- if (nb < NDADDR && nb < bn) {
+ if (nb < NDADDR && nb < lbn) {
osize = blksize(fs, ip, nb);
if (osize < fs->fs_bsize && osize > 0) {
error = ffs_realloccg(ip, nb,
@@ -94,6 +112,11 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
osize, (int)fs->fs_bsize, cred, &bp);
if (error)
return (error);
+ if (DOINGSOFTDEP(vp))
+ softdep_setup_allocdirect(ip, nb,
+ dbtofsb(fs, bp->b_blkno), ip->i_ffs_db[nb],
+ fs->fs_bsize, osize, bp);
+
ip->i_ffs_size = (nb + 1) * fs->fs_bsize;
vnode_pager_setsize(vp, (u_long)ip->i_ffs_size);
ip->i_ffs_db[nb] = dbtofsb(fs, bp->b_blkno);
@@ -107,15 +130,15 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
/*
* The first NDADDR blocks are direct blocks
*/
- if (bn < NDADDR) {
- nb = ip->i_ffs_db[bn];
- if (nb != 0 && ip->i_ffs_size >= (bn + 1) * fs->fs_bsize) {
- error = bread(vp, bn, fs->fs_bsize, NOCRED, &bp);
+ if (lbn < NDADDR) {
+ nb = ip->i_ffs_db[lbn];
+ if (nb != 0 && ip->i_ffs_size >= (lbn + 1) * fs->fs_bsize) {
+ error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
if (error) {
brelse(bp);
return (error);
}
- *bpp = bp;
+ *ap->a_bpp = bp;
return (0);
}
if (nb != 0) {
@@ -125,43 +148,52 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size));
nsize = fragroundup(fs, size);
if (nsize <= osize) {
- error = bread(vp, bn, osize, NOCRED, &bp);
+ error = bread(vp, lbn, osize, NOCRED, &bp);
if (error) {
brelse(bp);
return (error);
}
} else {
- error = ffs_realloccg(ip, bn,
- ffs_blkpref(ip, bn, (int)bn, &ip->i_ffs_db[0]),
+ error = ffs_realloccg(ip, lbn,
+ ffs_blkpref(ip, lbn, (int)lbn,
+ &ip->i_ffs_db[0]),
osize, nsize, cred, &bp);
if (error)
return (error);
+ if (DOINGSOFTDEP(vp))
+ softdep_setup_allocdirect(ip, lbn,
+ dbtofsb(fs, bp->b_blkno), nb,
+ nsize, osize, bp);
}
} else {
- if (ip->i_ffs_size < (bn + 1) * fs->fs_bsize)
+ if (ip->i_ffs_size < (lbn + 1) * fs->fs_bsize)
nsize = fragroundup(fs, size);
else
nsize = fs->fs_bsize;
- error = ffs_alloc(ip, bn,
- ffs_blkpref(ip, bn, (int)bn, &ip->i_ffs_db[0]),
+ error = ffs_alloc(ip, lbn,
+ ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]),
nsize, cred, &newb);
if (error)
return (error);
- bp = getblk(vp, bn, nsize, 0, 0);
+ bp = getblk(vp, lbn, nsize, 0, 0);
bp->b_blkno = fsbtodb(fs, newb);
if (flags & B_CLRBUF)
clrbuf(bp);
+ if (DOINGSOFTDEP(vp))
+ softdep_setup_allocdirect(ip, lbn, newb, 0,
+ nsize, 0, bp);
+
}
- ip->i_ffs_db[bn] = dbtofsb(fs, bp->b_blkno);
+ ip->i_ffs_db[lbn] = dbtofsb(fs, bp->b_blkno);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- *bpp = bp;
+ *ap->a_bpp = bp;
return (0);
}
/*
* Determine the number of levels of indirection.
*/
pref = 0;
- if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0)
+ if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
return(error);
#ifdef DIAGNOSTIC
if (num < 1)
@@ -172,6 +204,9 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
*/
--num;
nb = ip->i_ffs_ib[indirs[0].in_off];
+
+ allocib = NULL;
+ allocblk = allociblk;
if (nb == 0) {
pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0);
error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
@@ -179,18 +214,26 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
if (error)
return (error);
nb = newb;
+
+ *allocblk++ = nb;
bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
- bp->b_blkno = fsbtodb(fs, newb);
+ bp->b_blkno = fsbtodb(fs, nb);
clrbuf(bp);
- /*
- * Write synchronously so that indirect blocks
- * never point at garbage.
- */
- if ((error = bwrite(bp)) != 0) {
- ffs_blkfree(ip, nb, fs->fs_bsize);
- return (error);
- }
- ip->i_ffs_ib[indirs[0].in_off] = newb;
+
+ if (DOINGSOFTDEP(vp)) {
+ softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off,
+ newb, 0, fs->fs_bsize, 0, bp);
+ bdwrite(bp);
+ } else {
+ /*
+ * Write synchronously so that indirect blocks
+ * never point at garbage.
+ */
+ if ((error = bwrite(bp)) != 0)
+ goto fail;
+ }
+ allocib = &ip->i_ffs_ib[indirs[0].in_off];
+ *allocib = nb;
ip->i_flag |= IN_CHANGE | IN_UPDATE;
}
/*
@@ -201,7 +244,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
if (error) {
brelse(bp);
- return (error);
+ goto fail;
}
bap = (daddr_t *)bp->b_data;
nb = bap[indirs[i].in_off];
@@ -218,20 +261,27 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
&newb);
if (error) {
brelse(bp);
- return (error);
+ goto fail;
}
nb = newb;
+ *allocblk++ = nb;
nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
nbp->b_blkno = fsbtodb(fs, nb);
clrbuf(nbp);
- /*
- * Write synchronously so that indirect blocks
- * never point at garbage.
- */
- if ((error = bwrite(nbp)) != 0) {
- ffs_blkfree(ip, nb, fs->fs_bsize);
- brelse(bp);
- return (error);
+
+ if (DOINGSOFTDEP(vp)) {
+ softdep_setup_allocindir_meta(nbp, ip, bp,
+ indirs[i - 1].in_off, nb);
+ bdwrite(nbp);
+ } else {
+ /*
+ * Write synchronously so that indirect blocks
+ * never point at garbage.
+ */
+ if ((error = bwrite(nbp)) != 0) {
+ brelse(bp);
+ goto fail;
+ }
}
bap[indirs[i - 1].in_off] = nb;
/*
@@ -253,13 +303,17 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
&newb);
if (error) {
brelse(bp);
- return (error);
+ goto fail;
}
nb = newb;
+ *allocblk++ = nb;
nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
nbp->b_blkno = fsbtodb(fs, nb);
if (flags & B_CLRBUF)
clrbuf(nbp);
+ if (DOINGSOFTDEP(vp))
+ softdep_setup_allocindir_page(ip, lbn, bp,
+ indirs[i].in_off, nb, 0, nbp);
bap[indirs[i].in_off] = nb;
/*
* If required, write synchronously, otherwise use
@@ -270,7 +324,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
} else {
bdwrite(bp);
}
- *bpp = nbp;
+ *ap->a_bpp = nbp;
return (0);
}
brelse(bp);
@@ -278,12 +332,36 @@ ffs_balloc(ip, bn, size, cred, bpp, flags)
error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
if (error) {
brelse(nbp);
- return (error);
+ goto fail;
}
} else {
nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
nbp->b_blkno = fsbtodb(fs, nb);
}
- *bpp = nbp;
+ *ap->a_bpp = nbp;
return (0);
+
+fail:
+ /*
+ * If we have failed part way through block allocation, we
+ * have to deallocate any indirect blocks that we have allocated.
+ */
+ for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
+ ffs_blkfree(ip, *blkp, fs->fs_bsize);
+ deallocated += fs->fs_bsize;
+ }
+ if (allocib != NULL)
+ *allocib = 0;
+ if (deallocated) {
+#ifdef QUOTA
+ /*
+ * Restore user's disk quota because allocation failed.
+ */
+ (void) chkdq(ip, (long)-btodb(deallocated), cred, FORCE);
+#endif
+ ip->i_ffs_blocks -= btodb(deallocated);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ }
+ return (error);
+
}
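The fail: label above keeps ffs_balloc() from leaking disk blocks when a later step of building the indirect chain fails: every block obtained from ffs_alloc() is recorded in allociblk[], and on error the recorded blocks are returned before the error is propagated. A self-contained restatement of that record-and-roll-back pattern (user-space C, illustrative only; the fake block numbers, step count, and error value are made up):

#include <stdio.h>

#define NSTEPS	4

static int nfreed;

static void
blkfree(long blk)			/* stands in for ffs_blkfree() */
{
	nfreed++;
	printf("freed block %ld\n", blk);
}

int
main(void)
{
	long allociblk[NSTEPS], *allocblk = allociblk, *blkp;
	int i, error = 0;

	for (i = 0; i < NSTEPS; i++) {
		if (i == 2) {		/* pretend the third allocation failed */
			error = 5;	/* EIO */
			break;
		}
		*allocblk++ = 100 + i;	/* remember each block we got */
	}
	if (error)			/* roll back everything recorded so far */
		for (blkp = allociblk; blkp < allocblk; blkp++)
			blkfree(*blkp);
	printf("error %d, %d block(s) rolled back\n", error, nfreed);
	return (0);
}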
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index 94ca01ad634..3fe5a46bfa8 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_extern.h,v 1.2 1996/02/27 07:27:36 niklas Exp $ */
+/* $OpenBSD: ffs_extern.h,v 1.3 1997/10/06 15:26:29 csapuntz Exp $ */
/* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */
/*-
@@ -36,6 +36,21 @@
* @(#)ffs_extern.h 8.3 (Berkeley) 4/16/94
*/
+#define FFS_CLUSTERREAD 1 /* cluster reading enabled */
+#define FFS_CLUSTERWRITE 2 /* cluster writing enabled */
+#define FFS_REALLOCBLKS 3 /* block reallocation enabled */
+#define FFS_ASYNCFREE 4 /* asynchronous block freeing enabled */
+#define FFS_MAXID 5 /* number of valid ffs ids */
+
+#define FFS_NAMES { \
+ { 0, 0 }, \
+ { "doclusterread", CTLTYPE_INT }, \
+ { "doclusterwrite", CTLTYPE_INT }, \
+ { "doreallocblks", CTLTYPE_INT }, \
+ { "doasyncfree", CTLTYPE_INT }, \
+}
+
+
struct buf;
struct fid;
struct fs;
@@ -47,6 +62,7 @@ struct statfs;
struct timeval;
struct ucred;
struct ufsmount;
+struct vfsconf;
struct uio;
struct vnode;
struct mbuf;
@@ -67,11 +83,10 @@ int ffs_vfree __P((void *));
void ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int));
/* ffs_balloc.c */
-int ffs_balloc __P((struct inode *, daddr_t, int, struct ucred *,
- struct buf **, int));
+int ffs_balloc __P((void *));
/* ffs_inode.c */
-void ffs_init __P((void));
+int ffs_init __P((struct vfsconf *));
int ffs_update __P((void *));
int ffs_truncate __P((void *));
@@ -81,6 +96,8 @@ void ffs_fragacct __P((struct fs *, int, int32_t[], int));
#ifdef DIAGNOSTIC
void ffs_checkoverlap __P((struct buf *, struct inode *));
#endif
+int ffs_freefile __P((struct vop_vfree_args *));
+int ffs_isfreeblock __P((struct fs *, unsigned char *, daddr_t));
int ffs_isblock __P((struct fs *, unsigned char *, daddr_t));
void ffs_clrblock __P((struct fs *, u_char *, daddr_t));
void ffs_setblock __P((struct fs *, unsigned char *, daddr_t));
@@ -100,6 +117,8 @@ int ffs_vget __P((struct mount *, ino_t, struct vnode **));
int ffs_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
struct vnode **, int *, struct ucred **));
int ffs_vptofh __P((struct vnode *, struct fid *));
+int ffs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t,
+ struct proc *));
int ffs_sbupdate __P((struct ufsmount *, int));
int ffs_cgupdate __P((struct ufsmount *, int));
@@ -108,6 +127,38 @@ int ffs_read __P((void *));
int ffs_write __P((void *));
int ffs_fsync __P((void *));
int ffs_reclaim __P((void *));
+
+
+/*
+ * Soft dependency function prototypes.
+ */
+
+struct vop_vfree_args;
+struct vop_fsync_args;
+
+void softdep_initialize __P((void));
+int softdep_process_worklist __P((struct mount *));
+int softdep_mount __P((struct vnode *, struct mount *, struct fs *,
+ struct ucred *));
+int softdep_flushfiles __P((struct mount *, int, struct proc *));
+void softdep_update_inodeblock __P((struct inode *, struct buf *, int));
+void softdep_load_inodeblock __P((struct inode *));
+int softdep_fsync __P((struct vnode *));
+void softdep_freefile __P((struct vop_vfree_args *));
+void softdep_setup_freeblocks __P((struct inode *, off_t));
+void softdep_deallocate_dependencies __P((struct buf *));
+void softdep_setup_inomapdep __P((struct buf *, struct inode *, ino_t));
+void softdep_setup_blkmapdep __P((struct buf *, struct fs *, daddr_t));
+void softdep_setup_allocdirect __P((struct inode *, ufs_lbn_t, daddr_t,
+ daddr_t, long, long, struct buf *));
+void softdep_setup_allocindir_meta __P((struct buf *, struct inode *,
+ struct buf *, int, daddr_t));
+void softdep_setup_allocindir_page __P((struct inode *, ufs_lbn_t,
+ struct buf *, int, daddr_t, daddr_t, struct buf *));
+void softdep_disk_io_initiation __P((struct buf *));
+void softdep_disk_write_complete __P((struct buf *));
+int softdep_sync_metadata __P((struct vop_fsync_args *));
+
__END_DECLS
extern int (**ffs_vnodeop_p) __P((void *));
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index 488841b5e7f..ba1eb996cb9 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_inode.c,v 1.6 1997/05/30 08:34:21 downsj Exp $ */
+/* $OpenBSD: ffs_inode.c,v 1.7 1997/10/06 15:26:30 csapuntz Exp $ */
/* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */
/*
@@ -61,10 +61,12 @@
static int ffs_indirtrunc __P((struct inode *, daddr_t, daddr_t, daddr_t, int,
long *));
-void
-ffs_init()
+int
+ffs_init(vfsp)
+ struct vfsconf *vfsp;
{
- ufs_init();
+ softdep_initialize();
+ return (ufs_init(vfsp));
}
/*
@@ -101,7 +103,8 @@ ffs_update(v)
ip->i_flag &= ~IN_ACCESS;
}
if ((ip->i_flag &
- (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0)
+ (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
+ ap->a_waitfor != MNT_WAIT)
return (0);
if (ip->i_flag & IN_ACCESS) {
ip->i_ffs_atime = ap->a_access->tv_sec;
@@ -133,11 +136,17 @@ ffs_update(v)
brelse(bp);
return (error);
}
+
+ if (DOINGSOFTDEP(ap->a_vp))
+ softdep_update_inodeblock(ip, bp, ap->a_waitfor);
+ else if (ip->i_effnlink != ip->i_ffs_nlink)
+ panic("ffs_update: bad link cnt");
+
*((struct dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = ip->i_din.ffs_din;
- if (ap->a_waitfor)
+ if (ap->a_waitfor && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0) {
return (bwrite(bp));
- else {
+ } else {
bdwrite(bp);
return (0);
}
@@ -179,6 +188,8 @@ ffs_truncate(v)
if (length < 0)
return (EINVAL);
oip = VTOI(ovp);
+ if (oip->i_ffs_size == length)
+ return (0);
TIMEVAL_TO_TIMESPEC(&time, &ts);
if (ovp->v_type == VLNK &&
(oip->i_ffs_size < ovp->v_mount->mnt_maxsymlinklen ||
@@ -202,8 +213,34 @@ ffs_truncate(v)
return (error);
#endif
vnode_pager_setsize(ovp, (u_long)length);
+ ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0;
+ if (DOINGSOFTDEP(ovp)) {
+ if (length > 0) {
+ /*
+ * If a file is only partially truncated, then
+ * we have to clean up the data structures
+ * describing the allocation past the truncation
+ * point. Finding and deallocating those structures
+ * is a lot of work. Since partial truncation occurs
+ * rarely, we solve the problem by syncing the file
+ * so that it will have no data structures left.
+ */
+ if ((error = VOP_FSYNC(ovp, ap->a_cred, MNT_WAIT,
+ ap->a_p)) != 0)
+ return (error);
+ } else {
+#ifdef QUOTA
+ (void) chkdq(oip, -oip->i_ffs_blocks, NOCRED, 0);
+#endif
+ softdep_setup_freeblocks(oip, length);
+ (void) vinvalbuf(ovp, 0, ap->a_cred, ap->a_p, 0, 0);
+ oip->i_flag |= IN_CHANGE | IN_UPDATE;
+ return (VOP_UPDATE(ovp, &ts, &ts, 0));
+ }
+ }
+
fs = oip->i_fs;
- osize = oip->i_ffs_size;
+ osize = oip->i_ffs_size;
/*
* Lengthen the size of the file. We must ensure that the
* last byte of the file is allocated. Since the smallest
@@ -217,11 +254,12 @@ ffs_truncate(v)
aflags = B_CLRBUF;
if (ap->a_flags & IO_SYNC)
aflags |= B_SYNC;
- error = ffs_balloc(oip, lbn, offset + 1, ap->a_cred, &bp,
- aflags);
+ error = VOP_BALLOC(ovp, length - 1, 1,
+ ap->a_cred, aflags, &bp);
if (error)
return (error);
oip->i_ffs_size = length;
+ vnode_pager_setsize(ovp, (u_long)length);
(void) vnode_pager_uncache(ovp);
if (aflags & B_SYNC)
bwrite(bp);
@@ -230,6 +268,8 @@ ffs_truncate(v)
oip->i_flag |= IN_CHANGE | IN_UPDATE;
return (VOP_UPDATE(ovp, &ts, &ts, 1));
}
+ vnode_pager_setsize(ovp, (u_long)length);
+
/*
* Shorten the size of the file. If the file is not being
* truncated to a block boundary, the contents of the
@@ -245,7 +285,8 @@ ffs_truncate(v)
aflags = B_CLRBUF;
if (ap->a_flags & IO_SYNC)
aflags |= B_SYNC;
- error = ffs_balloc(oip, lbn, offset, ap->a_cred, &bp, aflags);
+ error = VOP_BALLOC(ovp, length - 1, 1,
+ ap->a_cred, aflags, &bp);
if (error)
return (error);
oip->i_ffs_size = length;
diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c
index e5d0c350387..7e5e417cf53 100644
--- a/sys/ufs/ffs/ffs_subr.c
+++ b/sys/ufs/ffs/ffs_subr.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_subr.c,v 1.3 1996/04/21 22:32:33 deraadt Exp $ */
+/* $OpenBSD: ffs_subr.c,v 1.4 1997/10/06 15:26:31 csapuntz Exp $ */
/* $NetBSD: ffs_subr.c,v 1.6 1996/03/17 02:16:23 christos Exp $ */
/*
@@ -42,10 +42,10 @@
#ifdef _KERNEL
#include <sys/systm.h>
#include <sys/vnode.h>
-#include <ufs/ffs/ffs_extern.h>
#include <sys/buf.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
+#include <ufs/ffs/ffs_extern.h>
/*
* Return buffer with the contents of block "offset" from the beginning of
@@ -240,3 +240,30 @@ ffs_setblock(fs, cp, h)
panic("ffs_setblock");
}
}
+
+
+/*
+ * check if a block is free
+ */
+int
+ffs_isfreeblock(fs, cp, h)
+ struct fs *fs;
+ unsigned char *cp;
+ daddr_t h;
+{
+
+ switch ((int)fs->fs_frag) {
+ case 8:
+ return (cp[h] == 0);
+ case 4:
+ return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0);
+ case 2:
+ return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0);
+ case 1:
+ return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0);
+ default:
+ panic("ffs_isfreeblock");
+ }
+}
+
+
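When fs_frag is smaller than 8, ffs_isfreeblock() packs the fragment bits of several blocks into each byte of the free map; with four fragments per block, block h is the low or high nibble of byte h >> 1. A stand-alone demonstration of that indexing (user-space C; the map contents are made up):

#include <stdio.h>

/* same expression as the fs_frag == 4 arm of ffs_isfreeblock() */
static int
isfree4(unsigned char *cp, long h)
{
	return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0);
}

int
main(void)
{
	unsigned char map[1] = { 0x0f };	/* low nibble set, high nibble clear */

	/* block 0 -> low nibble: fragment bits set, so the test yields 0 */
	printf("block 0 -> %d\n", isfree4(map, 0));
	/* block 1 -> high nibble: no bits set, so the test yields 1 */
	printf("block 1 -> %d\n", isfree4(map, 1));
	return (0);
}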
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index b70f7b0db8d..7b5f8b2463a 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_vfsops.c,v 1.9 1997/06/20 14:04:32 kstailey Exp $ */
+/* $OpenBSD: ffs_vfsops.c,v 1.10 1997/10/06 15:26:31 csapuntz Exp $ */
/* $NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $ */
/*
@@ -51,6 +51,7 @@
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/malloc.h>
+#include <sys/sysctl.h>
#include <dev/rndvar.h>
@@ -68,7 +69,6 @@
int ffs_sbupdate __P((struct ufsmount *, int));
struct vfsops ffs_vfsops = {
- MOUNT_FFS,
ffs_mount,
ufs_start,
ffs_unmount,
@@ -80,61 +80,53 @@ struct vfsops ffs_vfsops = {
ffs_fhtovp,
ffs_vptofh,
ffs_init,
+ ffs_sysctl
};
extern u_long nextgennumber;
/*
* Called by main() when ufs is going to be mounted as root.
- *
- * Name is updated by mount(8) after booting.
*/
-#define ROOTNAME "root_device"
int
ffs_mountroot()
{
extern struct vnode *rootvp;
- register struct fs *fs;
- register struct mount *mp;
+ struct fs *fs;
+ struct mount *mp;
struct proc *p = curproc; /* XXX */
struct ufsmount *ump;
- size_t size;
int error;
/*
* Get vnodes for swapdev and rootdev.
*/
- if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
- panic("ffs_mountroot: can't setup bdevvp's");
-
- mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
- bzero((char *)mp, (u_long)sizeof(struct mount));
- mp->mnt_op = &ffs_vfsops;
- mp->mnt_flag = MNT_RDONLY;
- if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
- free(mp, M_MOUNT);
+ if ((error = bdevvp(swapdev, &swapdev_vp)) ||
+ (error = bdevvp(rootdev, &rootvp))) {
+ printf("ffs_mountroot: can't setup bdevvp's");
return (error);
}
- if ((error = vfs_lock(mp)) != 0) {
- (void)ffs_unmount(mp, 0, p);
- free(mp, M_MOUNT);
+
+ if ((error = vfs_rootmountalloc("ffs", "root_device", &mp)) != 0)
return (error);
- }
+ if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
+ mp->mnt_vfc->vfc_refcount--;
+ vfs_unbusy(mp, p);
+ free(mp, M_MOUNT);
+ return (error);
+ }
+ simple_lock(&mountlist_slock);
CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
- mp->mnt_vnodecovered = NULLVP;
- ump = VFSTOUFS(mp);
- fs = ump->um_fs;
- bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
- fs->fs_fsmnt[0] = '/';
- bcopy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN);
- (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
- &size);
- bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
- (void)ffs_statfs(mp, &mp->mnt_stat, p);
- vfs_unlock(mp);
- inittodr(fs->fs_time);
- return (0);
+ simple_unlock(&mountlist_slock);
+ ump = VFSTOUFS(mp);
+ fs = ump->um_fs;
+ (void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
+ (void)ffs_statfs(mp, &mp->mnt_stat, p);
+
+ vfs_unbusy(mp, p);
+ inittodr(fs->fs_time);
+ return (0);
}
/*
@@ -172,8 +164,6 @@ ffs_mount(mp, path, data, ndp, p)
flags = WRITECLOSE;
if (mp->mnt_flag & MNT_FORCE)
flags |= FORCECLOSE;
- if (vfs_busy(mp))
- return (EBUSY);
error = ffs_flushfiles(mp, flags, p);
if (error == 0 &&
ffs_cgupdate(ump, MNT_WAIT) == 0 &&
@@ -181,7 +171,6 @@ ffs_mount(mp, path, data, ndp, p)
fs->fs_clean = FS_ISCLEAN;
(void) ffs_sbupdate(ump, MNT_WAIT);
}
- vfs_unbusy(mp);
if (error)
return (error);
fs->fs_ronly = 1;
@@ -198,18 +187,19 @@ ffs_mount(mp, path, data, ndp, p)
*/
if (p->p_ucred->cr_uid != 0) {
devvp = ump->um_devvp;
- VOP_LOCK(devvp);
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_ACCESS(devvp, VREAD | VWRITE,
p->p_ucred, p);
if (error) {
- VOP_UNLOCK(devvp);
+ VOP_UNLOCK(devvp, 0, p);
return (error);
}
- VOP_UNLOCK(devvp);
+ VOP_UNLOCK(devvp, 0, p);
}
fs->fs_ronly = 0;
fs->fs_clean <<= 1;
fs->fs_fmod = 1;
+ (void) ffs_sbupdate(ump, MNT_WAIT);
}
if (args.fspec == 0) {
/*
@@ -243,13 +233,13 @@ ffs_mount(mp, path, data, ndp, p)
accessmode = VREAD;
if ((mp->mnt_flag & MNT_RDONLY) == 0)
accessmode |= VWRITE;
- VOP_LOCK(devvp);
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
if (error) {
vput(devvp);
return (error);
}
- VOP_UNLOCK(devvp);
+ VOP_UNLOCK(devvp, 0, p);
}
if ((mp->mnt_flag & MNT_UPDATE) == 0)
error = ffs_mountfs(devvp, mp, p);
@@ -317,8 +307,12 @@ ffs_reload(mountp, cred, p)
* Step 1: invalidate all cached meta-data.
*/
devvp = VFSTOUFS(mountp)->um_devvp;
- if (vinvalbuf(devvp, 0, cred, p, 0, 0))
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ error = vinvalbuf(devvp, 0, cred, p, 0, 0);
+ VOP_UNLOCK(devvp, 0, p);
+ if (error)
panic("ffs_reload: dirty1");
+
/*
* Step 2: re-read superblock from disk.
*/
@@ -375,19 +369,26 @@ ffs_reload(mountp, cred, p)
}
loop:
+ simple_lock(&mntvnode_slock);
for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
+ if (vp->v_mount != mountp) {
+ simple_unlock(&mntvnode_slock);
+ goto loop;
+ }
+
nvp = vp->v_mntvnodes.le_next;
/*
* Step 4: invalidate all inactive vnodes.
*/
- if (vp->v_usecount == 0) {
- vgone(vp);
- continue;
- }
+ if (vrecycle(vp, &mntvnode_slock, p))
+ goto loop;
+
/*
* Step 5: invalidate all cached file data.
*/
- if (vget(vp, 1))
+ simple_lock(&vp->v_interlock);
+ simple_unlock(&mntvnode_slock);
+ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p))
goto loop;
if (vinvalbuf(vp, 0, cred, p, 0, 0))
panic("ffs_reload: dirty2");
@@ -403,11 +404,12 @@ loop:
}
ip->i_din.ffs_din = *((struct dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number));
+ ip->i_effnlink = ip->i_ffs_nlink;
brelse(bp);
vput(vp);
- if (vp->v_mount != mountp)
- goto loop;
+ simple_lock(&mntvnode_slock);
}
+ simple_unlock(&mntvnode_slock);
return (0);
}
@@ -426,8 +428,7 @@ ffs_mountfs(devvp, mp, p)
dev_t dev;
struct partinfo dpart;
caddr_t base, space;
- int blks;
- int error, i, size, ronly;
+ int error, i, blks, size, ronly;
int32_t *lp;
struct ucred *cred;
extern struct vnode *rootvp;
@@ -445,7 +446,10 @@ ffs_mountfs(devvp, mp, p)
return (error);
if (vcount(devvp) > 1 && devvp != rootvp)
return (EBUSY);
- if ((error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) != 0)
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0);
+ VOP_UNLOCK(devvp, 0, p);
+ if (error)
return (error);
ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
@@ -484,10 +488,6 @@ ffs_mountfs(devvp, mp, p)
bp = NULL;
fs = ump->um_fs;
fs->fs_ronly = ronly;
- if (ronly == 0) {
- fs->fs_clean <<= 1;
- fs->fs_fmod = 1;
- }
size = fs->fs_cssize;
blks = howmany(size, fs->fs_fsize);
if (fs->fs_contigsumsize > 0)
@@ -520,9 +520,8 @@ ffs_mountfs(devvp, mp, p)
if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0)
mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
else
- mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_FFS);
+ mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
- mp->mnt_flag |= MNT_LOCAL;
ump->um_mountp = mp;
ump->um_dev = dev;
ump->um_devvp = devvp;
@@ -531,14 +530,24 @@ ffs_mountfs(devvp, mp, p)
ump->um_seqinc = fs->fs_frag;
for (i = 0; i < MAXQUOTAS; i++)
ump->um_quotas[i] = NULLVP;
- devvp->v_specflags |= SI_MOUNTEDON;
+ devvp->v_specmountpoint = mp;
ffs_oldfscompat(fs);
ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */
maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1; /* XXX */
if (fs->fs_maxfilesize > maxfilesize) /* XXX */
fs->fs_maxfilesize = maxfilesize; /* XXX */
+ if (ronly == 0) {
+ if ((fs->fs_flags & FS_DOSOFTDEP) &&
+ (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
+ free(base, M_UFSMNT);
+ goto out;
+ }
+ fs->fs_clean = 0;
+ (void) ffs_sbupdate(ump, MNT_WAIT);
+ }
return (0);
out:
+ devvp->v_specmountpoint = NULL;
if (bp)
brelse(bp);
(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p);
@@ -595,8 +604,14 @@ ffs_unmount(mp, mntflags, p)
flags = 0;
if (mntflags & MNT_FORCE)
flags |= FORCECLOSE;
- if ((error = ffs_flushfiles(mp, flags, p)) != 0)
- return (error);
+ if (mp->mnt_flag & MNT_SOFTDEP) {
+ if ((error = softdep_flushfiles(mp, flags, p)) != 0)
+ return (error);
+ } else {
+ if ((error = ffs_flushfiles(mp, flags, p)) != 0)
+ return (error);
+ }
+
ump = VFSTOUFS(mp);
fs = ump->um_fs;
if (fs->fs_ronly == 0 &&
@@ -605,7 +620,7 @@ ffs_unmount(mp, mntflags, p)
fs->fs_clean = FS_ISCLEAN;
(void) ffs_sbupdate(ump, MNT_WAIT);
}
- ump->um_devvp->v_specflags &= ~SI_MOUNTEDON;
+ ump->um_devvp->v_specmountpoint = NULL;
error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE,
NOCRED, p);
vrele(ump->um_devvp);
@@ -613,7 +628,6 @@ ffs_unmount(mp, mntflags, p)
free(fs, M_UFSMNT);
free(ump, M_UFSMNT);
mp->mnt_data = (qaddr_t)0;
- mp->mnt_flag &= ~MNT_LOCAL;
return (error);
}
@@ -626,12 +640,9 @@ ffs_flushfiles(mp, flags, p)
int flags;
struct proc *p;
{
- extern int doforce;
register struct ufsmount *ump;
int error;
- if (!doforce)
- flags &= ~FORCECLOSE;
ump = VFSTOUFS(mp);
#ifdef QUOTA
if (mp->mnt_flag & MNT_QUOTA) {
@@ -649,7 +660,17 @@ ffs_flushfiles(mp, flags, p)
*/
}
#endif
- error = vflush(mp, NULLVP, flags);
+ /*
+ * Flush all the files.
+ */
+ if ((error = vflush(mp, NULL, flags)) != 0)
+ return (error);
+ /*
+ * Flush filesystem metadata.
+ */
+ vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p);
+ VOP_UNLOCK(ump->um_devvp, 0, p);
return (error);
}
@@ -684,10 +705,11 @@ ffs_statfs(mp, sbp, p)
sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO;
sbp->f_ffree = fs->fs_cstotal.cs_nifree;
if (sbp != &mp->mnt_stat) {
+ sbp->f_type = mp->mnt_vfc->vfc_typenum;
bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
}
- strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN);
+ strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);
return (0);
}
@@ -705,7 +727,7 @@ ffs_sync(mp, waitfor, cred, p)
struct ucred *cred;
struct proc *p;
{
- register struct vnode *vp;
+ register struct vnode *vp, *nvp;
register struct inode *ip;
register struct ufsmount *ump = VFSTOUFS(mp);
register struct fs *fs;
@@ -717,49 +739,71 @@ ffs_sync(mp, waitfor, cred, p)
* Consistency check that the superblock
* is still in the buffer cache.
*/
- if (fs->fs_fmod != 0) {
- if (fs->fs_ronly != 0) { /* XXX */
- printf("fs = %s\n", fs->fs_fsmnt);
- panic("update: rofs mod");
- }
- fs->fs_fmod = 0;
- fs->fs_time = time.tv_sec;
- allerror = ffs_cgupdate(ump, waitfor);
+ if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {
+ printf("fs = %s\n", fs->fs_fsmnt);
+ panic("update: rofs mod");
}
/*
* Write back each (modified) inode.
*/
+ simple_lock(&mntvnode_slock);
loop:
for (vp = mp->mnt_vnodelist.lh_first;
vp != NULL;
- vp = vp->v_mntvnodes.le_next) {
+ vp = nvp) {
/*
* If the vnode that we are about to sync is no longer
* associated with this mount point, start over.
*/
if (vp->v_mount != mp)
goto loop;
- if (VOP_ISLOCKED(vp))
- continue;
+
+ simple_lock(&vp->v_interlock);
+ nvp = vp->v_mntvnodes.le_next;
ip = VTOI(vp);
- if ((ip->i_flag &
- (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
- vp->v_dirtyblkhd.lh_first == NULL)
+ if (vp->v_type == VNON || ((ip->i_flag &
+ (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
+ vp->v_dirtyblkhd.lh_first == NULL) ||
+ waitfor == MNT_LAZY) {
+ simple_unlock(&vp->v_interlock);
continue;
- if (vget(vp, 1))
- goto loop;
+ }
+ simple_unlock(&mntvnode_slock);
+ error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
+ if (error) {
+ simple_lock(&mntvnode_slock);
+ if (error == ENOENT)
+ goto loop;
+ continue;
+ }
if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0)
allerror = error;
- vput(vp);
+ VOP_UNLOCK(vp, 0, p);
+ vrele(vp);
+ simple_lock(&mntvnode_slock);
}
+ simple_unlock(&mntvnode_slock);
/*
* Force stale file system control information to be flushed.
*/
- if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
- allerror = error;
+ if (waitfor != MNT_LAZY) {
+ if (ump->um_mountp->mnt_flag & MNT_SOFTDEP)
+ waitfor = MNT_NOWAIT;
+ vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0)
+ allerror = error;
+ VOP_UNLOCK(ump->um_devvp, 0, p);
+ }
#ifdef QUOTA
qsync(mp);
#endif
+ /*
+ * Write back modified superblock.
+ */
+
+ if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0)
+ allerror = error;
+
return (allerror);
}
@@ -796,6 +840,7 @@ ffs_vget(mp, ino, vpp)
type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */
MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK);
bzero((caddr_t)ip, sizeof(struct inode));
+ lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
vp->v_data = ip;
ip->i_vnode = vp;
ip->i_fs = fs = ump->um_fs;
@@ -833,6 +878,10 @@ ffs_vget(mp, ino, vpp)
return (error);
}
ip->i_din.ffs_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino));
+ if (DOINGSOFTDEP(vp))
+ softdep_load_inodeblock(ip);
+ else
+ ip->i_effnlink = ip->i_ffs_nlink;
brelse(bp);
/*
@@ -965,7 +1014,7 @@ ffs_cgupdate(mp, waitfor)
struct ufsmount *mp;
int waitfor;
{
- register struct fs *fs = mp->um_fs;
+ register struct fs *fs = mp->um_fs, *dfs;
register struct buf *bp;
int blks;
caddr_t space;
@@ -987,7 +1036,74 @@ ffs_cgupdate(mp, waitfor)
else
bawrite(bp);
}
- if (!allerror && error)
+
+ /*
+ * Now write back the superblock itself. If any errors occurred
+ * up to this point, then fail so that the superblock avoids
+ * being written out as clean.
+ */
+ if (allerror)
+ return (allerror);
+ bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0);
+ fs->fs_fmod = 0;
+ fs->fs_time = time.tv_sec;
+ bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
+ /* Restore compatibility to old file systems. XXX */
+ dfs = (struct fs *)bp->b_data; /* XXX */
+ if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */
+ dfs->fs_nrpos = -1; /* XXX */
+ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */
+ int32_t *lp, tmp; /* XXX */
+ /* XXX */
+ lp = (int32_t *)&dfs->fs_qbmask; /* XXX */
+ tmp = lp[4]; /* XXX */
+ for (i = 4; i > 0; i--) /* XXX */
+ lp[i] = lp[i-1]; /* XXX */
+ lp[0] = tmp; /* XXX */
+ } /* XXX */
+ dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */
+ if (waitfor != MNT_WAIT)
+ bawrite(bp);
+ else if ((error = bwrite(bp)) != 0)
allerror = error;
+
return (allerror);
}
+
+/*
+ * fast filesystem related variables.
+ */
+int
+ffs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ struct proc *p;
+{
+ extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree;
+
+ /* all sysctl names at this level are terminal */
+ if (namelen != 1)
+ return (ENOTDIR); /* overloaded */
+
+ switch (name[0]) {
+ case FFS_CLUSTERREAD:
+ return (sysctl_int(oldp, oldlenp, newp, newlen,
+ &doclusterread));
+ case FFS_CLUSTERWRITE:
+ return (sysctl_int(oldp, oldlenp, newp, newlen,
+ &doclusterwrite));
+ case FFS_REALLOCBLKS:
+ return (sysctl_int(oldp, oldlenp, newp, newlen,
+ &doreallocblks));
+ case FFS_ASYNCFREE:
+ return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree));
+ default:
+ return (EOPNOTSUPP);
+ }
+ /* NOTREACHED */
+}
+
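With ffs_sysctl() wired into ffs_vfsops, the FFS_* identifiers become the final component of a vfs MIB. A hedged user-space sketch of reading one of the new knobs through sysctl(3); the MIB prefix (CTL_VFS followed by the ffs vfc_typenum, assumed to be 1 here) and the local FFS_ASYNCFREE definition are assumptions, not taken from this patch:

#include <sys/param.h>
#include <sys/sysctl.h>
#include <stdio.h>

#define FFS_ASYNCFREE	4	/* mirrors the new define in ffs_extern.h */

int
main(void)
{
	int mib[3], value;
	size_t len = sizeof(value);

	mib[0] = CTL_VFS;
	mib[1] = 1;		/* assumed ffs typenum (vfc_typenum) */
	mib[2] = FFS_ASYNCFREE;
	if (sysctl(mib, 3, &value, &len, NULL, 0) == -1) {
		perror("sysctl");
		return (1);
	}
	printf("doasyncfree = %d\n", value);
	return (0);
}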
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index e9462ff50be..088ba291a3a 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ffs_vnops.c,v 1.3 1996/05/22 11:47:18 deraadt Exp $ */
+/* $OpenBSD: ffs_vnops.c,v 1.4 1997/10/06 15:26:32 csapuntz Exp $ */
/* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */
/*
@@ -82,6 +82,7 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
{ &vop_lease_desc, ufs_lease_check }, /* lease */
{ &vop_ioctl_desc, ufs_ioctl }, /* ioctl */
{ &vop_select_desc, ufs_select }, /* select */
+ { &vop_revoke_desc, ufs_revoke }, /* revoke */
{ &vop_mmap_desc, ufs_mmap }, /* mmap */
{ &vop_fsync_desc, ffs_fsync }, /* fsync */
{ &vop_seek_desc, ufs_seek }, /* seek */
@@ -106,6 +107,7 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
{ &vop_advlock_desc, ufs_advlock }, /* advlock */
{ &vop_blkatoff_desc, ffs_blkatoff }, /* blkatoff */
{ &vop_valloc_desc, ffs_valloc }, /* valloc */
+ { &vop_balloc_desc, ffs_balloc }, /* balloc */
{ &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */
{ &vop_vfree_desc, ffs_vfree }, /* vfree */
{ &vop_truncate_desc, ffs_truncate }, /* truncate */
@@ -132,6 +134,7 @@ struct vnodeopv_entry_desc ffs_specop_entries[] = {
{ &vop_lease_desc, spec_lease_check }, /* lease */
{ &vop_ioctl_desc, spec_ioctl }, /* ioctl */
{ &vop_select_desc, spec_select }, /* select */
+ { &vop_revoke_desc, spec_revoke }, /* revoke */
{ &vop_mmap_desc, spec_mmap }, /* mmap */
{ &vop_fsync_desc, ffs_fsync }, /* fsync */
{ &vop_seek_desc, spec_seek }, /* seek */
@@ -183,6 +186,7 @@ struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
{ &vop_lease_desc, fifo_lease_check }, /* lease */
{ &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
{ &vop_select_desc, fifo_select }, /* select */
+ { &vop_revoke_desc, fifo_revoke }, /* revoke */
{ &vop_mmap_desc, fifo_mmap }, /* mmap */
{ &vop_fsync_desc, ffs_fsync }, /* fsync */
{ &vop_seek_desc, fifo_seek }, /* seek */
@@ -218,20 +222,11 @@ struct vnodeopv_desc ffs_fifoop_opv_desc =
{ &ffs_fifoop_p, ffs_fifoop_entries };
#endif /* FIFO */
-#ifdef DEBUG
/*
* Enabling cluster read/write operations.
*/
-#include <sys/sysctl.h>
int doclusterread = 1;
-struct ctldebug debug11 = { "doclusterread", &doclusterread };
int doclusterwrite = 1;
-struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite };
-#else
-/* XXX for ufs_readwrite */
-#define doclusterread 1
-#define doclusterwrite 1
-#endif
#include <ufs/ufs/ufs_readwrite.c>
@@ -249,12 +244,84 @@ ffs_fsync(v)
int a_waitfor;
struct proc *a_p;
} */ *ap = v;
- register struct vnode *vp = ap->a_vp;
+ struct vnode *vp = ap->a_vp;
+ struct buf *bp, *nbp;
struct timespec ts;
+ int s, error, passes, skipmeta;
- vflushbuf(vp, ap->a_waitfor == MNT_WAIT);
+ /*
+ * Flush all dirty buffers associated with a vnode
+ */
+ passes = NIADDR;
+ skipmeta = 0;
+ if (ap->a_waitfor == MNT_WAIT)
+ skipmeta = 1;
+loop:
+ s = splbio();
+loop2:
+ for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+ nbp = bp->b_vnbufs.le_next;
+ if ((bp->b_flags & B_BUSY))
+ continue;
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("ffs_fsync: not dirty");
+ if (skipmeta && bp->b_lblkno < 0)
+ continue;
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s);
+ /*
+ * Wait for I/O associated with indirect blocks to complete,
+ * since there is no way to quickly wait for them below.
+ */
+ if (bp->b_vp == vp || ap->a_waitfor != MNT_WAIT)
+ (void) bawrite(bp);
+ else if ((error = bwrite(bp)) != 0)
+ return (error);
+ goto loop;
+ }
+ if (skipmeta) {
+ skipmeta = 0;
+ goto loop2;
+ }
+ if (ap->a_waitfor == MNT_WAIT) {
+ while (vp->v_numoutput) {
+ vp->v_flag |= VBWAIT;
+ sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
+ }
+ /*
+ * Ensure that any filesystem metadata associated
+ * with the vnode has been written.
+ */
+ splx(s);
+ if ((error = softdep_sync_metadata(ap)) != 0)
+ return (error);
+ s = splbio();
+ if (vp->v_dirtyblkhd.lh_first) {
+ /*
+ * Block devices associated with filesystems may
+ * have new I/O requests posted for them even if
+ * the vnode is locked, so no amount of trying will
+ * get them clean. Thus we give block devices a
+ * good effort, then just give up. For all other file
+ * types, go around and try again until it is clean.
+ */
+ if (passes > 0) {
+ passes -= 1;
+ goto loop2;
+ }
+#ifdef DIAGNOSTIC
+ if (vp->v_type != VBLK)
+ vprint("ffs_fsync: dirty", vp);
+#endif
+ }
+ }
+ splx(s);
TIMEVAL_TO_TIMESPEC(&time, &ts);
- return (VOP_UPDATE(ap->a_vp, &ts, &ts, ap->a_waitfor == MNT_WAIT));
+ if ((error = VOP_UPDATE(vp, &ts, &ts, ap->a_waitfor == MNT_WAIT)) != 0)
+ return (error);
+ if (DOINGSOFTDEP(vp) && ap->a_waitfor == MNT_WAIT)
+ error = softdep_fsync(vp);
+ return (error);
}
/*
@@ -266,11 +333,12 @@ ffs_reclaim(v)
{
struct vop_reclaim_args /* {
struct vnode *a_vp;
+ struct proc *a_p;
} */ *ap = v;
register struct vnode *vp = ap->a_vp;
int error;
- if ((error = ufs_reclaim(vp)) != 0)
+ if ((error = ufs_reclaim(vp, ap->a_p)) != 0)
return (error);
FREE(vp->v_data, VFSTOUFS(vp->v_mount)->um_devvp->v_tag == VT_MFS ?
M_MFSNODE : M_FFSNODE);
diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h
index e5a17da3a22..2979a3c4fe9 100644
--- a/sys/ufs/ffs/fs.h
+++ b/sys/ufs/ffs/fs.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: fs.h,v 1.4 1997/05/30 08:34:28 downsj Exp $ */
+/* $OpenBSD: fs.h,v 1.5 1997/10/06 15:26:32 csapuntz Exp $ */
/* $NetBSD: fs.h,v 1.6 1995/04/12 21:21:02 mycroft Exp $ */
/*
@@ -221,7 +221,7 @@ struct fs {
int8_t fs_fmod; /* super block modified flag */
int8_t fs_clean; /* file system is clean flag */
int8_t fs_ronly; /* mounted read-only flag */
- int8_t fs_flags; /* currently unused flag */
+ int8_t fs_flags; /* see FS_ below */
u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */
/* these fields retain the current block allocation info */
int32_t fs_cgrotor; /* last cg searched */
@@ -267,6 +267,12 @@ struct fs {
#define FS_OPTTIME 0 /* minimize allocation time */
#define FS_OPTSPACE 1 /* minimize disk fragmentation */
+/*
+ * Filesystem flags.
+ */
+#define FS_UNCLEAN 0x01 /* filesystem not clean at mount */
+#define FS_DOSOFTDEP 0x02 /* filesystem using soft dependencies */
+
/*
* Rotational layout table format types
*/
@@ -490,6 +496,12 @@ struct ocg {
? (fs)->fs_bsize \
: (fragroundup(fs, blkoff(fs, (dip)->di_size))))
+#define sblksize(fs, size, lbn) \
+ (((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \
+ ? (fs)->fs_bsize \
+ : (fragroundup(fs, blkoff(fs, (size)))))
+
+
/*
* Number of disk sectors per block/fragment; assumes DEV_BSIZE byte
* sector size.
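The new sblksize() macro answers how many bytes logical block lbn really occupies in a file of the given size: a full block unless lbn is the last, partial direct block, in which case the tail is rounded up to whole fragments. A worked example with assumed geometry (8K blocks, 1K fragments, NDADDR of 12; blkoff()/fragroundup() reduced to plain mask arithmetic):

#include <stdio.h>

#define BSIZE	8192
#define BSHIFT	13
#define FSIZE	1024
#define NDADDR	12

static long
sblksize(long size, long lbn)		/* user-space restatement of the macro */
{
	if (lbn >= NDADDR || size >= ((lbn + 1) << BSHIFT))
		return (BSIZE);
	return (((size & (BSIZE - 1)) + FSIZE - 1) & ~(long)(FSIZE - 1));
}

int
main(void)
{
	/* a 20000-byte file: block 1 is full, block 2 holds 3616 bytes
	   and rounds up to four 1K fragments */
	printf("lbn 1 -> %ld\n", sblksize(20000, 1));	/* 8192 */
	printf("lbn 2 -> %ld\n", sblksize(20000, 2));	/* 4096 */
	return (0);
}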
diff --git a/sys/ufs/mfs/mfs_extern.h b/sys/ufs/mfs/mfs_extern.h
index bd14c23226d..3616acedf76 100644
--- a/sys/ufs/mfs/mfs_extern.h
+++ b/sys/ufs/mfs/mfs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: mfs_extern.h,v 1.2 1996/02/27 07:15:46 niklas Exp $ */
+/* $OpenBSD: mfs_extern.h,v 1.3 1997/10/06 15:27:12 csapuntz Exp $ */
/* $NetBSD: mfs_extern.h,v 1.4 1996/02/09 22:31:27 christos Exp $ */
/*-
@@ -43,6 +43,7 @@ struct proc;
struct statfs;
struct ucred;
struct vnode;
+struct vfsconf;
__BEGIN_DECLS
/* mfs_vfsops.c */
@@ -53,7 +54,7 @@ int mfs_mount __P((struct mount *, char *, caddr_t,
int mfs_start __P((struct mount *, int, struct proc *));
int mfs_statfs __P((struct mount *, struct statfs *, struct proc *));
-void mfs_init __P((void));
+int mfs_init __P((struct vfsconf *));
/* mfs_vnops.c */
int mfs_open __P((void *));
@@ -65,6 +66,7 @@ int mfs_close __P((void *));
int mfs_inactive __P((void *));
int mfs_reclaim __P((void *));
int mfs_print __P((void *));
+#define mfs_revoke vop_revoke
int mfs_badop __P((void *));
__END_DECLS
diff --git a/sys/ufs/mfs/mfs_vfsops.c b/sys/ufs/mfs/mfs_vfsops.c
index 577325fe95b..dbd32e6ea2f 100644
--- a/sys/ufs/mfs/mfs_vfsops.c
+++ b/sys/ufs/mfs/mfs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: mfs_vfsops.c,v 1.2 1996/02/27 07:15:47 niklas Exp $ */
+/* $OpenBSD: mfs_vfsops.c,v 1.3 1997/10/06 15:27:12 csapuntz Exp $ */
/* $NetBSD: mfs_vfsops.c,v 1.10 1996/02/09 22:31:28 christos Exp $ */
/*
@@ -69,7 +69,6 @@ extern int (**mfs_vnodeop_p) __P((void *));
* mfs vfs operations.
*/
struct vfsops mfs_vfsops = {
- MOUNT_MFS,
mfs_mount,
mfs_start,
ffs_unmount,
@@ -81,37 +80,31 @@ struct vfsops mfs_vfsops = {
ffs_fhtovp,
ffs_vptofh,
mfs_init,
+ ffs_sysctl
};
/*
* Called by main() when mfs is going to be mounted as root.
- *
- * Name is updated by mount(8) after booting.
*/
-#define ROOTNAME "mfs_root"
int
mfs_mountroot()
{
extern struct vnode *rootvp;
register struct fs *fs;
- register struct mount *mp;
+ struct mount *mp;
struct proc *p = curproc; /* XXX */
struct ufsmount *ump;
struct mfsnode *mfsp;
- size_t size;
int error;
- /*
- * Get vnodes for swapdev and rootdev.
- */
- if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
- panic("mfs_mountroot: can't setup bdevvp's");
-
- mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
- bzero((char *)mp, (u_long)sizeof(struct mount));
- mp->mnt_op = &mfs_vfsops;
- mp->mnt_flag = MNT_RDONLY;
+ if ((error = bdevvp(swapdev, &swapdev_vp)) ||
+ (error = bdevvp(rootdev, &rootvp))) {
+ printf("mfs_mountroot: can't setup bdevvp's");
+ return (error);
+ }
+ if ((error = vfs_rootmountalloc("mfs", "mfs_root", &mp)) != 0)
+ return (error);
mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK);
rootvp->v_data = mfsp;
rootvp->v_op = mfs_vnodeop_p;
@@ -122,28 +115,20 @@ mfs_mountroot()
mfsp->mfs_pid = p->p_pid;
mfsp->mfs_buflist = (struct buf *)0;
if ((error = ffs_mountfs(rootvp, mp, p)) != 0) {
+ mp->mnt_vfc->vfc_refcount--;
+ vfs_unbusy(mp, p);
free(mp, M_MOUNT);
free(mfsp, M_MFSNODE);
return (error);
}
- if ((error = vfs_lock(mp)) != 0) {
- (void)ffs_unmount(mp, 0, p);
- free(mp, M_MOUNT);
- free(mfsp, M_MFSNODE);
- return (error);
- }
+ simple_lock(&mountlist_slock);
CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
- mp->mnt_vnodecovered = NULLVP;
+ simple_unlock(&mountlist_slock);
ump = VFSTOUFS(mp);
fs = ump->um_fs;
- bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt));
- fs->fs_fsmnt[0] = '/';
- bcopy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN);
- (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
- &size);
- bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0);
(void)ffs_statfs(mp, &mp->mnt_stat, p);
- vfs_unlock(mp);
+ vfs_unbusy(mp, p);
inittodr((time_t)0);
return (0);
}
@@ -207,10 +192,7 @@ mfs_mount(mp, path, data, ndp, p)
flags = WRITECLOSE;
if (mp->mnt_flag & MNT_FORCE)
flags |= FORCECLOSE;
- if (vfs_busy(mp))
- return (EBUSY);
error = ffs_flushfiles(mp, flags, p);
- vfs_unbusy(mp);
if (error)
return (error);
}
@@ -272,7 +254,6 @@ mfs_start(mp, flags, p)
register struct mfsnode *mfsp = VTOMFS(vp);
register struct buf *bp;
register caddr_t base;
- int error = 0;
base = mfsp->mfs_baseoff;
while (mfsp->mfs_buflist != (struct buf *)-1) {
@@ -289,13 +270,11 @@ mfs_start(mp, flags, p)
* otherwise we will loop here, as tsleep will always return
* EINTR/ERESTART.
*/
- if ((error = tsleep((caddr_t)vp, mfs_pri, "mfsidl", 0)) != 0) {
- DOIO();
- if (dounmount(mp, 0, p) != 0)
- CLRSIG(p, CURSIG(p));
- }
+ if (tsleep((caddr_t)vp, mfs_pri, "mfsidl", 0) &&
+ dounmount(mp, 0, p) != 0)
+ CLRSIG(p, CURSIG(p));
}
- return (error);
+ return (0);
}
/*
@@ -311,10 +290,10 @@ mfs_statfs(mp, sbp, p)
error = ffs_statfs(mp, sbp, p);
#ifdef COMPAT_09
- sbp->f_type = 3;
+ sbp->f_type = mp->mnt_vfc->vfc_typenum;
#else
sbp->f_type = 0;
#endif
- strncpy(&sbp->f_fstypename[0], mp->mnt_op->vfs_name, MFSNAMELEN);
+ strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN);
return (error);
}
diff --git a/sys/ufs/mfs/mfs_vnops.c b/sys/ufs/mfs/mfs_vnops.c
index 63b20a029bf..84a5ed3d368 100644
--- a/sys/ufs/mfs/mfs_vnops.c
+++ b/sys/ufs/mfs/mfs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: mfs_vnops.c,v 1.4 1996/04/21 22:32:49 deraadt Exp $ */
+/* $OpenBSD: mfs_vnops.c,v 1.5 1997/10/06 15:27:13 csapuntz Exp $ */
/* $NetBSD: mfs_vnops.c,v 1.8 1996/03/17 02:16:32 christos Exp $ */
/*
@@ -72,6 +72,7 @@ struct vnodeopv_entry_desc mfs_vnodeop_entries[] = {
{ &vop_write_desc, mfs_write }, /* write */
{ &vop_ioctl_desc, mfs_ioctl }, /* ioctl */
{ &vop_select_desc, mfs_select }, /* select */
+ { &vop_revoke_desc, mfs_revoke }, /* revoke */
{ &vop_mmap_desc, mfs_mmap }, /* mmap */
{ &vop_fsync_desc, spec_fsync }, /* fsync */
{ &vop_seek_desc, mfs_seek }, /* seek */
@@ -231,6 +232,9 @@ mfs_bmap(v)
*ap->a_vpp = ap->a_vp;
if (ap->a_bnp != NULL)
*ap->a_bnp = ap->a_bn;
+ if (ap->a_runp != NULL)
+ *ap->a_runp = 0;
+
return (0);
}
@@ -294,12 +298,14 @@ mfs_inactive(v)
{
struct vop_inactive_args /* {
struct vnode *a_vp;
+ struct proc *a_p;
} */ *ap = v;
register struct mfsnode *mfsp = VTOMFS(ap->a_vp);
if (mfsp->mfs_buflist && mfsp->mfs_buflist != (struct buf *)(-1))
panic("mfs_inactive: not inactive (mfs_buflist %p)",
mfsp->mfs_buflist);
+ VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
return (0);
}
@@ -352,8 +358,9 @@ mfs_badop(v)
/*
* Memory based filesystem initialization.
*/
-void
-mfs_init()
+int
+mfs_init(vfsp)
+ struct vfsconf *vfsp;
{
-
+ return (0);
}
diff --git a/sys/ufs/mfs/mfsnode.h b/sys/ufs/mfs/mfsnode.h
index d37f7ba4e68..29c290c4e09 100644
--- a/sys/ufs/mfs/mfsnode.h
+++ b/sys/ufs/mfs/mfsnode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: mfsnode.h,v 1.3 1996/06/11 03:25:15 tholo Exp $ */
+/* $OpenBSD: mfsnode.h,v 1.4 1997/10/06 15:27:13 csapuntz Exp $ */
/* $NetBSD: mfsnode.h,v 1.3 1996/02/09 22:31:31 christos Exp $ */
/*
@@ -76,9 +76,9 @@ struct mfsnode {
#define mfs_readdir mfs_badop
#define mfs_readlink mfs_badop
#define mfs_abortop mfs_badop
-#define mfs_lock nullop
-#define mfs_unlock nullop
-#define mfs_islocked nullop
+#define mfs_lock vop_nolock
+#define mfs_unlock vop_nounlock
+#define mfs_islocked vop_noislocked
#define mfs_pathconf mfs_badop
#define mfs_advlock mfs_badop
#define mfs_blkatoff mfs_badop
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 9dcc48697f1..0a9a7a24151 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: inode.h,v 1.6 1997/05/30 15:18:49 downsj Exp $ */
+/* $OpenBSD: inode.h,v 1.7 1997/10/06 15:27:36 csapuntz Exp $ */
/* $NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $ */
/*
@@ -45,6 +45,8 @@
#include <ufs/ufs/dir.h>
#include <ufs/ext2fs/ext2fs_dinode.h>
+typedef long ufs_lbn_t;
+
/*
* Per-filesystem inode extensions.
*/
@@ -63,13 +65,13 @@ struct ext2fs_inode_ext {
* active, and is put back when the file is no longer being used.
*/
struct inode {
- struct inode *i_next; /* Hash chain forward. */
- struct inode **i_prev; /* Hash chain back. */
+ LIST_ENTRY(inode) i_hash; /* Hash chain */
struct vnode *i_vnode;/* Vnode associated with this inode. */
struct vnode *i_devvp;/* Vnode for block I/O. */
u_int32_t i_flag; /* flags, see below */
dev_t i_dev; /* Device associated with the inode. */
ino_t i_number; /* The identity of the inode. */
+ int i_effnlink; /* i_nlink when I/O completes */
union { /* Associated filesystem. */
struct fs *fs; /* FFS */
@@ -83,8 +85,8 @@ struct inode {
struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
u_quad_t i_modrev; /* Revision level for NFS lease. */
struct lockf *i_lockf;/* Head of byte-level lock list. */
- pid_t i_lockholder; /* DEBUG: holder of inode lock. */
- pid_t i_lockwaiter; /* DEBUG: latest blocked for inode lock. */
+ struct lock i_lock; /* Inode lock */
+
/*
* Side effects; used during directory lookup.
*/
@@ -180,14 +182,11 @@ struct inode {
/* These flags are kept in i_flag. */
#define IN_ACCESS 0x0001 /* Access time update request. */
#define IN_CHANGE 0x0002 /* Inode change time update request. */
-#define IN_EXLOCK 0x0004 /* File has exclusive lock. */
-#define IN_LOCKED 0x0008 /* Inode lock. */
-#define IN_LWAIT 0x0010 /* Process waiting on file lock. */
-#define IN_MODIFIED 0x0020 /* Inode has been modified. */
-#define IN_RENAME 0x0040 /* Inode is being renamed. */
-#define IN_SHLOCK 0x0080 /* File has shared lock. */
-#define IN_UPDATE 0x0100 /* Modification time update request. */
-#define IN_WANTED 0x0200 /* Inode is wanted by a process. */
+#define IN_UPDATE 0x0004 /* Modification time update request */
+#define IN_MODIFIED 0x0008 /* Inode has been modified. */
+#define IN_RENAME 0x0010 /* Inode is being renamed. */
+#define IN_SHLOCK 0x0020 /* File has shared lock. */
+#define IN_EXLOCK 0x0040 /* File has exclusive lock. */
#ifdef _KERNEL
/*
@@ -242,6 +241,9 @@ struct indir {
} \
}
+/* Determine if soft dependencies are being done */
+#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP)
+
/* This overlays the fid structure (see mount.h). */
struct ufid {
u_int16_t ufid_len; /* Length of structure. */
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index 4dbeed61a92..166d8f43684 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_extern.h,v 1.2 1996/02/27 07:21:25 niklas Exp $ */
+/* $OpenBSD: ufs_extern.h,v 1.3 1997/10/06 15:27:36 csapuntz Exp $ */
/* $NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $ */
/*-
@@ -54,6 +54,7 @@ struct ufs_args;
struct ufsmount;
struct uio;
struct vattr;
+struct vfsconf;
struct vnode;
__BEGIN_DECLS
@@ -86,6 +87,7 @@ int ufs_readdir __P((void *));
int ufs_readlink __P((void *));
int ufs_remove __P((void *));
int ufs_rename __P((void *));
+#define ufs_revoke vop_revoke
int ufs_rmdir __P((void *));
int ufs_seek __P((void *));
int ufs_select __P((void *));
@@ -117,19 +119,19 @@ void ufs_ihashins __P((struct inode *));
void ufs_ihashrem __P((struct inode *));
/* ufs_inode.c */
-void ufs_init __P((void));
-int ufs_reclaim __P((struct vnode *));
+int ufs_init __P((struct vfsconf *));
+int ufs_reclaim __P((struct vnode *, struct proc *));
/* ufs_lookup.c */
void ufs_dirbad __P((struct inode *, doff_t, char *));
int ufs_dirbadentry __P((struct vnode *, struct direct *, int));
-int ufs_direnter __P((struct inode *, struct vnode *,
- struct componentname *));
-int ufs_direnter2 __P((struct vnode *, struct direct *, struct ucred *,
- struct proc *));
-int ufs_dirremove __P((struct vnode *, struct componentname *));
+void ufs_makedirentry __P((struct inode *, struct componentname *,
+ struct direct *));
+int ufs_direnter __P((struct vnode *, struct direct *,
+ struct componentname *, struct buf *));
+int ufs_dirremove __P((struct vnode *, struct inode *, int, int));
int ufs_dirrewrite __P((struct inode *, struct inode *,
- struct componentname *));
+ ino_t, int, int));
int ufs_dirempty __P((struct inode *, ino_t, struct ucred *));
int ufs_checkpath __P((struct inode *, struct inode *, struct ucred *));
@@ -165,4 +167,19 @@ int ufs_vinit __P((struct mount *, int (**) __P((void *)),
int (**) __P((void *)), struct vnode **));
int ufs_makeinode __P((int, struct vnode *, struct vnode **,
struct componentname *));
+
+
+/*
+ * Soft dependency function prototypes.
+ */
+void softdep_setup_directory_add __P((struct buf *, struct inode *, off_t,
+ long, struct buf *));
+void softdep_change_directoryentry_offset __P((struct inode *, caddr_t,
+ caddr_t, caddr_t, int));
+void softdep_setup_remove __P((struct buf *,struct inode *, struct inode *,
+ int));
+void softdep_setup_directory_change __P((struct buf *, struct inode *,
+ struct inode *, long, int));
+void softdep_increase_linkcnt __P((struct inode *));
+
__END_DECLS
diff --git a/sys/ufs/ufs/ufs_ihash.c b/sys/ufs/ufs/ufs_ihash.c
index a9b7227942d..84ff51b8b39 100644
--- a/sys/ufs/ufs/ufs_ihash.c
+++ b/sys/ufs/ufs/ufs_ihash.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_ihash.c,v 1.2 1996/02/27 07:21:26 niklas Exp $ */
+/* $OpenBSD: ufs_ihash.c,v 1.3 1997/10/06 15:27:37 csapuntz Exp $ */
/* $NetBSD: ufs_ihash.c,v 1.3 1996/02/09 22:36:04 christos Exp $ */
/*
@@ -49,9 +49,10 @@
/*
* Structures associated with inode cacheing.
*/
-struct inode **ihashtbl;
+LIST_HEAD(ihashhead, inode) *ihashtbl;
u_long ihash; /* size of hash table - 1 */
-#define INOHASH(device, inum) (((device) + (inum)) & ihash)
+#define INOHASH(device, inum) (&ihashtbl[((device) + (inum)) & ihash])
+struct simplelock ufs_ihash_slock;
/*
* Initialize inode hash table.
@@ -61,6 +62,7 @@ ufs_ihashinit()
{
ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash);
+ simple_lock_init(&ufs_ihash_slock);
}
/*
@@ -68,19 +70,21 @@ ufs_ihashinit()
* to it. If it is in core, return it, even if it is locked.
*/
struct vnode *
-ufs_ihashlookup(device, inum)
- dev_t device;
+ufs_ihashlookup(dev, inum)
+ dev_t dev;
ino_t inum;
{
- register struct inode *ip;
+ struct inode *ip;
- for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) {
- if (ip == NULL)
- return (NULL);
- if (inum == ip->i_number && device == ip->i_dev)
- return (ITOV(ip));
- }
- /* NOTREACHED */
+ simple_lock(&ufs_ihash_slock);
+ for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next)
+ if (inum == ip->i_number && dev == ip->i_dev)
+ break;
+ simple_unlock(&ufs_ihash_slock);
+
+ if (ip)
+ return (ITOV(ip));
+ return (NULLVP);
}
/*
@@ -88,30 +92,28 @@ ufs_ihashlookup(device, inum)
* to it. If it is in core, but locked, wait for it.
*/
struct vnode *
-ufs_ihashget(device, inum)
- dev_t device;
+ufs_ihashget(dev, inum)
+ dev_t dev;
ino_t inum;
{
- register struct inode *ip;
+ struct proc *p = curproc;
+ struct inode *ip;
struct vnode *vp;
+loop:
+ simple_lock(&ufs_ihash_slock);
+ for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) {
+ if (inum == ip->i_number && dev == ip->i_dev) {
+ vp = ITOV(ip);
+ simple_lock(&vp->v_interlock);
+ simple_unlock(&ufs_ihash_slock);
+ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p))
+ goto loop;
+ return (vp);
+ }
- for (;;)
- for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) {
- if (ip == NULL)
- return (NULL);
- if (inum == ip->i_number && device == ip->i_dev) {
- if (ip->i_flag & IN_LOCKED) {
- ip->i_flag |= IN_WANTED;
- sleep(ip, PINOD);
- break;
- }
- vp = ITOV(ip);
- if (!vget(vp, 1))
- return (vp);
- break;
- }
- }
- /* NOTREACHED */
+ }
+ simple_unlock(&ufs_ihash_slock);
+ return (NULL);
}
/*
@@ -121,21 +123,16 @@ void
ufs_ihashins(ip)
struct inode *ip;
{
- struct inode **ipp, *iq;
+ struct proc *p = curproc; /* XXX */
+ struct ihashhead *ipp;
- ipp = &ihashtbl[INOHASH(ip->i_dev, ip->i_number)];
- if ((iq = *ipp) != NULL)
- iq->i_prev = &ip->i_next;
- ip->i_next = iq;
- ip->i_prev = ipp;
- *ipp = ip;
- if (ip->i_flag & IN_LOCKED)
- panic("ufs_ihashins: already locked");
- if (curproc)
- ip->i_lockholder = curproc->p_pid;
- else
- ip->i_lockholder = -1;
- ip->i_flag |= IN_LOCKED;
+ /* lock the inode, then put it on the appropriate hash list */
+ lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct simplelock *)0, p);
+
+ simple_lock(&ufs_ihash_slock);
+ ipp = INOHASH(ip->i_dev, ip->i_number);
+ LIST_INSERT_HEAD(ipp, ip, i_hash);
+ simple_unlock(&ufs_ihash_slock);
}
/*
@@ -143,15 +140,14 @@ ufs_ihashins(ip)
*/
void
ufs_ihashrem(ip)
- register struct inode *ip;
+ struct inode *ip;
{
- register struct inode *iq;
+ simple_lock(&ufs_ihash_slock);
+ LIST_REMOVE(ip, i_hash);
+ #ifdef DIAGNOSTIC
+ ip->i_hash.le_next = NULL;
+ ip->i_hash.le_prev = NULL;
+ #endif
+ simple_unlock(&ufs_ihash_slock);
- if ((iq = ip->i_next) != NULL)
- iq->i_prev = ip->i_prev;
- *ip->i_prev = iq;
-#ifdef DIAGNOSTIC
- ip->i_next = NULL;
- ip->i_prev = NULL;
-#endif
}
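
The ufs_ihash.c hunks above replace the hand-rolled i_next/i_prev chains with LIST_HEAD() buckets from <sys/queue.h>, guarded by ufs_ihash_slock. A minimal user-space sketch of the same bucket scheme, with a hypothetical node type standing in for struct inode and the locking left out, might look like this:

#include <sys/queue.h>

#define NHASH		64
#define HASH(dev, ino)	(&hashtbl[((dev) + (ino)) & (NHASH - 1)])

struct node {
	LIST_ENTRY(node) n_hash;		/* bucket linkage, like i_hash */
	unsigned long	 n_dev, n_ino;
};

static LIST_HEAD(nhead, node) hashtbl[NHASH];	/* zeroed heads are empty lists */

static struct node *
node_lookup(unsigned long dev, unsigned long ino)
{
	struct node *np;

	for (np = HASH(dev, ino)->lh_first; np != NULL;
	    np = np->n_hash.le_next)
		if (np->n_ino == ino && np->n_dev == dev)
			break;
	return (np);
}

static void
node_insert(struct node *np)
{
	LIST_INSERT_HEAD(HASH(np->n_dev, np->n_ino), np, n_hash);
}

static void
node_remove(struct node *np)
{
	LIST_REMOVE(np, n_hash);
}

The DIAGNOSTIC block in ufs_ihashrem() additionally poisons le_next/le_prev after removal so a stale reference faults early; the sketch omits that.
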
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index eed08b7f2cf..31437cd4bfd 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_inode.c,v 1.4 1997/05/30 08:35:04 downsj Exp $ */
+/* $OpenBSD: ufs_inode.c,v 1.5 1997/10/06 15:27:37 csapuntz Exp $ */
/* $NetBSD: ufs_inode.c,v 1.7 1996/05/11 18:27:52 mycroft Exp $ */
/*
@@ -57,6 +57,7 @@
u_long nextgennumber; /* Next generation number to assign. */
+#if 0
void
ufs_init()
{
@@ -71,7 +72,7 @@ ufs_init()
#endif
return;
}
-
+#endif
/*
* Last reference to an inode. If necessary, write or delete it.
*/
@@ -81,39 +82,29 @@ ufs_inactive(v)
{
struct vop_inactive_args /* {
struct vnode *a_vp;
+		struct proc *a_p;
} */ *ap = v;
- register struct vnode *vp = ap->a_vp;
- register struct inode *ip = VTOI(vp);
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
+ struct proc *p = ap->a_p;
struct timespec ts;
- int mode, error;
+ int mode, error = 0;
extern int prtactive;
if (prtactive && vp->v_usecount != 0)
vprint("ffs_inactive: pushing active", vp);
- /* Get rid of inodes related to stale file handles. */
- if (ip->i_ffs_mode == 0) {
- if ((vp->v_flag & VXLOCK) == 0)
- vgone(vp);
- return (0);
- }
-
- error = 0;
-#ifdef DIAGNOSTIC
- if (VOP_ISLOCKED(vp))
- panic("ffs_inactive: locked inode");
- if (curproc)
- ip->i_lockholder = curproc->p_pid;
- else
- ip->i_lockholder = -1;
-#endif
- ip->i_flag |= IN_LOCKED;
+ /*
+ * Ignore inodes related to stale file handles.
+ */
+ if (ip->i_ffs_mode == 0)
+ goto out;
if (ip->i_ffs_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
#ifdef QUOTA
if (!getinoquota(ip))
(void)chkiq(ip, -1, NOCRED, 0);
#endif
- error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, NULL);
+ error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, p);
ip->i_ffs_rdev = 0;
mode = ip->i_ffs_mode;
ip->i_ffs_mode = 0;
@@ -124,13 +115,14 @@ ufs_inactive(v)
TIMEVAL_TO_TIMESPEC(&time, &ts);
VOP_UPDATE(vp, &ts, &ts, 0);
}
- VOP_UNLOCK(vp);
+out:
+ VOP_UNLOCK(vp, 0, p);
/*
* If we are done with the inode, reclaim it
* so that it can be reused immediately.
*/
- if (vp->v_usecount == 0 && ip->i_ffs_mode == 0)
- vgone(vp);
+ if (ip->i_ffs_mode == 0)
+ vrecycle(vp, (struct simplelock *)0, p);
return (error);
}
@@ -138,8 +130,9 @@ ufs_inactive(v)
* Reclaim an inode so that it can be used for other purposes.
*/
int
-ufs_reclaim(vp)
+ufs_reclaim(vp, p)
register struct vnode *vp;
+ struct proc *p;
{
register struct inode *ip;
extern int prtactive;
diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c
index 38d828b987e..47587cdd00a 100644
--- a/sys/ufs/ufs/ufs_lookup.c
+++ b/sys/ufs/ufs/ufs_lookup.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_lookup.c,v 1.4 1997/05/30 08:35:08 downsj Exp $ */
+/* $OpenBSD: ufs_lookup.c,v 1.5 1997/10/06 15:27:38 csapuntz Exp $ */
/* $NetBSD: ufs_lookup.c,v 1.7 1996/02/09 22:36:06 christos Exp $ */
/*
@@ -43,12 +43,16 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/buf.h>
#include <sys/file.h>
+#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
+#include <vm/vm.h>
+
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
@@ -131,6 +135,7 @@ ufs_lookup(v)
struct ucred *cred = cnp->cn_cred;
int flags = cnp->cn_flags;
int nameiop = cnp->cn_nameiop;
+ struct proc *p = cnp->cn_proc;
bp = NULL;
slotoffset = -1;
@@ -148,6 +153,10 @@ ufs_lookup(v)
if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0)
return (error);
+ if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) &&
+ (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+ return (EROFS);
+
/*
* We now have a segment name to search for, and a directory to search.
*
@@ -173,14 +182,14 @@ ufs_lookup(v)
VREF(vdp);
error = 0;
} else if (flags & ISDOTDOT) {
- VOP_UNLOCK(pdp);
- error = vget(vdp, 1);
+ VOP_UNLOCK(pdp, 0, p);
+ error = vget(vdp, LK_EXCLUSIVE, p);
if (!error && lockparent && (flags & ISLASTCN))
- error = VOP_LOCK(pdp);
+ error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p);
} else {
- error = vget(vdp, 1);
+ error = vget(vdp, LK_EXCLUSIVE, p);
if (!lockparent || error || !(flags & ISLASTCN))
- VOP_UNLOCK(pdp);
+ VOP_UNLOCK(pdp, 0, p);
}
/*
* Check that the capability number did not change
@@ -191,13 +200,14 @@ ufs_lookup(v)
return (0);
vput(vdp);
if (lockparent && pdp != vdp && (flags & ISLASTCN))
- VOP_UNLOCK(pdp);
+ VOP_UNLOCK(pdp, 0, p);
}
- if ((error = VOP_LOCK(pdp)) != 0)
+ *vpp = NULL;
+
+ if ((error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p)) != 0)
return (error);
vdp = pdp;
dp = VTOI(pdp);
- *vpp = NULL;
}
/*
@@ -396,7 +406,7 @@ notfound:
(nameiop == DELETE &&
(ap->a_cnp->cn_flags & DOWHITEOUT) &&
(ap->a_cnp->cn_flags & ISWHITEOUT))) &&
- (flags & ISLASTCN) && dp->i_ffs_nlink != 0) {
+ (flags & ISLASTCN) && dp->i_effnlink != 0) {
/*
* Access for write is interpreted as allowing
* creation of files in the directory.
@@ -446,7 +456,7 @@ notfound:
*/
cnp->cn_flags |= SAVENAME;
if (!lockparent)
- VOP_UNLOCK(vdp);
+ VOP_UNLOCK(vdp, 0, p);
return (EJUSTRETURN);
}
/*
@@ -524,7 +534,7 @@ found:
}
*vpp = tdp;
if (!lockparent)
- VOP_UNLOCK(vdp);
+ VOP_UNLOCK(vdp, 0, p);
return (0);
}
@@ -551,7 +561,7 @@ found:
*vpp = tdp;
cnp->cn_flags |= SAVENAME;
if (!lockparent)
- VOP_UNLOCK(vdp);
+ VOP_UNLOCK(vdp, 0, p);
return (0);
}
@@ -576,14 +586,14 @@ found:
*/
pdp = vdp;
if (flags & ISDOTDOT) {
- VOP_UNLOCK(pdp); /* race to get the inode */
+ VOP_UNLOCK(pdp, 0, p); /* race to get the inode */
error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
if (error) {
- VOP_LOCK(pdp);
+ vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p);
return (error);
}
if (lockparent && (flags & ISLASTCN) &&
- (error = VOP_LOCK(pdp))) {
+ (error = vn_lock(pdp, LK_EXCLUSIVE, p))) {
vput(tdp);
return (error);
}
@@ -596,7 +606,7 @@ found:
if (error)
return (error);
if (!lockparent || !(flags & ISLASTCN))
- VOP_UNLOCK(pdp);
+ VOP_UNLOCK(pdp, 0, p);
*vpp = tdp;
}
@@ -671,108 +681,130 @@ bad:
}
/*
- * Write a directory entry after a call to namei, using the parameters
- * that it left in nameidata. The argument ip is the inode which the new
- * directory entry will refer to. Dvp is a pointer to the directory to
- * be written, which was left locked by namei. Remaining parameters
- * (dp->i_offset, dp->i_count) indicate how the space for the new
- * entry is to be obtained.
+ * Construct a new directory entry after a call to namei, using the
+ * parameters that it left in the componentname argument cnp. The
+ * argument ip is the inode to which the new directory entry will refer.
*/
-int
-ufs_direnter(ip, dvp, cnp)
- struct inode *ip;
- struct vnode *dvp;
- register struct componentname *cnp;
+void
+ufs_makedirentry(ip, cnp, newdirp)
+ struct inode *ip;
+ struct componentname *cnp;
+ struct direct *newdirp;
{
- register struct inode *dp;
- struct direct newdir;
-
+
#ifdef DIAGNOSTIC
- if ((cnp->cn_flags & SAVENAME) == 0)
- panic("direnter: missing name");
+ if ((cnp->cn_flags & SAVENAME) == 0)
+ panic("ufs_makedirentry: missing name");
#endif
- dp = VTOI(dvp);
- newdir.d_ino = ip->i_number;
- newdir.d_namlen = cnp->cn_namelen;
- bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
- if (dvp->v_mount->mnt_maxsymlinklen > 0)
- newdir.d_type = IFTODT(ip->i_ffs_mode);
- else {
- newdir.d_type = 0;
+ newdirp->d_ino = ip->i_number;
+ newdirp->d_namlen = cnp->cn_namelen;
+ bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1);
+ if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0)
+ newdirp->d_type = IFTODT(ip->i_ffs_mode);
+ else {
+ newdirp->d_type = 0;
# if (BYTE_ORDER == LITTLE_ENDIAN)
- { u_char tmp = newdir.d_namlen;
- newdir.d_namlen = newdir.d_type;
- newdir.d_type = tmp; }
+ { u_char tmp = newdirp->d_namlen;
+ newdirp->d_namlen = newdirp->d_type;
+ newdirp->d_type = tmp; }
# endif
- }
- return (ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc));
+ }
}
-
+
/*
- * Common entry point for directory entry removal used by ufs_direnter
- * and ufs_whiteout
+ * Write a directory entry after a call to namei, using the parameters
+ * that it left in nameidata. The argument dirp is the new directory
+ * entry contents. Dvp is a pointer to the directory to be written,
+ * which was left locked by namei. Remaining parameters (dp->i_offset,
+ * dp->i_count) indicate how the space for the new entry is to be obtained.
+ * Non-null bp indicates that a directory is being created (for the
+ * soft dependency code).
*/
int
-ufs_direnter2(dvp, dirp, cr, p)
- struct vnode *dvp;
- struct direct *dirp;
- struct ucred *cr;
- struct proc *p;
+ufs_direnter(dvp, dirp, cnp, newdirbp)
+ struct vnode *dvp;
+ struct direct *dirp;
+ struct componentname *cnp;
+ struct buf *newdirbp;
{
- int newentrysize;
- struct inode *dp;
- struct buf *bp;
- struct iovec aiov;
- struct uio auio;
- u_int dsize;
- struct direct *ep, *nep;
- int error, loc, spacefree;
- char *dirbuf;
+ struct ucred *cr;
+ struct proc *p;
+ int newentrysize;
+ struct inode *dp;
+ struct buf *bp;
+ u_int dsize;
+ struct direct *ep, *nep;
+ int error, ret, blkoff, loc, spacefree, flags;
+ char *dirbuf;
+ struct timespec ts;
- dp = VTOI(dvp);
- newentrysize = DIRSIZ(FSFMT(dvp), dirp);
+ error = 0;
+ cr = cnp->cn_cred;
+ p = cnp->cn_proc;
+ dp = VTOI(dvp);
+ newentrysize = DIRSIZ(FSFMT(dvp), dirp);
if (dp->i_count == 0) {
/*
* If dp->i_count is 0, then namei could find no
* space in the directory. Here, dp->i_offset will
* be on a directory block boundary and we will write the
- * new entry into a fresh block.
- */
- if (dp->i_offset & (DIRBLKSIZ - 1))
- panic("ufs_direnter2: newblk");
- auio.uio_offset = dp->i_offset;
- dirp->d_reclen = DIRBLKSIZ;
- auio.uio_resid = newentrysize;
- aiov.iov_len = newentrysize;
- aiov.iov_base = (caddr_t)dirp;
- auio.uio_iov = &aiov;
- auio.uio_iovcnt = 1;
- auio.uio_rw = UIO_WRITE;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_procp = (struct proc *)0;
- error = VOP_WRITE(dvp, &auio, IO_SYNC, cr);
- if (DIRBLKSIZ >
- VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
- /* XXX should grow with balloc() */
- panic("ufs_direnter2: frag size");
- else if (!error) {
- dp->i_ffs_size = roundup(dp->i_ffs_size, DIRBLKSIZ);
- dp->i_flag |= IN_CHANGE;
+ * new entry into a fresh block.
+ */
+ if (dp->i_offset & (DIRBLKSIZ - 1))
+ panic("ufs_direnter: newblk");
+ flags = B_CLRBUF;
+ if (!DOINGSOFTDEP(dvp))
+ flags |= B_SYNC;
+ if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ,
+ cr, flags, &bp)) != 0) {
+ if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
+ bdwrite(newdirbp);
+ return (error);
}
- return (error);
- }
-
- /*
- * If dp->i_count is non-zero, then namei found space
- * for the new entry in the range dp->i_offset to
- * dp->i_offset + dp->i_count in the directory.
- * To use this space, we may have to compact the entries located
- * there, by copying them together towards the beginning of the
- * block, leaving the free space in one usable chunk at the end.
- */
-
- /*
+ dp->i_ffs_size = dp->i_offset + DIRBLKSIZ;
+ dp->i_flag |= IN_CHANGE | IN_UPDATE;
+ vnode_pager_setsize(dvp, (u_long)dp->i_ffs_size);
+ dirp->d_reclen = DIRBLKSIZ;
+ blkoff = dp->i_offset &
+ (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1);
+ bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize);
+ if (DOINGSOFTDEP(dvp)) {
+ /*
+ * Ensure that the entire newly allocated block is a
+ * valid directory so that future growth within the
+ * block does not have to ensure that the block is
+ * written before the inode.
+ */
+ blkoff += DIRBLKSIZ;
+ while (blkoff < bp->b_bcount) {
+ ((struct direct *)
+ (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
+ blkoff += DIRBLKSIZ;
+ }
+ softdep_setup_directory_add(bp, dp, dp->i_offset,
+ dirp->d_ino, newdirbp);
+ bdwrite(bp);
+ } else {
+ error = VOP_BWRITE(bp);
+ }
+ TIMEVAL_TO_TIMESPEC(&time, &ts);
+ ret = VOP_UPDATE(dvp, &ts, &ts, !DOINGSOFTDEP(dvp));
+ if (error == 0)
+ return (ret);
+ return (error);
+ }
+
+ /*
+ * If dp->i_count is non-zero, then namei found space for the new
+ * entry in the range dp->i_offset to dp->i_offset + dp->i_count
+ * in the directory. To use this space, we may have to compact
+ * the entries located there, by copying them together towards the
+ * beginning of the block, leaving the free space in one usable
+ * chunk at the end.
+ */
+
+ /*
* Increase size of directory if entry eats into new space.
* This should never push the size past a new multiple of
* DIRBLKSIZE.
@@ -784,15 +816,17 @@ ufs_direnter2(dvp, dirp, cr, p)
/*
* Get the block containing the space for the new directory entry.
*/
- error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp);
- if (error)
- return (error);
+ if ((error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp))
+ != 0) {
+ if (DOINGSOFTDEP(dvp) && newdirbp != NULL)
+ bdwrite(newdirbp);
+ return (error);
+ }
/*
* Find space for the new entry. In the simple case, the entry at
* offset base will have the space. If it does not, then namei
* arranged that compacting the region dp->i_offset to
- * dp->i_offset + dp->i_count would yield the
- * space.
+ * dp->i_offset + dp->i_count would yield the space.
*/
ep = (struct direct *)dirbuf;
dsize = DIRSIZ(FSFMT(dvp), ep);
@@ -810,7 +844,11 @@ ufs_direnter2(dvp, dirp, cr, p)
dsize = DIRSIZ(FSFMT(dvp), nep);
spacefree += nep->d_reclen - dsize;
loc += nep->d_reclen;
- bcopy((caddr_t)nep, (caddr_t)ep, dsize);
+ if (DOINGSOFTDEP(dvp))
+ softdep_change_directoryentry_offset(dp, dirbuf,
+ (caddr_t)nep, (caddr_t)ep, dsize);
+ else
+ bcopy((caddr_t)nep, (caddr_t)ep, dsize);
}
/*
* Update the pointer fields in the previous entry (if any),
@@ -820,19 +858,26 @@ ufs_direnter2(dvp, dirp, cr, p)
(ep->d_ino == WINO &&
bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) {
if (spacefree + dsize < newentrysize)
- panic("ufs_direnter2: compact1");
+ panic("ufs_direnter: compact1");
dirp->d_reclen = spacefree + dsize;
} else {
if (spacefree < newentrysize)
- panic("ufs_direnter2: compact2");
+ panic("ufs_direnter: compact2");
dirp->d_reclen = spacefree;
ep->d_reclen = dsize;
ep = (struct direct *)((char *)ep + dsize);
}
bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize);
- error = VOP_BWRITE(bp);
+
+ if (DOINGSOFTDEP(dvp)) {
+ softdep_setup_directory_add(bp, dp,
+ dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp);
+ bdwrite(bp);
+ } else {
+ error = VOP_BWRITE(bp);
+ }
dp->i_flag |= IN_CHANGE | IN_UPDATE;
- if (!error && dp->i_endoff && dp->i_endoff < dp->i_ffs_size)
+ if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_ffs_size)
error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p);
return (error);
}
@@ -850,18 +895,20 @@ ufs_direnter2(dvp, dirp, cr, p)
* to the size of the previous entry.
*/
int
-ufs_dirremove(dvp, cnp)
+ufs_dirremove(dvp, ip, flags, isrmdir)
struct vnode *dvp;
- struct componentname *cnp;
+ struct inode *ip;
+ int flags;
+ int isrmdir;
{
- register struct inode *dp;
+ struct inode *dp;
struct direct *ep;
struct buf *bp;
int error;
dp = VTOI(dvp);
- if (cnp->cn_flags & DOWHITEOUT) {
+ if (flags & DOWHITEOUT) {
/*
* Whiteout entry: set d_ino to WINO.
*/
@@ -871,33 +918,39 @@ ufs_dirremove(dvp, cnp)
return (error);
ep->d_ino = WINO;
ep->d_type = DT_WHT;
- error = VOP_BWRITE(bp);
- dp->i_flag |= IN_CHANGE | IN_UPDATE;
- return (error);
+ goto out;
}
+ if ((error = VOP_BLKATOFF(dvp,
+ (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0)
+ return (error);
+
if (dp->i_count == 0) {
/*
* First entry in block: set d_ino to zero.
*/
- error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep,
- &bp);
- if (error)
- return (error);
ep->d_ino = 0;
+ } else {
+ /*
+ * Collapse new free space into previous entry.
+ */
+ ep->d_reclen += dp->i_reclen;
+ }
+out:
+ if (ip) {
+ ip->i_effnlink--;
+ ip->i_flag |= IN_CHANGE;
+ }
+ if (DOINGSOFTDEP(dvp)) {
+ if (ip)
+ softdep_setup_remove(bp, dp, ip, isrmdir);
+ bdwrite(bp);
+ } else {
+ if (ip)
+ ip->i_ffs_nlink--; /* XXX */
+
error = VOP_BWRITE(bp);
- dp->i_flag |= IN_CHANGE | IN_UPDATE;
- return (error);
}
- /*
- * Collapse new free space into previous entry.
- */
- error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count),
- (char **)&ep, &bp);
- if (error)
- return (error);
- ep->d_reclen += dp->i_reclen;
- error = VOP_BWRITE(bp);
dp->i_flag |= IN_CHANGE | IN_UPDATE;
return (error);
}
@@ -908,9 +961,11 @@ ufs_dirremove(dvp, cnp)
* set up by a call to namei.
*/
int
-ufs_dirrewrite(dp, ip, cnp)
- struct inode *dp, *ip;
- struct componentname *cnp;
+ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir)
+ struct inode *dp, *oip;
+ ino_t newinum;
+ int newtype;
+ int isrmdir;
{
struct buf *bp;
struct direct *ep;
@@ -920,10 +975,18 @@ ufs_dirrewrite(dp, ip, cnp)
error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp);
if (error)
return (error);
- ep->d_ino = ip->i_number;
+ ep->d_ino = newinum;
if (vdp->v_mount->mnt_maxsymlinklen > 0)
- ep->d_type = IFTODT(ip->i_ffs_mode);
- error = VOP_BWRITE(bp);
+ ep->d_type = newtype;
+ oip->i_effnlink--;
+ oip->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(vdp)) {
+ softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir);
+ bdwrite(bp);
+ } else {
+ oip->i_ffs_nlink--; /* XXX */
+ error = VOP_BWRITE(bp);
+ }
dp->i_flag |= IN_CHANGE | IN_UPDATE;
return (error);
}
@@ -983,7 +1046,7 @@ ufs_dirempty(ip, parentino, cred)
* 1 implies ".", 2 implies ".." if second
* char is also "."
*/
- if (namlen == 1)
+ if (namlen == 1 && dp->d_ino == ip->i_number)
continue;
if (dp->d_name[1] == '.' && dp->d_ino == parentino)
continue;
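
The comment in ufs_direnter above describes the compaction step: the live part of each entry in the range found by namei is slid toward the front of the block and the recovered slack is pooled until the new entry fits. A stripped-down user-space sketch of that idea, using a hypothetical struct rec with an explicit live length instead of struct direct and DIRSIZ(), and ignoring deleted (d_ino == 0) slots, could read:

#include <string.h>

struct rec {
	unsigned short	r_reclen;	/* allocated length, like d_reclen */
	unsigned short	r_used;		/* live length, like DIRSIZ() */
};

/*
 * Pack the records in base[0..count) toward the front until at least
 * "needed" bytes of slack sit after the last packed record.  Returns 0
 * on success, -1 if the range cannot yield enough space.
 */
static int
compact(char *base, int count, int needed)
{
	struct rec *ep = (struct rec *)base, *nep;
	int loc, spacefree = ep->r_reclen - ep->r_used;

	for (loc = ep->r_reclen; spacefree < needed && loc < count; ) {
		nep = (struct rec *)(base + loc);
		spacefree += nep->r_reclen - nep->r_used;
		loc += nep->r_reclen;		/* read before nep is moved over */
		ep->r_reclen = ep->r_used;	/* shrink ep to its live size */
		memmove((char *)ep + ep->r_reclen, nep, nep->r_used);
		ep = (struct rec *)((char *)ep + ep->r_reclen);
		ep->r_reclen = ep->r_used;
	}
	if (spacefree < needed)
		return (-1);
	/* All pooled slack now follows ep; the caller carves the new record
	 * out of ep's tail and fixes up the reclen fields. */
	ep->r_reclen = ep->r_used + spacefree;
	return (0);
}

The real routine also has to keep deleted slots and whiteouts straight, and when soft dependencies are active it moves entries with softdep_change_directoryentry_offset() instead of a plain bcopy() so the dependency records follow the entry to its new offset.
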
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index f6ea0606058..bc295d57e26 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_quota.c,v 1.3 1997/05/30 08:35:10 downsj Exp $ */
+/* $OpenBSD: ufs_quota.c,v 1.4 1997/10/06 15:27:38 csapuntz Exp $ */
/* $NetBSD: ufs_quota.c,v 1.8 1996/02/09 22:36:09 christos Exp $ */
/*
@@ -376,15 +376,11 @@ quotaon(p, mp, type, fname)
if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0)
return (error);
vp = nd.ni_vp;
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
if (vp->v_type != VREG) {
(void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
return (EACCES);
}
- if (vfs_busy(mp)) {
- (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p);
- return (EBUSY);
- }
if (*vpp != vp)
quotaoff(p, mp, type);
ump->um_qflags[type] |= QTF_OPENING;
@@ -414,9 +410,9 @@ quotaon(p, mp, type, fname)
again:
for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
nextvp = vp->v_mntvnodes.le_next;
- if (vp->v_writecount == 0)
+ if (vp->v_type == VNON || vp->v_writecount == 0)
continue;
- if (vget(vp, 1))
+ if (vget(vp, LK_EXCLUSIVE, p))
goto again;
if ((error = getinoquota(VTOI(vp))) != 0) {
vput(vp);
@@ -429,7 +425,6 @@ again:
ump->um_qflags[type] &= ~QTF_OPENING;
if (error)
quotaoff(p, mp, type);
- vfs_unbusy(mp);
return (error);
}
@@ -449,8 +444,6 @@ quotaoff(p, mp, type)
register struct inode *ip;
int error;
- if ((mp->mnt_flag & MNT_MPBUSY) == 0)
- panic("quotaoff: not busy");
if ((qvp = ump->um_quotas[type]) == NULLVP)
return (0);
ump->um_qflags[type] |= QTF_CLOSING;
@@ -461,7 +454,9 @@ quotaoff(p, mp, type)
again:
for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
nextvp = vp->v_mntvnodes.le_next;
- if (vget(vp, 1))
+ if (vp->v_type == VNON)
+ continue;
+ if (vget(vp, LK_EXCLUSIVE, p))
goto again;
ip = VTOI(vp);
dq = ip->i_dquot[type];
@@ -621,16 +616,16 @@ qsync(mp)
struct mount *mp;
{
struct ufsmount *ump = VFSTOUFS(mp);
+ struct proc *p = curproc;
register struct vnode *vp, *nextvp;
register struct dquot *dq;
register int i;
+ int error = 0;
/*
* Check if the mount point has any quotas.
* If not, simply return.
*/
- if ((mp->mnt_flag & MNT_MPBUSY) == 0)
- panic("qsync: not busy");
for (i = 0; i < MAXQUOTAS; i++)
if (ump->um_quotas[i] != NULLVP)
break;
@@ -640,22 +635,34 @@ qsync(mp)
* Search vnodes associated with this mount point,
* synchronizing any modified dquot structures.
*/
+ simple_lock(&mntvnode_slock);
again:
- for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
- nextvp = vp->v_mntvnodes.le_next;
- if (VOP_ISLOCKED(vp))
- continue;
- if (vget(vp, 1))
+ for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) {
+ if (vp->v_mount != mp)
goto again;
+ nextvp = vp->v_mntvnodes.le_next;
+ if (vp->v_type == VNON)
+ continue;
+ simple_lock(&vp->v_interlock);
+ simple_unlock(&mntvnode_slock);
+ error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
+ if (error) {
+ simple_lock(&mntvnode_slock);
+ if (error == ENOENT)
+ goto again;
+ continue;
+ }
for (i = 0; i < MAXQUOTAS; i++) {
dq = VTOI(vp)->i_dquot[i];
if (dq != NODQUOT && (dq->dq_flags & DQ_MOD))
dqsync(vp, dq);
}
vput(vp);
- if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp)
- goto again;
- }
+ simple_lock(&mntvnode_slock);
+ if (vp->v_mntvnodes.le_next != nextvp)
+ goto again;
+ }
+ simple_unlock(&mntvnode_slock);
return (0);
}
@@ -697,6 +704,7 @@ dqget(vp, id, ump, type, dqp)
register int type;
struct dquot **dqp;
{
+ struct proc *p = curproc;
register struct dquot *dq;
struct dqhash *dqh;
register struct vnode *dqvp;
@@ -752,7 +760,7 @@ dqget(vp, id, ump, type, dqp)
* Initialize the contents of the dquot structure.
*/
if (vp != dqvp)
- VOP_LOCK(dqvp);
+ vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p);
LIST_INSERT_HEAD(dqh, dq, dq_hash);
DQREF(dq);
dq->dq_flags = DQ_LOCK;
@@ -772,7 +780,7 @@ dqget(vp, id, ump, type, dqp)
if (auio.uio_resid == sizeof(struct dqblk) && error == 0)
bzero((caddr_t)&dq->dq_dqb, sizeof(struct dqblk));
if (vp != dqvp)
- VOP_UNLOCK(dqvp);
+ VOP_UNLOCK(dqvp, 0, p);
if (dq->dq_flags & DQ_WANT)
wakeup((caddr_t)dq);
dq->dq_flags = 0;
@@ -844,6 +852,7 @@ dqsync(vp, dq)
struct vnode *vp;
register struct dquot *dq;
{
+ struct proc *p = curproc;
struct vnode *dqvp;
struct iovec aiov;
struct uio auio;
@@ -856,13 +865,13 @@ dqsync(vp, dq)
if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP)
panic("dqsync: file");
if (vp != dqvp)
- VOP_LOCK(dqvp);
+ vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p);
while (dq->dq_flags & DQ_LOCK) {
dq->dq_flags |= DQ_WANT;
sleep((caddr_t)dq, PINOD+2);
if ((dq->dq_flags & DQ_MOD) == 0) {
if (vp != dqvp)
- VOP_UNLOCK(dqvp);
+ VOP_UNLOCK(dqvp, 0, p);
return (0);
}
}
@@ -883,7 +892,7 @@ dqsync(vp, dq)
wakeup((caddr_t)dq);
dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT);
if (vp != dqvp)
- VOP_UNLOCK(dqvp);
+ VOP_UNLOCK(dqvp, 0, p);
return (error);
}
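
The qsync() rewrite above follows a common traversal pattern: hold mntvnode_slock only while walking the list, grab the vnode (with LK_NOWAIT so a busy vnode is skipped) before dropping the list lock, do the work unlocked, then re-take the list lock and restart if the list changed underneath. A user-space sketch of the pattern with POSIX threads, assuming for simplicity that nodes are never freed (the kernel relies on the vget() reference for that), might be:

#include <pthread.h>

struct node {
	struct node	*n_next;
	pthread_mutex_t	 n_mtx;
	int		 n_dirty;
};

static pthread_mutex_t list_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct node *list_head;

static void
sync_all(void (*flush)(struct node *))
{
	struct node *np, *next;

	pthread_mutex_lock(&list_mtx);
again:
	for (np = list_head; np != NULL; np = next) {
		next = np->n_next;
		if (!np->n_dirty)
			continue;
		if (pthread_mutex_trylock(&np->n_mtx) != 0)
			continue;		/* busy; skip, like LK_NOWAIT */
		pthread_mutex_unlock(&list_mtx);
		flush(np);			/* work without the list lock */
		pthread_mutex_unlock(&np->n_mtx);
		pthread_mutex_lock(&list_mtx);
		if (np->n_next != next)		/* list changed; start over */
			goto again;
	}
	pthread_mutex_unlock(&list_mtx);
}
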
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
index 604c16fcb90..25148b78f61 100644
--- a/sys/ufs/ufs/ufs_readwrite.c
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_readwrite.c,v 1.9 1997/05/30 08:35:13 downsj Exp $ */
+/* $OpenBSD: ufs_readwrite.c,v 1.10 1997/10/06 15:27:39 csapuntz Exp $ */
/* $NetBSD: ufs_readwrite.c,v 1.9 1996/05/11 18:27:57 mycroft Exp $ */
/*-
@@ -242,19 +242,13 @@ WRITE(v)
xfersize = fs->fs_bsize - blkoffset;
if (uio->uio_resid < xfersize)
xfersize = uio->uio_resid;
-#ifdef LFS_READWRITE
- (void)lfs_check(vp, lbn);
- error = lfs_balloc(vp, blkoffset, xfersize, lbn, &bp);
-#else
if (fs->fs_bsize > xfersize)
flags |= B_CLRBUF;
else
flags &= ~B_CLRBUF;
- error = ffs_balloc(ip,
- lbn, blkoffset + xfersize, ap->a_cred, &bp, flags);
-#endif
- if (error)
+ if ((error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
+ ap->a_cred, flags, &bp)) != 0)
break;
if (uio->uio_offset + xfersize > ip->i_ffs_size) {
ip->i_ffs_size = uio->uio_offset + xfersize;
diff --git a/sys/ufs/ufs/ufs_vfsops.c b/sys/ufs/ufs/ufs_vfsops.c
index 0e308fd39dd..11dfa3086c4 100644
--- a/sys/ufs/ufs/ufs_vfsops.c
+++ b/sys/ufs/ufs/ufs_vfsops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_vfsops.c,v 1.3 1997/05/30 08:35:15 downsj Exp $ */
+/* $OpenBSD: ufs_vfsops.c,v 1.4 1997/10/06 15:27:39 csapuntz Exp $ */
/* $NetBSD: ufs_vfsops.c,v 1.4 1996/02/09 22:36:12 christos Exp $ */
/*
@@ -125,39 +125,64 @@ ufs_quotactl(mp, cmds, uid, arg, p)
if ((u_int)type >= MAXQUOTAS)
return (EINVAL);
+ if (vfs_busy(mp, LK_NOWAIT, 0, p))
+ return (0);
+
+
switch (cmd) {
case Q_QUOTAON:
- return (quotaon(p, mp, type, arg));
+ error = quotaon(p, mp, type, arg);
+ break;
case Q_QUOTAOFF:
- if (vfs_busy(mp))
- return (0);
error = quotaoff(p, mp, type);
- vfs_unbusy(mp);
- return (error);
+ break;
case Q_SETQUOTA:
- return (setquota(mp, uid, type, arg));
+ error = setquota(mp, uid, type, arg);
+ break;
case Q_SETUSE:
- return (setuse(mp, uid, type, arg));
+ error = setuse(mp, uid, type, arg);
+ break;
case Q_GETQUOTA:
- return (getquota(mp, uid, type, arg));
+ error = getquota(mp, uid, type, arg);
+ break;
case Q_SYNC:
- if (vfs_busy(mp))
- return (0);
error = qsync(mp);
- vfs_unbusy(mp);
- return (error);
+ break;
default:
- return (EINVAL);
+ error = EINVAL;
+ break;
}
- /* NOTREACHED */
+
+ vfs_unbusy(mp, p);
+ return (error);
+#endif
+}
+
+
+/*
+ * Initialize UFS filesystems, done only once.
+ */
+int
+ufs_init(vfsp)
+ struct vfsconf *vfsp;
+{
+ static int done;
+
+ if (done)
+ return (0);
+ done = 1;
+ ufs_ihashinit();
+#ifdef QUOTA
+ dqinit();
#endif
+ return (0);
}
/*
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index fe58d6e899e..12245ddece3 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: ufs_vnops.c,v 1.10 1997/07/03 17:49:49 deraadt Exp $ */
+/* $OpenBSD: ufs_vnops.c,v 1.11 1997/10/06 15:27:40 csapuntz Exp $ */
/* $NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $ */
/*
@@ -90,6 +90,19 @@ union _qcvt {
(q) = tmp.qcvt; \
}
+
+/*
+ * A virgin directory (no blushing please).
+ */
+static struct dirtemplate mastertemplate = {
+ 0, 12, DT_DIR, 1, ".",
+ 0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
+};
+static struct odirtemplate omastertemplate = {
+ 0, 12, 1, ".",
+ 0, DIRBLKSIZ - 12, 2, ".."
+};
+
/*
* Create a regular file
*/
@@ -117,19 +130,19 @@ ufs_mknod(v)
void *v;
{
struct vop_mknod_args /* {
- struct vnode *a_dvp;
- struct vnode **a_vpp;
- struct componentname *a_cnp;
- struct vattr *a_vap;
- } */ *ap = v;
- register struct vattr *vap = ap->a_vap;
- register struct vnode **vpp = ap->a_vpp;
- register struct inode *ip;
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap = v;
+ struct vattr *vap = ap->a_vap;
+ struct vnode **vpp = ap->a_vpp;
+ struct inode *ip;
int error;
if ((error =
- ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
- ap->a_dvp, vpp, ap->a_cnp)) != 0)
+ ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
+ ap->a_dvp, vpp, ap->a_cnp)) != 0)
return (error);
ip = VTOI(*vpp);
ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
@@ -163,11 +176,11 @@ ufs_open(v)
void *v;
{
struct vop_open_args /* {
- struct vnode *a_vp;
- int a_mode;
- struct ucred *a_cred;
- struct proc *a_p;
- } */ *ap = v;
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap = v;
/*
* Files marked append-only must be opened for appending.
@@ -194,11 +207,13 @@ ufs_close(v)
struct ucred *a_cred;
struct proc *a_p;
} */ *ap = v;
- register struct vnode *vp = ap->a_vp;
- register struct inode *ip = VTOI(vp);
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
- if (vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
+ simple_lock(&vp->v_interlock);
+ if (vp->v_usecount > 1)
ITIMES(ip, &time, &time);
+ simple_unlock(&vp->v_interlock);
return (0);
}
@@ -212,25 +227,27 @@ ufs_access(v)
struct ucred *a_cred;
struct proc *a_p;
} */ *ap = v;
- register struct vnode *vp = ap->a_vp;
- register struct inode *ip = VTOI(vp);
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
mode_t mode = ap->a_mode;
-#ifdef DIAGNOSTIC
- if (!VOP_ISLOCKED(vp)) {
- vprint("ufs_access: not locked", vp);
- panic("ufs_access: not locked");
- }
-#endif
-#ifdef QUOTA
- if (mode & VWRITE)
+ /*
+ * Disallow write attempts on read-only file systems;
+ * unless the file is a socket, fifo, or a block or
+ * character device resident on the file system.
+ */
+ if (mode & VWRITE) {
switch (vp->v_type) {
int error;
case VDIR:
case VLNK:
case VREG:
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ return (EROFS);
+#ifdef QUOTA
if ((error = getinoquota(ip)) != 0)
return (error);
+#endif
break;
case VBAD:
case VBLK:
@@ -239,8 +256,9 @@ ufs_access(v)
case VFIFO:
case VNON:
break;
+
}
-#endif
+ }
/* If immutable bit set, nobody gets to write it. */
if ((mode & VWRITE) && (ip->i_ffs_flags & IMMUTABLE))
@@ -261,9 +279,9 @@ ufs_getattr(v)
struct ucred *a_cred;
struct proc *a_p;
} */ *ap = v;
- register struct vnode *vp = ap->a_vp;
- register struct inode *ip = VTOI(vp);
- register struct vattr *vap = ap->a_vap;
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
+ struct vattr *vap = ap->a_vap;
ITIMES(ip, &time, &time);
/*
@@ -272,7 +290,7 @@ ufs_getattr(v)
vap->va_fsid = ip->i_dev;
vap->va_fileid = ip->i_number;
vap->va_mode = ip->i_ffs_mode & ~IFMT;
- vap->va_nlink = ip->i_ffs_nlink;
+ vap->va_nlink = ip->i_effnlink;
vap->va_uid = ip->i_ffs_uid;
vap->va_gid = ip->i_ffs_gid;
vap->va_rdev = (dev_t)ip->i_ffs_rdev;
@@ -311,11 +329,11 @@ ufs_setattr(v)
struct ucred *a_cred;
struct proc *a_p;
} */ *ap = v;
- register struct vattr *vap = ap->a_vap;
- register struct vnode *vp = ap->a_vp;
- register struct inode *ip = VTOI(vp);
- register struct ucred *cred = ap->a_cred;
- register struct proc *p = ap->a_p;
+ struct vattr *vap = ap->a_vap;
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
+ struct ucred *cred = ap->a_cred;
+ struct proc *p = ap->a_p;
int error;
/*
@@ -328,6 +346,8 @@ ufs_setattr(v)
return (EINVAL);
}
if (vap->va_flags != VNOVAL) {
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ return (EROFS);
if (cred->cr_uid != ip->i_ffs_uid &&
(error = suser(cred, &p->p_acflag)))
return (error);
@@ -337,7 +357,8 @@ ufs_setattr(v)
return (EPERM);
ip->i_ffs_flags = vap->va_flags;
} else {
- if (ip->i_ffs_flags & (SF_IMMUTABLE | SF_APPEND))
+ if (ip->i_ffs_flags & (SF_IMMUTABLE | SF_APPEND) ||
+ (vap->va_flags & UF_SETTABLE) != vap->va_flags)
return (EPERM);
ip->i_ffs_flags &= SF_SETTABLE;
ip->i_ffs_flags |= (vap->va_flags & UF_SETTABLE);
@@ -352,19 +373,36 @@ ufs_setattr(v)
* Go through the fields and update if not VNOVAL.
*/
if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ return (EROFS);
error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p);
if (error)
return (error);
}
if (vap->va_size != VNOVAL) {
- if (vp->v_type == VDIR)
- return (EISDIR);
- error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p);
- if (error)
- return (error);
+ /*
+ * Disallow write attempts on read-only file systems;
+ * unless the file is a socket, fifo, or a block or
+ * character device resident on the file system.
+ */
+ switch (vp->v_type) {
+ case VDIR:
+ return (EISDIR);
+ case VLNK:
+ case VREG:
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ return (EROFS);
+ break;
+ default:
+ break;
+ }
+ if ((error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p)) != 0)
+ return (error);
}
ip = VTOI(vp);
if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ return (EROFS);
if (cred->cr_uid != ip->i_ffs_uid &&
(error = suser(cred, &p->p_acflag)) &&
((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
@@ -374,13 +412,16 @@ ufs_setattr(v)
ip->i_flag |= IN_ACCESS;
if (vap->va_mtime.tv_sec != VNOVAL)
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- error = VOP_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 1);
+ error = VOP_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0);
if (error)
return (error);
}
error = 0;
- if (vap->va_mode != (mode_t)VNOVAL)
+ if (vap->va_mode != (mode_t)VNOVAL) {
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ return (EROFS);
error = ufs_chmod(vp, (int)vap->va_mode, cred, p);
+ }
return (error);
}
@@ -390,12 +431,12 @@ ufs_setattr(v)
*/
static int
ufs_chmod(vp, mode, cred, p)
- register struct vnode *vp;
- register int mode;
- register struct ucred *cred;
+ struct vnode *vp;
+ int mode;
+ struct ucred *cred;
struct proc *p;
{
- register struct inode *ip = VTOI(vp);
+ struct inode *ip = VTOI(vp);
int error;
if (cred->cr_uid != ip->i_ffs_uid &&
@@ -421,18 +462,18 @@ ufs_chmod(vp, mode, cred, p)
*/
static int
ufs_chown(vp, uid, gid, cred, p)
- register struct vnode *vp;
+ struct vnode *vp;
uid_t uid;
gid_t gid;
struct ucred *cred;
struct proc *p;
{
- register struct inode *ip = VTOI(vp);
+ struct inode *ip = VTOI(vp);
uid_t ouid;
gid_t ogid;
int error = 0;
#ifdef QUOTA
- register int i;
+ int i;
long change;
#endif
@@ -614,9 +655,9 @@ ufs_remove(v)
struct vnode *a_vp;
struct componentname *a_cnp;
} */ *ap = v;
- register struct inode *ip;
- register struct vnode *vp = ap->a_vp;
- register struct vnode *dvp = ap->a_dvp;
+ struct inode *ip;
+ struct vnode *vp = ap->a_vp;
+ struct vnode *dvp = ap->a_dvp;
int error;
ip = VTOI(vp);
@@ -625,10 +666,8 @@ ufs_remove(v)
error = EPERM;
goto out;
}
- if ((error = ufs_dirremove(dvp, ap->a_cnp)) == 0) {
- ip->i_ffs_nlink--;
- ip->i_flag |= IN_CHANGE;
- }
+ if ((error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0)) != 0)
+ goto out;
out:
if (dvp == vp)
vrele(vp);
@@ -650,10 +689,12 @@ ufs_link(v)
struct vnode *a_vp;
struct componentname *a_cnp;
} */ *ap = v;
- register struct vnode *dvp = ap->a_dvp;
- register struct vnode *vp = ap->a_vp;
- register struct componentname *cnp = ap->a_cnp;
- register struct inode *ip;
+ struct vnode *dvp = ap->a_dvp;
+ struct vnode *vp = ap->a_vp;
+ struct componentname *cnp = ap->a_cnp;
+ struct proc *p = cnp->cn_proc;
+ struct inode *ip;
+ struct direct newdir;
struct timespec ts;
int error;
@@ -671,7 +712,7 @@ ufs_link(v)
error = EXDEV;
goto out2;
}
- if (dvp != vp && (error = VOP_LOCK(vp))) {
+ if (dvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) {
VOP_ABORTOP(dvp, cnp);
goto out2;
}
@@ -686,20 +727,25 @@ ufs_link(v)
error = EPERM;
goto out1;
}
+ ip->i_effnlink++;
ip->i_ffs_nlink++;
ip->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(vp))
+ softdep_increase_linkcnt(ip);
TIMEVAL_TO_TIMESPEC(&time, &ts);
- error = VOP_UPDATE(vp, &ts, &ts, 1);
- if (!error)
- error = ufs_direnter(ip, dvp, cnp);
+ if ((error = VOP_UPDATE(vp, &ts, &ts, !DOINGSOFTDEP(vp))) == 0) {
+ ufs_makedirentry(ip, cnp, &newdir);
+ error = ufs_direnter(dvp, &newdir, cnp, NULL);
+ }
if (error) {
+ ip->i_effnlink--;
ip->i_ffs_nlink--;
ip->i_flag |= IN_CHANGE;
}
FREE(cnp->cn_pnbuf, M_NAMEI);
out1:
if (dvp != vp)
- VOP_UNLOCK(vp);
+ VOP_UNLOCK(vp, 0, p);
out2:
vput(dvp);
return (error);
@@ -742,7 +788,7 @@ ufs_whiteout(v)
newdir.d_namlen = cnp->cn_namelen;
bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
newdir.d_type = DT_WHT;
- error = ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc);
+ error = ufs_direnter(dvp, &newdir, cnp, NULL);
break;
case DELETE:
@@ -753,8 +799,11 @@ ufs_whiteout(v)
#endif
cnp->cn_flags &= ~DOWHITEOUT;
- error = ufs_dirremove(dvp, cnp);
+ error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
break;
+ default:
+ panic("ufs_whiteout: unknown op");
+ /* NOTREACHED */
}
if (cnp->cn_flags & HASBUF) {
FREE(cnp->cn_pnbuf, M_NAMEI);
@@ -801,17 +850,17 @@ ufs_rename(v)
struct componentname *a_tcnp;
} */ *ap = v;
struct vnode *tvp = ap->a_tvp;
- register struct vnode *tdvp = ap->a_tdvp;
+ struct vnode *tdvp = ap->a_tdvp;
struct vnode *fvp = ap->a_fvp;
- register struct vnode *fdvp = ap->a_fdvp;
- register struct componentname *tcnp = ap->a_tcnp;
- register struct componentname *fcnp = ap->a_fcnp;
- register struct inode *ip, *xp, *dp;
- struct dirtemplate dirbuf;
+ struct vnode *fdvp = ap->a_fdvp;
+ struct componentname *tcnp = ap->a_tcnp;
+ struct componentname *fcnp = ap->a_fcnp;
+ struct proc *p = fcnp->cn_proc;
+ struct inode *ip, *xp, *dp;
+ struct direct newdir;
struct timespec ts;
int doingdirectory = 0, oldparent = 0, newparent = 0;
int error = 0;
- u_char namlen;
#ifdef DIAGNOSTIC
if ((tcnp->cn_flags & HASBUF) == 0 ||
@@ -868,13 +917,13 @@ abortit:
(void) relookup(fdvp, &fvp, fcnp);
return (VOP_REMOVE(fdvp, fvp, fcnp));
}
- if ((error = VOP_LOCK(fvp)) != 0)
+ if ((error = vn_lock(fvp, LK_EXCLUSIVE, p)) != 0)
goto abortit;
dp = VTOI(fdvp);
ip = VTOI(fvp);
if ((ip->i_ffs_flags & (IMMUTABLE | APPEND)) ||
(dp->i_ffs_flags & APPEND)) {
- VOP_UNLOCK(fvp);
+ VOP_UNLOCK(fvp, 0, p);
error = EPERM;
goto abortit;
}
@@ -883,7 +932,7 @@ abortit:
if (!error && tvp)
error = VOP_ACCESS(tvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
if (error) {
- VOP_UNLOCK(fvp);
+ VOP_UNLOCK(fvp, 0, p);
error = EACCES;
goto abortit;
}
@@ -895,7 +944,7 @@ abortit:
(fcnp->cn_flags & ISDOTDOT) ||
(tcnp->cn_flags & ISDOTDOT) ||
(ip->i_flag & IN_RENAME)) {
- VOP_UNLOCK(fvp);
+ VOP_UNLOCK(fvp, 0, p);
error = EINVAL;
goto abortit;
}
@@ -920,11 +969,14 @@ abortit:
* completing our work, the link count
* may be wrong, but correctable.
*/
+ ip->i_effnlink++;
ip->i_ffs_nlink++;
ip->i_flag |= IN_CHANGE;
+ if (DOINGSOFTDEP(fvp))
+ softdep_increase_linkcnt(ip);
TIMEVAL_TO_TIMESPEC(&time, &ts);
- if ((error = VOP_UPDATE(fvp, &ts, &ts, 1)) != 0) {
- VOP_UNLOCK(fvp);
+ if ((error = VOP_UPDATE(fvp, &ts, &ts, !DOINGSOFTDEP(fvp))) != 0) {
+ VOP_UNLOCK(fvp, 0, p);
goto bad;
}
@@ -939,7 +991,7 @@ abortit:
* call to checkpath().
*/
error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
- VOP_UNLOCK(fvp);
+ VOP_UNLOCK(fvp, 0, p);
if (oldparent != dp->i_number)
newparent = dp->i_number;
if (doingdirectory && newparent) {
@@ -978,13 +1030,19 @@ abortit:
error = EMLINK;
goto bad;
}
+ dp->i_effnlink++;
dp->i_ffs_nlink++;
dp->i_flag |= IN_CHANGE;
- if ((error = VOP_UPDATE(tdvp, &ts, &ts, 1)) != 0)
+ if (DOINGSOFTDEP(tdvp))
+ softdep_increase_linkcnt(dp);
+ if ((error = VOP_UPDATE(tdvp, &ts, &ts,
+ !DOINGSOFTDEP(tdvp))) != 0)
goto bad;
}
- if ((error = ufs_direnter(ip, tdvp, tcnp)) != 0) {
+ ufs_makedirentry(ip, tcnp, &newdir);
+ if ((error = ufs_direnter(tdvp, &newdir, tcnp, NULL)) != 0) {
if (doingdirectory && newparent) {
+ dp->i_effnlink--;
dp->i_ffs_nlink--;
dp->i_flag |= IN_CHANGE;
(void)VOP_UPDATE(tdvp, &ts, &ts, 1);
@@ -1018,8 +1076,8 @@ abortit:
* (both directories, or both not directories).
*/
if ((xp->i_ffs_mode & IFMT) == IFDIR) {
- if (!ufs_dirempty(xp, dp->i_number, tcnp->cn_cred) ||
- xp->i_ffs_nlink > 2) {
+ if (xp->i_effnlink > 2 ||
+ !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
error = ENOTEMPTY;
goto bad;
}
@@ -1032,37 +1090,35 @@ abortit:
error = EISDIR;
goto bad;
}
- if ((error = ufs_dirrewrite(dp, ip, tcnp)) != 0)
- goto bad;
- /*
- * If the target directory is in the same
- * directory as the source directory,
- * decrement the link count on the parent
- * of the target directory.
- */
- if (doingdirectory && !newparent) {
- dp->i_ffs_nlink--;
- dp->i_flag |= IN_CHANGE;
- }
- vput(tdvp);
- /*
- * Adjust the link count of the target to
- * reflect the dirrewrite above. If this is
- * a directory it is empty and there are
- * no links to it, so we can squash the inode and
- * any space associated with it. We disallowed
- * renaming over top of a directory with links to
- * it above, as the remaining link would point to
- * a directory without "." or ".." entries.
- */
- xp->i_ffs_nlink--;
+
+ if ((error = ufs_dirrewrite(dp, xp, ip->i_number,
+ IFTODT(ip->i_ffs_mode), doingdirectory)) != 0)
+ goto bad;
if (doingdirectory) {
- if (--xp->i_ffs_nlink != 0)
- panic("rename: linked directory");
- error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC,
- tcnp->cn_cred, tcnp->cn_proc);
+ dp->i_effnlink--;
+ dp->i_flag |= IN_CHANGE;
+ xp->i_effnlink--;
+ xp->i_flag |= IN_CHANGE;
}
- xp->i_flag |= IN_CHANGE;
+ if (doingdirectory && !DOINGSOFTDEP(tvp)) {
+ /*
+ * Truncate inode. The only stuff left in the directory
+ * is "." and "..". The "." reference is inconsequential
+ * since we are quashing it. We have removed the "."
+ * reference and the reference in the parent directory,
+ * but there may be other hard links. The soft
+ * dependency code will arrange to do these operations
+ * after the parent directory entry has been deleted on
+ * disk, so when running with that code we avoid doing
+ * them now.
+ */
+ dp->i_ffs_nlink--;
+ xp->i_ffs_nlink--;
+ if ((error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC,
+ tcnp->cn_cred, tcnp->cn_proc)) != 0)
+ goto bad;
+ }
+ vput(tdvp);
vput(tvp);
xp = NULL;
}
@@ -1092,10 +1148,9 @@ abortit:
* changed while the new name has been entered. If the source is
* a file then the entry may have been unlinked or renamed. In
* either case there is no further work to be done. If the source
- * is a directory then it cannot have been rmdir'ed; its link
- * count of three would cause a rmdir to fail with ENOTEMPTY.
- * The IRENAME flag ensures that it cannot be moved by another
- * rename.
+ * is a directory then it cannot have been rmdir'ed; the IN_RENAME
+ * flag ensures that it cannot be moved by another rename or removed
+ * by a rmdir.
*/
if (xp != ip) {
if (doingdirectory)
@@ -1108,44 +1163,11 @@ abortit:
* and ".." set to point to the new parent.
*/
if (doingdirectory && newparent) {
- dp->i_ffs_nlink--;
- dp->i_flag |= IN_CHANGE;
- error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
- sizeof (struct dirtemplate), (off_t)0,
- UIO_SYSSPACE, IO_NODELOCKED,
- tcnp->cn_cred, (int *)0, (struct proc *)0);
- if (error == 0) {
-# if (BYTE_ORDER == LITTLE_ENDIAN)
- if (fvp->v_mount->mnt_maxsymlinklen <= 0)
- namlen = dirbuf.dotdot_type;
- else
- namlen = dirbuf.dotdot_namlen;
-# else
- namlen = dirbuf.dotdot_namlen;
-# endif
- if (namlen != 2 ||
- dirbuf.dotdot_name[0] != '.' ||
- dirbuf.dotdot_name[1] != '.') {
- ufs_dirbad(xp, (doff_t)12,
- "rename: mangled dir");
- } else {
- dirbuf.dotdot_ino = newparent;
- (void) vn_rdwr(UIO_WRITE, fvp,
- (caddr_t)&dirbuf,
- sizeof (struct dirtemplate),
- (off_t)0, UIO_SYSSPACE,
- IO_NODELOCKED|IO_SYNC,
- tcnp->cn_cred, (int *)0,
- (struct proc *)0);
- cache_purge(fdvp);
- }
- }
- }
- error = ufs_dirremove(fdvp, fcnp);
- if (!error) {
- xp->i_ffs_nlink--;
- xp->i_flag |= IN_CHANGE;
+ xp->i_offset = mastertemplate.dot_reclen;
+ ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0);
+ cache_purge(fdvp);
}
+ error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0);
xp->i_flag &= ~IN_RENAME;
}
if (dp)
@@ -1162,7 +1184,8 @@ bad:
out:
if (doingdirectory)
ip->i_flag &= ~IN_RENAME;
- if (VOP_LOCK(fvp) == 0) {
+ if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) {
+ ip->i_effnlink--;
ip->i_ffs_nlink--;
ip->i_flag |= IN_CHANGE;
vput(fvp);
@@ -1172,18 +1195,6 @@ out:
}
/*
- * A virgin directory (no blushing please).
- */
-static struct dirtemplate mastertemplate = {
- 0, 12, DT_DIR, 1, ".",
- 0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
-};
-static struct odirtemplate omastertemplate = {
- 0, 12, 1, ".",
- 0, DIRBLKSIZ - 12, 2, ".."
-};
-
-/*
* Mkdir system call
*/
int
@@ -1196,11 +1207,13 @@ ufs_mkdir(v)
struct componentname *a_cnp;
struct vattr *a_vap;
} */ *ap = v;
- register struct vnode *dvp = ap->a_dvp;
- register struct vattr *vap = ap->a_vap;
- register struct componentname *cnp = ap->a_cnp;
- register struct inode *ip, *dp;
+ struct vnode *dvp = ap->a_dvp;
+ struct vattr *vap = ap->a_vap;
+ struct componentname *cnp = ap->a_cnp;
+ struct inode *ip, *dp;
struct vnode *tvp;
+ struct buf *bp;
+ struct direct newdir;
struct dirtemplate dirtemplate, *dtp;
struct timespec ts;
int error, dmode;
@@ -1239,24 +1252,31 @@ ufs_mkdir(v)
ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
ip->i_ffs_mode = dmode;
tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */
+ ip->i_effnlink = 2;
ip->i_ffs_nlink = 2;
+ if (DOINGSOFTDEP(tvp))
+ softdep_increase_linkcnt(ip);
+
if (cnp->cn_flags & ISWHITEOUT)
ip->i_ffs_flags |= UF_OPAQUE;
- TIMEVAL_TO_TIMESPEC(&time, &ts);
- error = VOP_UPDATE(tvp, &ts, &ts, 1);
/*
- * Bump link count in parent directory
- * to reflect work done below. Should
- * be done before reference is created
- * so reparation is possible if we crash.
+ * Bump link count in parent directory to reflect work done below.
+ * Should be done before the reference is created so cleanup is
+ * possible if we crash.
*/
+ dp->i_effnlink++;
dp->i_ffs_nlink++;
dp->i_flag |= IN_CHANGE;
- if ((error = VOP_UPDATE(dvp, &ts, &ts, 1)) != 0)
+ if (DOINGSOFTDEP(dvp))
+ softdep_increase_linkcnt(dp);
+ TIMEVAL_TO_TIMESPEC(&time, &ts);
+ if ((error = VOP_UPDATE(dvp, &ts, &ts, !DOINGSOFTDEP(dvp))) != 0)
goto bad;
- /* Initialize directory with "." and ".." from static template. */
+ /*
+ * Initialize directory with "." and ".." from static template.
+ */
if (dvp->v_mount->mnt_maxsymlinklen > 0)
dtp = &mastertemplate;
else
@@ -1264,40 +1284,56 @@ ufs_mkdir(v)
dirtemplate = *dtp;
dirtemplate.dot_ino = ip->i_number;
dirtemplate.dotdot_ino = dp->i_number;
- error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate,
- sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE,
- IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0);
- if (error) {
- dp->i_ffs_nlink--;
- dp->i_flag |= IN_CHANGE;
+
+ if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
+ B_CLRBUF, &bp)) != 0)
+ goto bad;
+ ip->i_ffs_size = DIRBLKSIZ;
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ vnode_pager_setsize(tvp, (u_long)ip->i_ffs_size);
+ bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
+ if ((error = VOP_UPDATE(tvp, &ts, &ts, !DOINGSOFTDEP(tvp))) != 0) {
+ (void)VOP_BWRITE(bp);
goto bad;
- }
- if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
- panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */
- else {
- ip->i_ffs_size = DIRBLKSIZ;
- ip->i_flag |= IN_CHANGE;
}
- /* Directory set up, now install it's entry in the parent directory. */
- if ((error = ufs_direnter(ip, dvp, cnp)) != 0) {
- dp->i_ffs_nlink--;
- dp->i_flag |= IN_CHANGE;
- }
-bad:
/*
- * No need to do an explicit VOP_TRUNCATE here, vrele will do this
- * for us because we set the link count to 0.
+ * Directory set up, now install its entry in the parent directory.
+ *
+ * If we are not doing soft dependencies, then we must write out the
+ * buffer containing the new directory body before entering the new
+ * name in the parent. If we are doing soft dependencies, then the
+ * buffer containing the new directory body will be passed to and
+ * released in the soft dependency code after the code has attached
+ * an appropriate ordering dependency to the buffer which ensures that
+ * the buffer is written before the new name is written in the parent.
*/
- if (error) {
- ip->i_ffs_nlink = 0;
- ip->i_flag |= IN_CHANGE;
+ if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp)) != 0))
+ goto bad;
+ ufs_makedirentry(ip, cnp, &newdir);
+ error = ufs_direnter(dvp, &newdir, cnp, bp);
+
+bad:
+ if (error == 0) {
+ *ap->a_vpp = tvp;
+ } else {
+ dp->i_effnlink--;
+ dp->i_ffs_nlink--;
+ dp->i_flag |= IN_CHANGE;
+ /*
+ * No need to do an explicit VOP_TRUNCATE here, vrele will
+ * do this for us because we set the link count to 0.
+ */
+ ip->i_effnlink = 0;
+ ip->i_ffs_nlink = 0;
+ ip->i_flag |= IN_CHANGE;
+
vput(tvp);
- } else
- *ap->a_vpp = tvp;
+ }
out:
FREE(cnp->cn_pnbuf, M_NAMEI);
vput(dvp);
+
return (error);
}
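
The long comment in ufs_mkdir() above states the ordering rule: the block holding the new directory body must reach disk before the name that makes it reachable, either via the synchronous VOP_BWRITE() or via an ordering dependency attached by the soft dependency code. The same discipline shows up in user space when publishing a file atomically; a sketch with plain POSIX calls, where publish() and its arguments are all hypothetical, might read:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int
publish(const char *dir, const char *tmppath, const char *path,
    const void *buf, size_t len)
{
	int fd, dfd = -1, error = 0;

	if ((fd = open(tmppath, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
		return (-1);
	if (write(fd, buf, len) != (ssize_t)len || fsync(fd) < 0)
		error = -1;			/* the body must be durable first */
	close(fd);
	if (error == 0 && rename(tmppath, path) < 0)
		error = -1;			/* only then expose the name */
	if (error == 0) {
		if ((dfd = open(dir, O_RDONLY)) < 0 || fsync(dfd) < 0)
			error = -1;		/* and make the name durable too */
		if (dfd >= 0)
			close(dfd);
	}
	return (error);
}

Soft dependencies provide the same guarantee without the synchronous wait by recording the ordering on the buffers and letting normal write-back enforce it.
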
@@ -1313,10 +1349,10 @@ ufs_rmdir(v)
struct vnode *a_vp;
struct componentname *a_cnp;
} */ *ap = v;
- register struct vnode *vp = ap->a_vp;
- register struct vnode *dvp = ap->a_dvp;
- register struct componentname *cnp = ap->a_cnp;
- register struct inode *ip, *dp;
+ struct vnode *vp = ap->a_vp;
+ struct vnode *dvp = ap->a_dvp;
+ struct componentname *cnp = ap->a_cnp;
+ struct inode *ip, *dp;
int error;
ip = VTOI(vp);
@@ -1330,14 +1366,17 @@ ufs_rmdir(v)
return (EINVAL);
}
/*
- * Verify the directory is empty (and valid).
- * (Rmdir ".." won't be valid since
- * ".." will contain a reference to
- * the current directory and thus be
- * non-empty.)
+ * Do not remove a directory that is in the process of being renamed.
+ * Verify the directory is empty (and valid). Rmdir ".." will not be
+ * valid since ".." will contain a reference to the current directory
+ * and thus be non-empty.
*/
error = 0;
- if (ip->i_ffs_nlink != 2 ||
+ if (ip->i_flag & IN_RENAME) {
+ error = EINVAL;
+ goto out;
+ }
+ if (ip->i_effnlink != 2 ||
!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
error = ENOTEMPTY;
goto out;
@@ -1352,31 +1391,33 @@ ufs_rmdir(v)
* inode. If we crash in between, the directory
* will be reattached to lost+found,
*/
- if ((error = ufs_dirremove(dvp, cnp)) != 0)
+ if ((error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1)) != 0)
goto out;
- dp->i_ffs_nlink--;
- dp->i_flag |= IN_CHANGE;
cache_purge(dvp);
- vput(dvp);
- dvp = NULL;
- /*
- * Truncate inode. The only stuff left
- * in the directory is "." and "..". The
- * "." reference is inconsequential since
- * we're quashing it. The ".." reference
- * has already been adjusted above. We've
- * removed the "." reference and the reference
- * in the parent directory, but there may be
- * other hard links so decrement by 2 and
- * worry about them later.
+ /*
+ * Truncate inode. The only stuff left in the directory is "." and
+ * "..". The "." reference is inconsequential since we are quashing
+ * it. We have removed the "." reference and the reference in the
+ * parent directory, but there may be other hard links. So,
+ * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no
+ * new entries are made. The soft dependency code will arrange to
+ * do these operations after the parent directory entry has been
+ * deleted on disk, so when running with that code we avoid doing
+ * them now.
*/
- ip->i_ffs_nlink -= 2;
- error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
- cnp->cn_proc);
- cache_purge(ITOV(ip));
+ dp->i_effnlink--;
+ dp->i_flag |= IN_CHANGE;
+ ip->i_effnlink--;
+ ip->i_flag |= IN_CHANGE;
+ if (!DOINGSOFTDEP(vp)) {
+ dp->i_ffs_nlink--;
+ ip->i_ffs_nlink--;
+ error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred,
+ cnp->cn_proc);
+ }
+ cache_purge(vp);
out:
- if (dvp)
- vput(dvp);
+ vput(dvp);
vput(vp);
return (error);
}
@@ -1395,8 +1436,8 @@ ufs_symlink(v)
struct vattr *a_vap;
char *a_target;
} */ *ap = v;
- register struct vnode *vp, **vpp = ap->a_vpp;
- register struct inode *ip;
+ struct vnode *vp, **vpp = ap->a_vpp;
+ struct inode *ip;
int len, error;
error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
@@ -1436,10 +1477,10 @@ ufs_readdir(v)
struct uio *a_uio;
struct ucred *a_cred;
int *a_eofflag;
- u_long *a_cookies;
- int ncookies;
+ u_long **a_cookies;
+ int *ncookies;
} */ *ap = v;
- register struct uio *uio = ap->a_uio;
+ struct uio *uio = ap->a_uio;
int error;
size_t count, lost;
off_t off = uio->uio_offset;
@@ -1495,9 +1536,10 @@ ufs_readdir(v)
error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred);
# endif
if (!error && ap->a_ncookies) {
- register struct dirent *dp;
- register u_long *cookies = ap->a_cookies;
- register int ncookies = ap->a_ncookies;
+ struct dirent *dp, *dpstart;
+ off_t offstart;
+ u_long *cookies;
+ int ncookies;
/*
* Only the NFS server and emulations use cookies, and they
@@ -1506,17 +1548,28 @@ ufs_readdir(v)
*/
if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
panic("ufs_readdir: lost in space");
- dp = (struct dirent *)
- (uio->uio_iov->iov_base - (uio->uio_offset - off));
- while (ncookies-- && off < uio->uio_offset) {
- if (dp->d_reclen == 0)
- break;
+
+ dpstart = (struct dirent *)
+ (uio->uio_iov->iov_base - (uio->uio_offset - off));
+ offstart = off;
+ for (dp = dpstart, ncookies = 0; off < uio->uio_offset; ) {
+ if (dp->d_reclen == 0)
+ break;
+ off += dp->d_reclen;
+ ncookies++;
+ dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
+ }
+ lost += uio->uio_offset - off;
+ uio->uio_offset = off;
+ MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP,
+ M_WAITOK);
+ *ap->a_ncookies = ncookies;
+ *ap->a_cookies = cookies;
+ for (off = offstart, dp = dpstart; off < uio->uio_offset; ) {
+ *(cookies++) = off;
off += dp->d_reclen;
- *(cookies++) = off;
- dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
+ dp = (struct dirent *)((caddr_t)dp + dp->d_reclen);
}
- lost += uio->uio_offset - off;
- uio->uio_offset = off;
}
uio->uio_resid += lost;
*ap->a_eofflag = VTOI(ap->a_vp)->i_ffs_size <= uio->uio_offset;
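
The cookie handling above now makes two passes over the records just copied out: one to count them so the array can be sized with MALLOC, and one to record, for each entry, the offset of the entry that follows it. A user-space sketch of the same two-pass scheme, with a hypothetical struct rec standing in for struct dirent, could be:

#include <stdlib.h>

struct rec {
	unsigned short r_reclen;	/* record length, like d_reclen */
};

static long *
make_cookies(const char *buf, long start, long end, int *ncookiesp)
{
	const struct rec *rp;
	long off, *cookies, *cp;
	int n;

	/* pass 1: count the records laid out in buf for [start, end) */
	for (n = 0, off = start; off < end; off += rp->r_reclen, n++) {
		rp = (const struct rec *)(buf + (off - start));
		if (rp->r_reclen == 0)
			break;			/* corrupt record; stop early */
	}
	if (n == 0 || (cookies = malloc(n * sizeof(*cookies))) == NULL) {
		*ncookiesp = 0;
		return (NULL);
	}

	/* pass 2: each cookie is the offset just past its record */
	for (cp = cookies, off = start; cp < cookies + n; *cp++ = off) {
		rp = (const struct rec *)(buf + (off - start));
		off += rp->r_reclen;
	}
	*ncookiesp = n;
	return (cookies);
}

The kernel version additionally trims uio_offset back to the last whole record and credits the trimmed bytes to "lost" so uio_resid comes out right.
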
@@ -1535,8 +1588,8 @@ ufs_readlink(v)
struct uio *a_uio;
struct ucred *a_cred;
} */ *ap = v;
- register struct vnode *vp = ap->a_vp;
- register struct inode *ip = VTOI(vp);
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
int isize;
isize = ip->i_ffs_size;
@@ -1575,82 +1628,31 @@ ufs_lock(v)
{
struct vop_lock_args /* {
struct vnode *a_vp;
+ int a_flags;
+		struct proc *a_p;
} */ *ap = v;
- register struct vnode *vp = ap->a_vp;
- register struct inode *ip;
-#ifdef DIAGNOSTIC
- struct proc *p = curproc; /* XXX */
-#endif
+ struct vnode *vp = ap->a_vp;
-start:
- while (vp->v_flag & VXLOCK) {
- vp->v_flag |= VXWANT;
- sleep((caddr_t)vp, PINOD);
- }
- if (vp->v_tag == VT_NON)
- return (ENOENT);
- ip = VTOI(vp);
- if (ip->i_flag & IN_LOCKED) {
- ip->i_flag |= IN_WANTED;
-#ifdef DIAGNOSTIC
- if (p) {
- if (p->p_pid == ip->i_lockholder)
- panic("locking against myself");
- ip->i_lockwaiter = p->p_pid;
- } else
- ip->i_lockwaiter = -1;
-#endif
- (void) sleep((caddr_t)ip, PINOD);
- goto start;
- }
-#ifdef DIAGNOSTIC
- ip->i_lockwaiter = 0;
- if (ip->i_lockholder != 0)
- panic("lockholder (%d) != 0", ip->i_lockholder);
- if (p && p->p_pid == 0)
- printf("locking by process 0\n");
- if (p)
- ip->i_lockholder = p->p_pid;
- else
- ip->i_lockholder = -1;
-#endif
- ip->i_flag |= IN_LOCKED;
- return (0);
+ return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags, &vp->v_interlock,
+ ap->a_p));
}
/*
* Unlock an inode. If WANT bit is on, wakeup.
*/
-int lockcount = 90;
int
ufs_unlock(v)
void *v;
{
struct vop_unlock_args /* {
struct vnode *a_vp;
+ int a_flags;
+ struct proc *a_p;
} */ *ap = v;
- register struct inode *ip = VTOI(ap->a_vp);
-#ifdef DIAGNOSTIC
- struct proc *p = curproc; /* XXX */
-#endif
+ struct vnode *vp = ap->a_vp;
-#ifdef DIAGNOSTIC
- if ((ip->i_flag & IN_LOCKED) == 0) {
- vprint("ufs_unlock: unlocked inode", ap->a_vp);
- panic("ufs_unlock NOT LOCKED");
- }
- if (p && p->p_pid != ip->i_lockholder && p->p_pid > -1 &&
- ip->i_lockholder > -1 && lockcount++ < 100)
- panic("unlocker (%d) != lock holder (%d)",
- p->p_pid, ip->i_lockholder);
- ip->i_lockholder = 0;
-#endif
- ip->i_flag &= ~IN_LOCKED;
- if (ip->i_flag & IN_WANTED) {
- ip->i_flag &= ~IN_WANTED;
- wakeup((caddr_t)ip);
- }
- return (0);
+ return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags | LK_RELEASE,
+ &vp->v_interlock, ap->a_p));
}
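
ufs_lock() and ufs_unlock() above stop open-coding a sleep lock with the IN_LOCKED/IN_WANTED flags and instead defer to lockmgr() on the new per-inode i_lock. The essence of the lock being replaced can be sketched in user space with a mutex and condition variable (a simplified stand-in: lockmgr() also handles shared locks, recursion, draining and the LK_* flag set):

#include <pthread.h>

struct sleeplock {
	pthread_mutex_t	sl_mtx;
	pthread_cond_t	sl_cv;
	int		sl_locked;
};

static void
sleeplock_init(struct sleeplock *sl)
{
	pthread_mutex_init(&sl->sl_mtx, NULL);
	pthread_cond_init(&sl->sl_cv, NULL);
	sl->sl_locked = 0;
}

static void
sleeplock_acquire(struct sleeplock *sl)
{
	pthread_mutex_lock(&sl->sl_mtx);
	while (sl->sl_locked)			/* like sleeping with IN_WANTED set */
		pthread_cond_wait(&sl->sl_cv, &sl->sl_mtx);
	sl->sl_locked = 1;			/* like setting IN_LOCKED */
	pthread_mutex_unlock(&sl->sl_mtx);
}

static void
sleeplock_release(struct sleeplock *sl)
{
	pthread_mutex_lock(&sl->sl_mtx);
	sl->sl_locked = 0;			/* like clearing IN_LOCKED */
	pthread_cond_signal(&sl->sl_cv);	/* like wakeup() on the inode */
	pthread_mutex_unlock(&sl->sl_mtx);
}

Passing &vp->v_interlock to lockmgr() lets it drop the vnode interlock atomically before sleeping, which is what the LK_INTERLOCK callers elsewhere in this diff depend on.
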
/*
@@ -1664,9 +1666,7 @@ ufs_islocked(v)
struct vnode *a_vp;
} */ *ap = v;
- if (VTOI(ap->a_vp)->i_flag & IN_LOCKED)
- return (1);
- return (0);
+ return (lockstatus(&VTOI(ap->a_vp)->i_lock));
}
/*
@@ -1680,9 +1680,9 @@ ufs_strategy(v)
struct vop_strategy_args /* {
struct buf *a_bp;
} */ *ap = v;
- register struct buf *bp = ap->a_bp;
- register struct vnode *vp = bp->b_vp;
- register struct inode *ip;
+ struct buf *bp = ap->a_bp;
+ struct vnode *vp = bp->b_vp;
+ struct inode *ip;
int error;
ip = VTOI(vp);
@@ -1720,8 +1720,8 @@ ufs_print(v)
struct vop_print_args /* {
struct vnode *a_vp;
} */ *ap = v;
- register struct vnode *vp = ap->a_vp;
- register struct inode *ip = VTOI(vp);
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
printf("tag VT_UFS, ino %d, on dev %d, %d", ip->i_number,
major(ip->i_dev), minor(ip->i_dev));
@@ -1729,12 +1729,7 @@ ufs_print(v)
if (vp->v_type == VFIFO)
fifo_printinfo(vp);
#endif /* FIFO */
- printf("%s\n", (ip->i_flag & IN_LOCKED) ? " (LOCKED)" : "");
- if (ip->i_lockholder == 0)
- return (0);
- printf("\towner pid %d", ip->i_lockholder);
- if (ip->i_lockwaiter)
- printf(" waiting pid %d", ip->i_lockwaiter);
+ lockmgr_printinfo(&ip->i_lock);
printf("\n");
return (0);
}
@@ -1796,10 +1791,12 @@ ufsspec_close(v)
struct ucred *a_cred;
struct proc *a_p;
} */ *ap = v;
- register struct inode *ip = VTOI(ap->a_vp);
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
- if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
+ simple_lock(&vp->v_interlock);
+ if (ap->a_vp->v_usecount > 1)
ITIMES(ip, &time, &time);
+ simple_unlock(&vp->v_interlock);
return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
}
@@ -1864,10 +1861,13 @@ ufsfifo_close(v)
struct proc *a_p;
} */ *ap = v;
extern int (**fifo_vnodeop_p) __P((void *));
- register struct inode *ip = VTOI(ap->a_vp);
+ struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
- if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED))
+ simple_lock(&vp->v_interlock);
+ if (ap->a_vp->v_usecount > 1)
ITIMES(ip, &time, &time);
+ simple_unlock(&vp->v_interlock);
return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
}
#endif /* FIFO */
@@ -1924,7 +1924,7 @@ ufs_advlock(v)
struct flock *a_fl;
int a_flags;
} */ *ap = v;
- register struct inode *ip = VTOI(ap->a_vp);
+ struct inode *ip = VTOI(ap->a_vp);
return (lf_advlock(&ip->i_lockf, ip->i_ffs_size, ap->a_id, ap->a_op,
ap->a_fl, ap->a_flags));
@@ -1953,9 +1953,9 @@ ufs_vinit(mntp, specops, fifoops, vpp)
if ((nvp = checkalias(vp, ip->i_ffs_rdev, mntp)) != NULL) {
/*
* Discard unneeded vnode, but save its inode.
+ * Note that the lock is carried over in the inode
+ * to the replacement vnode.
*/
- ufs_ihashrem(ip);
- VOP_UNLOCK(vp);
nvp->v_data = vp->v_data;
vp->v_data = NULL;
vp->v_op = spec_vnodeop_p;
@@ -1966,7 +1966,6 @@ ufs_vinit(mntp, specops, fifoops, vpp)
*/
vp = nvp;
ip->i_vnode = vp;
- ufs_ihashins(ip);
}
break;
case VFIFO:
@@ -2005,7 +2004,8 @@ ufs_makeinode(mode, dvp, vpp, cnp)
struct vnode **vpp;
struct componentname *cnp;
{
- register struct inode *ip, *pdir;
+ struct inode *ip, *pdir;
+ struct direct newdir;
struct timespec ts;
struct vnode *tvp;
int error;
@@ -2040,7 +2040,10 @@ ufs_makeinode(mode, dvp, vpp, cnp)
ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
ip->i_ffs_mode = mode;
tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */
+ ip->i_effnlink = 1;
ip->i_ffs_nlink = 1;
+ if (DOINGSOFTDEP(tvp))
+ softdep_increase_linkcnt(ip);
if ((ip->i_ffs_mode & ISGID) &&
!groupmember(ip->i_ffs_gid, cnp->cn_cred) &&
suser(cnp->cn_cred, NULL))
@@ -2053,10 +2056,13 @@ ufs_makeinode(mode, dvp, vpp, cnp)
* Make sure inode goes to disk before directory entry.
*/
TIMEVAL_TO_TIMESPEC(&time, &ts);
- if ((error = VOP_UPDATE(tvp, &ts, &ts, 1)) != 0)
+ if ((error = VOP_UPDATE(tvp, &ts, &ts, !DOINGSOFTDEP(tvp))) != 0)
goto bad;
- if ((error = ufs_direnter(ip, dvp, cnp)) != 0)
+
+ ufs_makedirentry(ip, cnp, &newdir);
+ if ((error = ufs_direnter(dvp, &newdir, cnp, NULL)) != 0)
goto bad;
+
if ((cnp->cn_flags & SAVESTART) == 0)
FREE(cnp->cn_pnbuf, M_NAMEI);
vput(dvp);
@@ -2070,8 +2076,12 @@ bad:
*/
free(cnp->cn_pnbuf, M_NAMEI);
vput(dvp);
+ ip->i_effnlink = 0;
ip->i_ffs_nlink = 0;
ip->i_flag |= IN_CHANGE;
vput(tvp);
+
return (error);
}
+
+
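
A pattern that runs through this whole commit is the new i_effnlink field: namespace operations adjust it immediately, while the on-disk i_ffs_nlink decrement is deferred to the soft dependency code whenever DOINGSOFTDEP() is true. A toy sketch of the bookkeeping, with a plain deferred flag standing in for the dependency machinery, is:

struct toy_inode {
	int	t_effnlink;	/* link count the namespace code works with */
	int	t_nlink;	/* link count destined for the on-disk inode */
};

static void
toy_link(struct toy_inode *ip)
{
	/* both counts rise together, as in ufs_link() and ufs_mkdir() above */
	ip->t_effnlink++;
	ip->t_nlink++;
}

static void
toy_unlink(struct toy_inode *ip, int deferred)
{
	ip->t_effnlink--;	/* the removal is visible in core at once */
	if (!deferred)
		ip->t_nlink--;	/* otherwise the dependency code lowers it
				 * only after the directory block is on disk */
}
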