diff options
32 files changed, 2194 insertions, 1037 deletions
diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 34587d51cc8..4b2582677a0 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -1,4 +1,4 @@ -/* $OpenBSD: buf.h,v 1.7 1997/07/28 09:13:14 deraadt Exp $ */ +/* $OpenBSD: buf.h,v 1.8 1997/10/06 15:25:32 csapuntz Exp $ */ /* $NetBSD: buf.h,v 1.25 1997/04/09 21:12:17 mycroft Exp $ */ /* @@ -48,6 +48,27 @@ #define NOLIST ((struct buf *)0x87654321) /* + * To avoid including <ufs/ffs/softdep.h> + */ + +LIST_HEAD(workhead, worklist); + +/* + * These are currently used only by the soft dependency code, hence + * are stored once in a global variable. If other subsystems wanted + * to use these hooks, a pointer to a set of bio_ops could be added + * to each buffer. + */ +struct mount; +extern struct bio_ops { + void (*io_start) __P((struct buf *)); + void (*io_complete) __P((struct buf *)); + void (*io_deallocate) __P((struct buf *)); + int (*io_sync) __P((struct mount *)); +} bioops; + + +/* * The buffer header describes an I/O operation in the kernel. */ struct buf { @@ -79,6 +100,7 @@ struct buf { struct ucred *b_wcred; /* Write credentials reference. */ int b_validoff; /* Offset in buffer of valid region. */ int b_validend; /* Offset of end of valid region. */ + struct workhead b_dep; /* List of filesystem dependencies. */ }; /* @@ -177,6 +199,7 @@ int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, void brelse __P((struct buf *)); void bremfree __P((struct buf *)); void bufinit __P((void)); +void bdirty __P((struct buf *)); int bwrite __P((struct buf *)); void cluster_callback __P((struct buf *)); int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long, diff --git a/sys/sys/lock.h b/sys/sys/lock.h new file mode 100644 index 00000000000..f4491b09520 --- /dev/null +++ b/sys/sys/lock.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 1995 + * The Regents of the University of California. All rights reserved. 
+ * + * This code contains ideas from software contributed to Berkeley by + * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating + * System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)lock.h 8.12 (Berkeley) 5/19/95 + */ + +#ifndef _LOCK_H_ +#define _LOCK_H_ + +#include <sys/simplelock.h> + +/* + * The general lock structure. Provides for multiple shared locks, + * upgrading from shared to exclusive, and sleeping until the lock + * can be gained. The simple locks are defined in <sys/simplelock.h>. + */ +struct lock { + struct simplelock lk_interlock; /* lock on remaining fields */ + u_int lk_flags; /* see below */ + int lk_sharecount; /* # of accepted shared locks */ + int lk_waitcount; /* # of processes sleeping for lock */ + short lk_exclusivecount; /* # of recursive exclusive locks */ + short lk_prio; /* priority at which to sleep */ + char *lk_wmesg; /* resource sleeping (for tsleep) */ + int lk_timo; /* maximum sleep time (for tsleep) */ + pid_t lk_lockholder; /* pid of exclusive lock holder */ +}; +/* + * Lock request types: + * LK_SHARED - get one of many possible shared locks. If a process + * holding an exclusive lock requests a shared lock, the exclusive + * lock(s) will be downgraded to shared locks. + * LK_EXCLUSIVE - stop further shared locks, when they are cleared, + * grant a pending upgrade if it exists, then grant an exclusive + * lock. Only one exclusive lock may exist at a time, except that + * a process holding an exclusive lock may get additional exclusive + * locks if it explicitly sets the LK_CANRECURSE flag in the lock + * request, or if the LK_CANRECURSE flag was set when the lock was + * initialized. + * LK_UPGRADE - the process must hold a shared lock that it wants to + * have upgraded to an exclusive lock. Other processes may get + * exclusive access to the resource between the time that the upgrade + * is requested and the time that it is granted. + * LK_EXCLUPGRADE - the process must hold a shared lock that it wants to + * have upgraded to an exclusive lock. 
If the request succeeds, no + * other processes will have gotten exclusive access to the resource + * between the time that the upgrade is requested and the time that + * it is granted. However, if another process has already requested + * an upgrade, the request will fail (see error returns below). + * LK_DOWNGRADE - the process must hold an exclusive lock that it wants + * to have downgraded to a shared lock. If the process holds multiple + * (recursive) exclusive locks, they will all be downgraded to shared + * locks. + * LK_RELEASE - release one instance of a lock. + * LK_DRAIN - wait for all activity on the lock to end, then mark it + * decommissioned. This feature is used before freeing a lock that + * is part of a piece of memory that is about to be freed. + * + * These are flags that are passed to the lockmgr routine. + */ +#define LK_TYPE_MASK 0x0000000f /* type of lock sought */ +#define LK_SHARED 0x00000001 /* shared lock */ +#define LK_EXCLUSIVE 0x00000002 /* exclusive lock */ +#define LK_UPGRADE 0x00000003 /* shared-to-exclusive upgrade */ +#define LK_EXCLUPGRADE 0x00000004 /* first shared-to-exclusive upgrade */ +#define LK_DOWNGRADE 0x00000005 /* exclusive-to-shared downgrade */ +#define LK_RELEASE 0x00000006 /* release any type of lock */ +#define LK_DRAIN 0x00000007 /* wait for all lock activity to end */ +/* + * External lock flags. + * + * The first three flags may be set in lockinit to set their mode permanently, + * or passed in as arguments to the lock manager. The LK_REENABLE flag may be + * set only at the release of a lock obtained by drain. + */ +#define LK_EXTFLG_MASK 0x00000070 /* mask of external flags */ +#define LK_NOWAIT 0x00000010 /* do not sleep to await lock */ +#define LK_SLEEPFAIL 0x00000020 /* sleep, then return failure */ +#define LK_CANRECURSE 0x00000040 /* allow recursive exclusive lock */ +#define LK_REENABLE 0x00000080 /* lock is to be reenabled after drain */ +/* + * Internal lock flags. 
+ * + * These flags are used internally to the lock manager. + */ +#define LK_WANT_UPGRADE 0x00000100 /* waiting for share-to-excl upgrade */ +#define LK_WANT_EXCL 0x00000200 /* exclusive lock sought */ +#define LK_HAVE_EXCL 0x00000400 /* exclusive lock obtained */ +#define LK_WAITDRAIN 0x00000800 /* process waiting for lock to drain */ +#define LK_DRAINING 0x00004000 /* lock is being drained */ +#define LK_DRAINED 0x00008000 /* lock has been decommissioned */ +/* + * Control flags + * + * Non-persistent external flags. + */ +#define LK_INTERLOCK 0x00010000 /* unlock passed simple lock after + getting lk_interlock */ +#define LK_RETRY 0x00020000 /* vn_lock: retry until locked */ + +/* + * Lock return status. + * + * Successfully obtained locks return 0. Locks will always succeed + * unless one of the following is true: + * LK_EXCLUPGRADE is requested and some other process has already + * requested a lock upgrade (returns EBUSY). + * LK_NOWAIT is set and a sleep would be required (returns EBUSY). + * LK_SLEEPFAIL is set and a sleep was done (returns ENOLCK). + * PCATCH is set in lock priority and a signal arrives (returns + * either EINTR or ERESTART if the system call is to be restarted). + * Non-null lock timeout and timeout expires (returns EWOULDBLOCK). + * A failed lock attempt always returns a non-zero error value. No lock + * is held after an error return (in particular, a failed LK_UPGRADE + * or LK_EXCLUPGRADE will have released its shared access lock). 
+ */ + +/* + * Indicator that no process holds exclusive lock + */ +#define LK_KERNPROC ((pid_t) -2) +#define LK_NOPROC ((pid_t) -1) + +struct proc; + +void lockinit __P((struct lock *, int prio, char *wmesg, int timo, + int flags)); +int lockmgr __P((__volatile struct lock *, u_int flags, + struct simplelock *, struct proc *p)); +void lockmgr_printinfo __P((struct lock *)); +int lockstatus __P((struct lock *)); + +#endif /* !_LOCK_H_ */ + diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h index 4b87be6fa20..3e380f50dfd 100644 --- a/sys/sys/malloc.h +++ b/sys/sys/malloc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: malloc.h,v 1.10 1997/03/01 21:24:46 kstailey Exp $ */ +/* $OpenBSD: malloc.h,v 1.11 1997/10/06 15:25:33 csapuntz Exp $ */ /* $NetBSD: malloc.h,v 1.23 1996/04/05 04:52:52 mhitch Exp $ */ /* @@ -128,8 +128,25 @@ #define M_PFIL 73 /* packer filter */ #define M_TDB 75 /* Transforms database */ #define M_XDATA 76 /* IPsec data */ -#define M_TEMP 84 /* misc temporary data buffers */ -#define M_LAST 85 /* Must be last type + 1 */ +#define M_VFS 77 /* VFS file systems */ + +#define M_PAGEDEP 78 /* File page dependencies */ +#define M_INODEDEP 79 /* Inode dependencies */ +#define M_NEWBLK 80 /* New block allocation */ +#define M_BMSAFEMAP 81 /* Block or frag allocated from cyl group map */ +#define M_ALLOCDIRECT 82 /* Block or frag dependency for an inode */ +#define M_INDIRDEP 83 /* Indirect block dependencies */ +#define M_ALLOCINDIR 84 /* Block dependency for an indirect block */ +#define M_FREEFRAG 85 /* Previously used frag for an inode */ +#define M_FREEBLKS 86 /* Blocks freed from an inode */ +#define M_FREEFILE 87 /* Inode deallocated */ +#define M_DIRADD 88 /* New directory entry */ +#define M_MKDIR 89 /* New directory */ +#define M_DIRREM 90 /* Directory entry deleted */ + +#define M_TEMP 127 /* misc temporary data buffers */ +#define M_LAST 128 /* Must be last type + 1 */ + #define INITKMEMNAMES { \ "free", /* 0 M_FREE */ \ @@ -209,9 +226,29 @@ NULL, \ "tdb", /* 75 
M_TDB */ \ "xform_data", /* 76 M_XDATA */ \ - NULL, NULL, \ + "vfs", /* 77 M_VFS */ \ + "pagedep", /* 78 M_PAGEDEP */ \ + "inodedep", /* 79 M_INODEDEP */ \ + "newblk", /* 80 M_NEWBLK */ \ + "bmsafemap", /* 81 M_BMSAFEMAP */ \ + "allocdirect", /* 82 M_ALLOCDIRECT */ \ + "indirdep", /* 83 M_INDIRDEP */ \ + "allocindir", /* 84 M_ALLOCINDIR */ \ + "freefrag", /* 85 M_FREEFRAG */ \ + "freeblks", /* 86 M_FREEBLKS */ \ + "freefile", /* 87 M_FREEFILE */ \ + "diradd", /* 88 M_DIRADD */ \ + "mkdir", /* 89 M_MKDIR */ \ + "dirrem", /* 90 M_DIRREM */ \ + NULL, NULL, NULL, NULL, NULL, \ + NULL, NULL, NULL, NULL, NULL, \ + NULL, NULL, NULL, NULL, NULL, \ + NULL, NULL, NULL, NULL, NULL, \ + NULL, NULL, NULL, NULL, NULL, \ + NULL, NULL, NULL, NULL, NULL, \ NULL, NULL, NULL, NULL, NULL, \ - "temp", /* 84 M_TEMP */ \ + NULL, \ + "temp", /* 127 M_TEMP */ \ } struct kmemstats { diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 2ad19911a6a..776740078d0 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mount.h,v 1.18 1997/04/16 09:49:00 downsj Exp $ */ +/* $OpenBSD: mount.h,v 1.19 1997/10/06 15:25:33 csapuntz Exp $ */ /* $NetBSD: mount.h,v 1.48 1996/02/18 11:55:47 fvdl Exp $ */ /* @@ -43,6 +43,7 @@ #include <sys/ucred.h> #endif #include <sys/queue.h> +#include <sys/lock.h> typedef struct { int32_t val[2]; } fsid_t; /* file system id type */ @@ -55,7 +56,7 @@ typedef struct { int32_t val[2]; } fsid_t; /* file system id type */ struct fid { u_short fid_len; /* length of data in bytes */ u_short fid_reserved; /* force longword alignment */ - char fid_data[MAXFIDSZ]; /* data (variable length) */ + char fid_data[MAXFIDSZ]; /* data (variable length) */ }; /* @@ -77,7 +78,9 @@ struct statfs { long f_ffree; /* free file nodes in fs */ fsid_t f_fsid; /* file system id */ uid_t f_owner; /* user that mounted the file system */ - long f_spare[4]; /* spare for later */ + long f_syncwrites; /* count of sync writes since mount */ + long f_asyncwrites; /* count of async 
writes since mount */ + long f_spare[2]; /* spare for later */ char f_fstypename[MFSNAMELEN]; /* fs type name */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ char f_mntfromname[MNAMELEN]; /* mounted file system */ @@ -116,8 +119,11 @@ LIST_HEAD(vnodelst, vnode); struct mount { CIRCLEQ_ENTRY(mount) mnt_list; /* mount list */ struct vfsops *mnt_op; /* operations on fs */ + struct vfsconf *mnt_vfc; /* configuration info */ struct vnode *mnt_vnodecovered; /* vnode we mounted on */ + struct vnode *mnt_syncer; /* syncer vnode */ struct vnodelst mnt_vnodelist; /* list of vnodes this mount */ + struct lock mnt_lock; /* mount structure lock */ int mnt_flag; /* flags */ int mnt_maxsymlinklen; /* max size of short symlink */ struct statfs mnt_stat; /* cache of filesystem stats */ @@ -161,7 +167,7 @@ struct mount { /* * Mask of flags that are visible to statfs() */ -#define MNT_VISFLAGMASK 0x0000ffff +#define MNT_VISFLAGMASK 0x0400ffff /* * filesystem control flags. @@ -180,6 +186,37 @@ struct mount { #define MNT_MPWANT 0x00800000 /* waiting for mount point */ #define MNT_UNMOUNT 0x01000000 /* unmount in progress */ #define MNT_WANTRDWR 0x02000000 /* want upgrade to read/write */ +#define MNT_SOFTDEP 0x04000000 /* soft dependencies being done */ +/* + * Sysctl CTL_VFS definitions. + * + * Second level identifier specifies which filesystem. Second level + * identifier VFS_GENERIC returns information about all filesystems. + */ +#define VFS_GENERIC 0 /* generic filesystem information */ +/* + * Third level identifiers for VFS_GENERIC are given below; third + * level identifiers for specific filesystems are given in their + * mount specific header files. + */ +#define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ +#define VFS_CONF 2 /* struct: vfsconf for filesystem given + as next argument */ + +/* + * Filesystem configuration information. One of these exists for each + * type of filesystem supported by the kernel. 
These are searched at + * mount time to identify the requested filesystem. + */ +struct vfsconf { + struct vfsops *vfc_vfsops; /* filesystem operations vector */ + char vfc_name[MFSNAMELEN]; /* filesystem type name */ + int vfc_typenum; /* historic filesystem type number */ + int vfc_refcount; /* number mounted of this type */ + int vfc_flags; /* permanent flags */ + int (*vfc_mountroot)(void); /* if != NULL, routine to mount root */ + struct vfsconf *vfc_next; /* next in list */ +}; /* * Operations supported on mounted file system. @@ -190,8 +227,10 @@ struct nameidata; struct mbuf; #endif +extern int maxvfsconf; /* highest defined filesystem type */ +extern struct vfsconf *vfsconf; /* head of list of filesystem types */ + struct vfsops { - char *vfs_name; int (*vfs_mount) __P((struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); int (*vfs_start) __P((struct mount *mp, int flags, @@ -211,8 +250,9 @@ struct vfsops { struct mbuf *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp)); int (*vfs_vptofh) __P((struct vnode *vp, struct fid *fhp)); - void (*vfs_init) __P((void)); - int vfs_refcount; + int (*vfs_init) __P((struct vfsconf *)); + int (*vfs_sysctl) __P((int *, u_int, void *, size_t *, void *, + size_t, struct proc *)); }; #define VFS_MOUNT(MP, PATH, DATA, NDP, P) \ @@ -234,8 +274,9 @@ struct vfsops { * * waitfor flags to vfs_sync() and getfsstat() */ -#define MNT_WAIT 1 -#define MNT_NOWAIT 2 +#define MNT_WAIT 1 /* synchronously wait for I/O to complete */ +#define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ +#define MNT_LAZY 3 /* push data not written by filesystem syncer */ /* * Generic file handle @@ -446,21 +487,25 @@ struct adosfs_args { /* * exported vnode operations */ +int vfs_busy __P((struct mount *, int, struct simplelock *, struct proc *)); +void vfs_getnewfsid __P((struct mount *)); +struct mount *vfs_getvfs __P((fsid_t *)); +int vfs_mountedon __P((struct vnode *)); +int vfs_mountroot 
__P((void)); +int vfs_rootmountalloc __P((char *, char *, struct mount **)); +void vfs_unbusy __P((struct mount *, struct proc *)); +void vfs_unmountall __P((void)); +extern CIRCLEQ_HEAD(mntlist, mount) mountlist; +extern struct simplelock mountlist_slock; + struct mount *getvfs __P((fsid_t *)); /* return vfs given fsid */ int vfs_export /* process mount export info */ __P((struct mount *, struct netexport *, struct export_args *)); struct netcred *vfs_export_lookup /* lookup host in fs export list */ __P((struct mount *, struct netexport *, struct mbuf *)); -int vfs_lock __P((struct mount *)); /* lock a vfs */ -int vfs_mountedon __P((struct vnode *));/* is a vfs mounted on vp */ +int vfs_allocate_syncvnode __P((struct mount *)); + void vfs_shutdown __P((void)); /* unmount and sync file systems */ -void vfs_unlock __P((struct mount *)); /* unlock a vfs */ -void vfs_unmountall __P((void)); /* unmount file systems */ -int vfs_busy __P((struct mount *)); -void vfs_unbusy __P((struct mount *)); -extern CIRCLEQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ -extern struct vfsops *vfssw[]; /* filesystem type table */ -extern int nvfssw; long makefstype __P((char *)); int dounmount __P((struct mount *, int, struct proc *)); void vfsinit __P((void)); @@ -479,6 +524,8 @@ int getmntinfo __P((struct statfs **, int)); int mount __P((const char *, const char *, int, void *)); int statfs __P((const char *, struct statfs *)); int unmount __P((const char *, int)); + + __END_DECLS #endif /* _KERNEL */ diff --git a/sys/sys/param.h b/sys/sys/param.h index a7d227ee2da..d9b459abc71 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -1,4 +1,4 @@ -/* $OpenBSD: param.h,v 1.15 1997/10/01 21:53:36 deraadt Exp $ */ +/* $OpenBSD: param.h,v 1.16 1997/10/06 15:25:34 csapuntz Exp $ */ /* $NetBSD: param.h,v 1.23 1996/03/17 01:02:29 thorpej Exp $ */ /*- @@ -54,6 +54,7 @@ #ifndef _LOCORE #include <sys/types.h> +#include <sys/simplelock.h> #endif /* diff --git a/sys/sys/queue.h 
b/sys/sys/queue.h index 962009c90d4..e617d3c4052 100644 --- a/sys/sys/queue.h +++ b/sys/sys/queue.h @@ -1,4 +1,4 @@ -/* $OpenBSD: queue.h,v 1.4 1996/05/22 12:07:15 deraadt Exp $ */ +/* $OpenBSD: queue.h,v 1.5 1997/10/06 15:25:34 csapuntz Exp $ */ /* $NetBSD: queue.h,v 1.11 1996/05/16 05:17:14 mycroft Exp $ */ /* @@ -62,7 +62,7 @@ * linked so that an arbitrary element can be removed without a need to * traverse the list. New elements can be added to the list before or * after an existing element, at the head of the list, or at the end of - * the list. A tail queue may only be traversed in the forward direction. + * the list. A tail queue may be traversed in either direction. * * A circle queue is headed by a pair of pointers, one to the head of the * list and the other to the tail of the list. The elements are doubly @@ -81,7 +81,7 @@ #define LIST_HEAD(name, type) \ struct name { \ struct type *lh_first; /* first element */ \ -} +} #define LIST_ENTRY(type) \ struct { \ @@ -89,41 +89,45 @@ struct { \ struct type **le_prev; /* address of previous next element */ \ } +#define LIST_FIRST(head) ((head)->lh_first) +#define LIST_NEXT(elm, field) ((elm)->field.le_next) +#define LIST_END(head) NULL + /* * List functions. 
*/ -#define LIST_INIT(head) { \ +#define LIST_INIT(head) do { \ (head)->lh_first = NULL; \ -} +} while (0) -#define LIST_INSERT_AFTER(listelm, elm, field) { \ +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \ (listelm)->field.le_next->field.le_prev = \ &(elm)->field.le_next; \ (listelm)->field.le_next = (elm); \ (elm)->field.le_prev = &(listelm)->field.le_next; \ -} +} while (0) -#define LIST_INSERT_BEFORE(listelm, elm, field) { \ +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ (elm)->field.le_prev = (listelm)->field.le_prev; \ (elm)->field.le_next = (listelm); \ *(listelm)->field.le_prev = (elm); \ (listelm)->field.le_prev = &(elm)->field.le_next; \ -} +} while (0) -#define LIST_INSERT_HEAD(head, elm, field) { \ +#define LIST_INSERT_HEAD(head, elm, field) do { \ if (((elm)->field.le_next = (head)->lh_first) != NULL) \ (head)->lh_first->field.le_prev = &(elm)->field.le_next;\ (head)->lh_first = (elm); \ (elm)->field.le_prev = &(head)->lh_first; \ -} +} while (0) -#define LIST_REMOVE(elm, field) { \ +#define LIST_REMOVE(elm, field) do { \ if ((elm)->field.le_next != NULL) \ (elm)->field.le_next->field.le_prev = \ (elm)->field.le_prev; \ *(elm)->field.le_prev = (elm)->field.le_next; \ -} +} while (0) /* * Simple queue definitions. @@ -142,33 +146,33 @@ struct { \ /* * Simple queue functions. 
*/ -#define SIMPLEQ_INIT(head) { \ +#define SIMPLEQ_INIT(head) do { \ (head)->sqh_first = NULL; \ (head)->sqh_last = &(head)->sqh_first; \ -} +} while (0) -#define SIMPLEQ_INSERT_HEAD(head, elm, field) { \ +#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \ if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \ (head)->sqh_last = &(elm)->field.sqe_next; \ (head)->sqh_first = (elm); \ -} +} while (0) -#define SIMPLEQ_INSERT_TAIL(head, elm, field) { \ +#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \ (elm)->field.sqe_next = NULL; \ *(head)->sqh_last = (elm); \ (head)->sqh_last = &(elm)->field.sqe_next; \ -} +} while (0) -#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) { \ +#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\ (head)->sqh_last = &(elm)->field.sqe_next; \ (listelm)->field.sqe_next = (elm); \ -} +} while (0) -#define SIMPLEQ_REMOVE_HEAD(head, elm, field) { \ +#define SIMPLEQ_REMOVE_HEAD(head, elm, field) do { \ if (((head)->sqh_first = (elm)->field.sqe_next) == NULL) \ (head)->sqh_last = &(head)->sqh_first; \ -} +} while (0) /* * Tail queue definitions. @@ -185,15 +189,24 @@ struct { \ struct type **tqe_prev; /* address of previous next element */ \ } + +#define TAILQ_FIRST(head) ((head)->tqh_first) +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) +#define TAILQ_END(head) NULL +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) +#define TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + /* * Tail queue functions. 
*/ -#define TAILQ_INIT(head) { \ +#define TAILQ_INIT(head) do { \ (head)->tqh_first = NULL; \ (head)->tqh_last = &(head)->tqh_first; \ -} +} while (0) -#define TAILQ_INSERT_HEAD(head, elm, field) { \ +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \ (head)->tqh_first->field.tqe_prev = \ &(elm)->field.tqe_next; \ @@ -201,16 +214,16 @@ struct { \ (head)->tqh_last = &(elm)->field.tqe_next; \ (head)->tqh_first = (elm); \ (elm)->field.tqe_prev = &(head)->tqh_first; \ -} +} while (0) -#define TAILQ_INSERT_TAIL(head, elm, field) { \ +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ (elm)->field.tqe_next = NULL; \ (elm)->field.tqe_prev = (head)->tqh_last; \ *(head)->tqh_last = (elm); \ (head)->tqh_last = &(elm)->field.tqe_next; \ -} +} while (0) -#define TAILQ_INSERT_AFTER(head, listelm, elm, field) { \ +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\ (elm)->field.tqe_next->field.tqe_prev = \ &(elm)->field.tqe_next; \ @@ -218,23 +231,23 @@ struct { \ (head)->tqh_last = &(elm)->field.tqe_next; \ (listelm)->field.tqe_next = (elm); \ (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \ -} +} while (0) -#define TAILQ_INSERT_BEFORE(listelm, elm, field) { \ +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ (elm)->field.tqe_next = (listelm); \ *(listelm)->field.tqe_prev = (elm); \ (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \ -} +} while (0) -#define TAILQ_REMOVE(head, elm, field) { \ +#define TAILQ_REMOVE(head, elm, field) do { \ if (((elm)->field.tqe_next) != NULL) \ (elm)->field.tqe_next->field.tqe_prev = \ (elm)->field.tqe_prev; \ else \ (head)->tqh_last = (elm)->field.tqe_prev; \ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \ -} +} while (0) /* * Circular queue definitions. 
@@ -251,15 +264,21 @@ struct { \ struct type *cqe_prev; /* previous element */ \ } +#define CIRCLEQ_FIRST(head) ((head)->cqh_first) +#define CIRCLEQ_LAST(head) ((head)->cqh_last) +#define CIRCLEQ_END(head) ((void *)(head)) +#define CIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next) +#define CIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev) + /* * Circular queue functions. */ -#define CIRCLEQ_INIT(head) { \ +#define CIRCLEQ_INIT(head) do { \ (head)->cqh_first = (void *)(head); \ (head)->cqh_last = (void *)(head); \ -} +} while (0) -#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) { \ +#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \ (elm)->field.cqe_next = (listelm)->field.cqe_next; \ (elm)->field.cqe_prev = (listelm); \ if ((listelm)->field.cqe_next == (void *)(head)) \ @@ -267,9 +286,9 @@ struct { \ else \ (listelm)->field.cqe_next->field.cqe_prev = (elm); \ (listelm)->field.cqe_next = (elm); \ -} +} while (0) -#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) { \ +#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \ (elm)->field.cqe_next = (listelm); \ (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \ if ((listelm)->field.cqe_prev == (void *)(head)) \ @@ -277,9 +296,9 @@ struct { \ else \ (listelm)->field.cqe_prev->field.cqe_next = (elm); \ (listelm)->field.cqe_prev = (elm); \ -} +} while (0) -#define CIRCLEQ_INSERT_HEAD(head, elm, field) { \ +#define CIRCLEQ_INSERT_HEAD(head, elm, field) do { \ (elm)->field.cqe_next = (head)->cqh_first; \ (elm)->field.cqe_prev = (void *)(head); \ if ((head)->cqh_last == (void *)(head)) \ @@ -287,9 +306,9 @@ struct { \ else \ (head)->cqh_first->field.cqe_prev = (elm); \ (head)->cqh_first = (elm); \ -} +} while (0) -#define CIRCLEQ_INSERT_TAIL(head, elm, field) { \ +#define CIRCLEQ_INSERT_TAIL(head, elm, field) do { \ (elm)->field.cqe_next = (void *)(head); \ (elm)->field.cqe_prev = (head)->cqh_last; \ if ((head)->cqh_first == (void *)(head)) \ @@ -297,9 +316,9 @@ struct { \ else \ 
(head)->cqh_last->field.cqe_next = (elm); \ (head)->cqh_last = (elm); \ -} +} while (0) -#define CIRCLEQ_REMOVE(head, elm, field) { \ +#define CIRCLEQ_REMOVE(head, elm, field) do { \ if ((elm)->field.cqe_next == (void *)(head)) \ (head)->cqh_last = (elm)->field.cqe_prev; \ else \ @@ -310,5 +329,5 @@ struct { \ else \ (elm)->field.cqe_prev->field.cqe_next = \ (elm)->field.cqe_next; \ -} +} while (0) #endif /* !_SYS_QUEUE_H_ */ diff --git a/sys/sys/simplelock.h b/sys/sys/simplelock.h new file mode 100644 index 00000000000..c979f157b08 --- /dev/null +++ b/sys/sys/simplelock.h @@ -0,0 +1,86 @@ +#ifndef _SIMPLELOCK_H_ +#define _SIMPLELOCK_H_ +/* + * A simple spin lock. + * + * This structure only sets one bit of data, but is sized based on the + * minimum word size that can be operated on by the hardware test-and-set + * instruction. It is only needed for multiprocessors, as uniprocessors + * will always run to completion or a sleep. It is an error to hold one + * of these locks while a process is sleeping. 
+ */ +struct simplelock { + int lock_data; +}; + +#ifndef NCPUS +#define NCPUS 1 +#endif + +#if NCPUS == 1 + +#if !defined(DEBUG) +#define simple_lock(alp) +#define simple_lock_try(alp) (1) /* always succeeds */ +#define simple_unlock(alp) + +static __inline void simple_lock_init __P((struct simplelock *)); + +static __inline void +simple_lock_init(lkp) + struct simplelock *lkp; +{ + + lkp->lock_data = 0; +} + +#else + +void _simple_unlock __P((__volatile struct simplelock *alp, const char *, int)); +#define simple_unlock(alp) _simple_unlock(alp, __FILE__, __LINE__) +int _simple_lock_try __P((__volatile struct simplelock *alp, const char *, int)); +#define simple_lock_try(alp) _simple_lock_try(alp, __FILE__, __LINE__) +void _simple_lock __P((__volatile struct simplelock *alp, const char *, int)); +#define simple_lock(alp) _simple_lock(alp, __FILE__, __LINE__) +void simple_lock_init __P((struct simplelock *alp)); + +#endif /* !defined(DEBUG) */ + +#else /* NCPUS > 1 */ + +/* + * The simple-lock routines are the primitives out of which the lock + * package is built. The machine-dependent code must implement an + * atomic test_and_set operation that indivisibly sets the simple lock + * to non-zero and returns its old value. It also assumes that the + * setting of the lock to zero below is indivisible. Simple locks may + * only be used for exclusive locks. 
+ */ + +static __inline void +simple_lock(lkp) + __volatile struct simplelock *lkp; +{ + + while (test_and_set(&lkp->lock_data)) + continue; +} + +static __inline int +simple_lock_try(lkp) + __volatile struct simplelock *lkp; +{ + + return (!test_and_set(&lkp->lock_data)); /* 1 if lock was free and is now ours, else 0 */ +} + +static __inline void +simple_unlock(lkp) + __volatile struct simplelock *lkp; +{ + + lkp->lock_data = 0; +} +#endif /* NCPUS > 1 */ + +#endif /* !_SIMPLELOCK_H_ */ diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h index 9eb21269f2c..3cc255b0b1c 100644 --- a/sys/sys/sysctl.h +++ b/sys/sys/sysctl.h @@ -1,4 +1,4 @@ -/* $OpenBSD: sysctl.h,v 1.18 1997/09/08 17:28:18 kstailey Exp $ */ +/* $OpenBSD: sysctl.h,v 1.19 1997/10/06 15:25:35 csapuntz Exp $ */ /* $NetBSD: sysctl.h,v 1.16 1996/04/09 20:55:36 cgd Exp $ */ /* @@ -49,9 +49,10 @@ #include <sys/time.h> #include <sys/ucred.h> #include <sys/proc.h> -#include <vm/vm.h> #endif +#include <vm/vm.h> + /* * Definitions for sysctl call. The sysctl call uses a hierarchical name * for objects that can be examined or modified. 
The name is expressed as @@ -93,7 +94,8 @@ struct ctlname { #define CTL_MACHDEP 7 /* machine dependent */ #define CTL_USER 8 /* user-level */ #define CTL_DDB 9 /* DDB user interface, see ddb_var.h */ -#define CTL_MAXID 10 /* number of valid top-level ids */ +#define CTL_VFS 10 /* VFS sysctl's */ +#define CTL_MAXID 11 /* number of valid top-level ids */ #define CTL_NAMES { \ { 0, 0 }, \ @@ -106,6 +108,7 @@ struct ctlname { { "machdep", CTLTYPE_NODE }, \ { "user", CTLTYPE_NODE }, \ { "ddb", CTLTYPE_NODE }, \ + { "vfs", CTLTYPE_NODE }, \ } /* @@ -383,7 +386,7 @@ int sysctl_rtable __P((int *, u_int, void *, size_t *, void *, size_t)); int sysctl_clockrate __P((char *, size_t *)); int sysctl_rdstring __P((void *, size_t *, void *, char *)); int sysctl_rdstruct __P((void *, size_t *, void *, void *, int)); -int sysctl_vnode __P((char *, size_t *)); +int sysctl_vnode __P((char *, size_t *, struct proc *)); int sysctl_ntptime __P((char *, size_t *)); #ifdef GPROF int sysctl_doprof __P((int *, u_int, void *, size_t *, void *, size_t)); @@ -409,6 +412,8 @@ int net_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); int cpu_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); +int vfs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, + struct proc *)); #else /* !_KERNEL */ #include <sys/cdefs.h> diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 757aa464ec2..1dbd1ed3c57 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -1,4 +1,4 @@ -/* $OpenBSD: systm.h,v 1.20 1997/03/06 07:05:54 tholo Exp $ */ +/* $OpenBSD: systm.h,v 1.21 1997/10/06 15:25:35 csapuntz Exp $ */ /* $NetBSD: systm.h,v 1.50 1996/06/09 04:55:09 briggs Exp $ */ /*- @@ -128,7 +128,7 @@ int enodev __P((void)); int enosys __P((void)); int enoioctl __P((void)); int enxio __P((void)); -int eopnotsupp __P((void)); +int eopnotsupp __P((void *)); int lkmenodev __P((void)); @@ -240,7 +240,7 @@ void kmstartup __P((void)); int nfs_mountroot __P((void)); int 
dk_mountroot __P((void)); -int (*mountroot) __P((void)); +int (*mountroot)__P((void)); #include <lib/libkern/libkern.h> diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index ed2fbcebca7..ebb93d38447 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vnode.h,v 1.8 1996/07/14 08:54:05 downsj Exp $ */ +/* $OpenBSD: vnode.h,v 1.9 1997/10/06 15:25:36 csapuntz Exp $ */ /* $NetBSD: vnode.h,v 1.38 1996/02/29 20:59:05 cgd Exp $ */ /* @@ -37,6 +37,7 @@ */ #include <sys/queue.h> +#include <sys/lock.h> /* * The vnode is the focus of all file activity in UNIX. There is a @@ -60,7 +61,7 @@ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD }; enum vtagtype { VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_MSDOSFS, VT_LFS, VT_LOFS, VT_FDESC, VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS, - VT_UNION, VT_ADOSFS, VT_EXT2FS, VT_NCPFS + VT_UNION, VT_ADOSFS, VT_EXT2FS, VT_NCPFS, VT_VFS }; /* @@ -69,6 +70,14 @@ enum vtagtype { */ LIST_HEAD(buflists, buf); +/* + * Reading or writing any of these items requires holding the appropriate lock. + * v_freelist is locked by the global vnode_free_list simple lock. + * v_mntvnodes is locked by the global mntvnodes simple lock. + * v_flag, v_usecount, v_holdcount and v_writecount are + * locked by the v_interlock simple lock. 
+ */ + struct vnode { u_long v_flag; /* vnode flags (see below) */ short v_usecount; /* reference count of users */ @@ -83,6 +92,7 @@ struct vnode { struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ long v_numoutput; /* num of writes in progress */ + LIST_ENTRY(vnode) v_synclist; /* vnode with dirty buffers */ enum vtype v_type; /* vnode type */ union { struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ @@ -98,7 +108,9 @@ struct vnode { int v_clen; /* length of current cluster */ int v_ralen; /* Read-ahead length */ daddr_t v_maxra; /* last readahead block */ - long v_spare[7]; /* round to 128 bytes */ + struct simplelock v_interlock; /* lock on usecount and flag */ + struct lock *v_vnlock; /* used for non-locking fs's */ + long v_spare[3]; /* round to 128 bytes */ enum vtagtype v_tag; /* type of underlying data */ void *v_data; /* private data for fs */ }; @@ -120,6 +132,7 @@ struct vnode { #define VBWAIT 0x0400 /* waiting for output to complete */ #define VALIASED 0x0800 /* vnode has an alias */ #define VDIROP 0x1000 /* LFS: vnode is involved in a directory op */ +#define VGONEHACK 0x2000 /* vgone: don't put me on the head of the free list */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value @@ -151,7 +164,7 @@ struct vattr { * Flags for va_cflags. */ #define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ - +#define VA_EXCLUSIVE 0x02 /* exclusive create request */ /* * Flags for ioflag. 
*/ @@ -197,6 +210,14 @@ extern int vttoif_tab[]; #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_SAVEMETA 0x0002 /* vinvalbuf: leave indirect blocks */ +#define REVOKEALL 0x0001 /* vop_reovke: revoke all aliases */ + + +TAILQ_HEAD(freelst, vnode); +extern struct freelst vnode_hold_list; /* free vnodes referencing buffers */ +extern struct freelst vnode_free_list; /* vnode free list */ +extern struct simplelock vnode_free_list_slock; + #ifdef DIAGNOSTIC #define HOLDRELE(vp) holdrele(vp) #define VATTR_NULL(vap) vattr_null(vap) @@ -208,11 +229,47 @@ void vattr_null __P((struct vattr *)); void vhold __P((struct vnode *)); void vref __P((struct vnode *)); #else -#define HOLDRELE(vp) (vp)->v_holdcnt-- /* decrease buf or page ref */ +#define HOLDRELE(vp) holdrele(vp); /* decrease buf or page ref */ #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */ -#define VHOLD(vp) (vp)->v_holdcnt++ /* increase buf or page ref */ -#define VREF(vp) (vp)->v_usecount++ /* increase reference */ -#endif + +static __inline holdrele(vp) + struct vnode *vp; +{ + simple_lock(&vp->v_interlock); + vp->v_holdcnt--; + if (!(vp->v_flag & VGONEHACK) && + vp->v_holdcnt == 0 && vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } + simple_unlock(&vp->v_interlock); +} +#define VHOLD(vp) vhold(vp) /* increase buf or page ref */ +static __inline vhold(vp) + struct vnode *vp; +{ + simple_lock(&vp->v_interlock); + if (!(vp->v_flag & VGONEHACK) && + vp->v_holdcnt == 0 && vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); + } + vp->v_holdcnt++; + simple_unlock(&vp->v_interlock); +} +#define VREF(vp) vref(vp) /* increase reference */ +static __inline vref(vp) + struct 
vnode *vp; +{ + simple_lock(&vp->v_interlock); + vp->v_usecount++; + simple_unlock(&vp->v_interlock); +} +#endif /* DIAGNOSTIC */ #define NULLVP ((struct vnode *)NULL) @@ -220,6 +277,7 @@ void vref __P((struct vnode *)); * Global vnode data. */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ +extern time_t syncdelay; /* time to delay syncing vnodes */ extern int desiredvnodes; /* number of vnodes desired */ extern struct vattr va_null; /* predefined null vattr structure */ @@ -289,6 +347,11 @@ extern struct vnodeop_desc *vnodeop_descs[]; /* + * Interlock for scanning list of vnodes attached to a mountpoint + */ +struct simplelock mntvnode_slock; + +/* * This macro is very helpful in defining those offsets in the vdesc struct. * * This is stolen from X11R4. I ingored all the fancy stuff for @@ -371,14 +434,15 @@ int getvnode __P((struct filedesc *fdp, int fd, struct file **fpp)); void getnewfsid __P((struct mount *, int)); void vattr_null __P((struct vattr *vap)); int vcount __P((struct vnode *vp)); -void vclean __P((struct vnode *, int)); +void vclean __P((struct vnode *, int, struct proc *)); int vfinddev __P((dev_t, enum vtype, struct vnode **)); void vflushbuf __P((struct vnode *vp, int sync)); int vflush __P((struct mount *mp, struct vnode *vp, int flags)); void vntblinit __P((void)); void vwakeup __P((struct buf *)); -int vget __P((struct vnode *vp, int lockflag)); +int vget __P((struct vnode *vp, int lockflag, struct proc *p)); void vgone __P((struct vnode *vp)); +void vgonel __P((struct vnode *, struct proc *)); void vgoneall __P((struct vnode *vp)); int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred, struct proc *p, int slpflag, int slptimeo)); @@ -391,14 +455,25 @@ int vn_closefile __P((struct file *fp, struct proc *p)); int vn_ioctl __P((struct file *fp, u_long com, caddr_t data, struct proc *p)); int vn_open __P((struct nameidata *ndp, int fmode, int cmode)); +int vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp, + 
struct proc *p)); int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *cred, int *aresid, struct proc *p)); +int vn_lock __P((struct vnode *vp, int flags, struct proc *p)); +int vop_noislocked __P((void *)); +int vop_nolock __P((void *)); +int vop_nounlock __P((void *)); +int vop_revoke __P((void *)); + int vn_read __P((struct file *fp, struct uio *uio, struct ucred *cred)); int vn_select __P((struct file *fp, int which, struct proc *p)); int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); int vn_write __P((struct file *fp, struct uio *uio, struct ucred *cred)); int vn_writechk __P((struct vnode *vp)); +void vn_syncer_add_to_worklist __P((struct vnode *vp, int delay)); +void sched_sync __P((struct proc *)); + struct vnode * checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp)); void vput __P((struct vnode *vp)); diff --git a/sys/sys/vnode_if.h b/sys/sys/vnode_if.h index abf129f1126..43b56b5dc76 100644 --- a/sys/sys/vnode_if.h +++ b/sys/sys/vnode_if.h @@ -291,6 +291,31 @@ static __inline int VOP_WRITE(vp, uio, ioflag, cred) return (VCALL(vp, VOFFSET(vop_write), &a)); } +struct vop_lease_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct proc *a_p; + struct ucred *a_cred; + int a_flag; +}; +extern struct vnodeop_desc vop_lease_desc; +static __inline int VOP_LEASE __P((struct vnode *, struct proc *, + struct ucred *, int)); +static __inline int VOP_LEASE(vp, p, cred, flag) + struct vnode *vp; + struct proc *p; + struct ucred *cred; + int flag; +{ + struct vop_lease_args a; + a.a_desc = VDESC(vop_lease); + a.a_vp = vp; + a.a_p = p; + a.a_cred = cred; + a.a_flag = flag; + return (VCALL(vp, VOFFSET(vop_lease), &a)); +} + struct vop_ioctl_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; @@ -350,6 +375,24 @@ static __inline int VOP_SELECT(vp, which, fflags, cred, p) return (VCALL(vp, VOFFSET(vop_select), &a)); } +struct vop_revoke_args 
{ + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + int a_flags; +}; +extern struct vnodeop_desc vop_revoke_desc; +static __inline int VOP_REVOKE __P((struct vnode *, int)); +static __inline int VOP_REVOKE(vp, flags) + struct vnode *vp; + int flags; +{ + struct vop_revoke_args a; + a.a_desc = VDESC(vop_revoke); + a.a_vp = vp; + a.a_flags = flags; + return (VCALL(vp, VOFFSET(vop_revoke), &a)); +} + struct vop_mmap_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; @@ -582,19 +625,19 @@ struct vop_readdir_args { struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; - u_long *a_cookies; - int a_ncookies; + int *a_ncookies; + u_long **a_cookies; }; extern struct vnodeop_desc vop_readdir_desc; static __inline int VOP_READDIR __P((struct vnode *, struct uio *, - struct ucred *, int *, u_long *, int)); -static __inline int VOP_READDIR(vp, uio, cred, eofflag, cookies, ncookies) + struct ucred *, int *, int *, u_long **)); +static __inline int VOP_READDIR(vp, uio, cred, eofflag, ncookies, cookies) struct vnode *vp; struct uio *uio; struct ucred *cred; int *eofflag; - u_long *cookies; - int ncookies; + int *ncookies; + u_long **cookies; { struct vop_readdir_args a; a.a_desc = VDESC(vop_readdir); @@ -602,8 +645,8 @@ static __inline int VOP_READDIR(vp, uio, cred, eofflag, cookies, ncookies) a.a_uio = uio; a.a_cred = cred; a.a_eofflag = eofflag; - a.a_cookies = cookies; a.a_ncookies = ncookies; + a.a_cookies = cookies; return (VCALL(vp, VOFFSET(vop_readdir), &a)); } @@ -650,60 +693,78 @@ static __inline int VOP_ABORTOP(dvp, cnp) struct vop_inactive_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; + struct proc *a_p; }; extern struct vnodeop_desc vop_inactive_desc; -static __inline int VOP_INACTIVE __P((struct vnode *)); -static __inline int VOP_INACTIVE(vp) +static __inline int VOP_INACTIVE __P((struct vnode *, struct proc *)); +static __inline int VOP_INACTIVE(vp, p) struct vnode *vp; + struct proc *p; { struct vop_inactive_args a; a.a_desc = 
VDESC(vop_inactive); a.a_vp = vp; + a.a_p = p; return (VCALL(vp, VOFFSET(vop_inactive), &a)); } struct vop_reclaim_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; + struct proc *a_p; }; extern struct vnodeop_desc vop_reclaim_desc; -static __inline int VOP_RECLAIM __P((struct vnode *)); -static __inline int VOP_RECLAIM(vp) +static __inline int VOP_RECLAIM __P((struct vnode *, struct proc *)); +static __inline int VOP_RECLAIM(vp, p) struct vnode *vp; + struct proc *p; { struct vop_reclaim_args a; a.a_desc = VDESC(vop_reclaim); a.a_vp = vp; + a.a_p = p; return (VCALL(vp, VOFFSET(vop_reclaim), &a)); } struct vop_lock_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; + int a_flags; + struct proc *a_p; }; extern struct vnodeop_desc vop_lock_desc; -static __inline int VOP_LOCK __P((struct vnode *)); -static __inline int VOP_LOCK(vp) +static __inline int VOP_LOCK __P((struct vnode *, int, struct proc *)); +static __inline int VOP_LOCK(vp, flags, p) struct vnode *vp; + int flags; + struct proc *p; { struct vop_lock_args a; a.a_desc = VDESC(vop_lock); a.a_vp = vp; + a.a_flags = flags; + a.a_p = p; return (VCALL(vp, VOFFSET(vop_lock), &a)); } struct vop_unlock_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; + int a_flags; + struct proc *a_p; }; extern struct vnodeop_desc vop_unlock_desc; -static __inline int VOP_UNLOCK __P((struct vnode *)); -static __inline int VOP_UNLOCK(vp) +static __inline int VOP_UNLOCK __P((struct vnode *, int, struct proc *)); +static __inline int VOP_UNLOCK(vp, flags, p) struct vnode *vp; + int flags; + struct proc *p; { struct vop_unlock_args a; a.a_desc = VDESC(vop_unlock); a.a_vp = vp; + a.a_flags = flags; + a.a_p = p; return (VCALL(vp, VOFFSET(vop_unlock), &a)); } @@ -864,6 +925,37 @@ static __inline int VOP_VALLOC(pvp, mode, cred, vpp) return (VCALL(pvp, VOFFSET(vop_valloc), &a)); } +struct vop_balloc_args { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + off_t a_startoffset; + int a_size; + struct ucred *a_cred; + 
int a_flags; + struct buf **a_bpp; +}; +extern struct vnodeop_desc vop_balloc_desc; +static __inline int VOP_BALLOC __P((struct vnode *, off_t, int, + struct ucred *, int, struct buf **)); +static __inline int VOP_BALLOC(vp, startoffset, size, cred, flags, bpp) + struct vnode *vp; + off_t startoffset; + int size; + struct ucred *cred; + int flags; + struct buf **bpp; +{ + struct vop_balloc_args a; + a.a_desc = VDESC(vop_balloc); + a.a_vp = vp; + a.a_startoffset = startoffset; + a.a_size = size; + a.a_cred = cred; + a.a_flags = flags; + a.a_bpp = bpp; + return (VCALL(vp, VOFFSET(vop_balloc), &a)); +} + struct vop_reallocblks_args { struct vnodeop_desc *a_desc; struct vnode *a_vp; @@ -957,31 +1049,6 @@ static __inline int VOP_UPDATE(vp, access, modify, waitfor) return (VCALL(vp, VOFFSET(vop_update), &a)); } -struct vop_lease_args { - struct vnodeop_desc *a_desc; - struct vnode *a_vp; - struct proc *a_p; - struct ucred *a_cred; - int a_flag; -}; -extern struct vnodeop_desc vop_lease_desc; -static __inline int VOP_LEASE __P((struct vnode *, struct proc *, - struct ucred *, int)); -static __inline int VOP_LEASE(vp, p, cred, flag) - struct vnode *vp; - struct proc *p; - struct ucred *cred; - int flag; -{ - struct vop_lease_args a; - a.a_desc = VDESC(vop_lease); - a.a_vp = vp; - a.a_p = p; - a.a_cred = cred; - a.a_flag = flag; - return (VCALL(vp, VOFFSET(vop_lease), &a)); -} - struct vop_whiteout_args { struct vnodeop_desc *a_desc; struct vnode *a_dvp; diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index e25073d6715..e4bf9e3d285 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_alloc.c,v 1.7 1997/07/22 10:31:50 deraadt Exp $ */ +/* $OpenBSD: ffs_alloc.c,v 1.8 1997/10/06 15:26:28 csapuntz Exp $ */ /* $NetBSD: ffs_alloc.c,v 1.11 1996/05/11 18:27:09 mycroft Exp $ */ /* @@ -59,7 +59,7 @@ extern u_long nextgennumber; static daddr_t ffs_alloccg __P((struct inode *, int, daddr_t, int)); -static daddr_t 
ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t)); +static daddr_t ffs_alloccgblk __P((struct inode *, struct buf *, daddr_t)); static daddr_t ffs_clusteralloc __P((struct inode *, int, daddr_t, int)); static ino_t ffs_dirpref __P((struct fs *)); static daddr_t ffs_fragextend __P((struct inode *, int, long, int, int)); @@ -70,6 +70,11 @@ static u_long ffs_hashalloc __P((struct inode *, int, long, int, static daddr_t ffs_nodealloccg __P((struct inode *, int, daddr_t, int)); static daddr_t ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int)); +#ifdef DIAGNOSTIC +static int ffs_checkblk __P((struct inode *, daddr_t, long)); +#endif +int ffs_freefile __P((struct vop_vfree_args *)); + /* * Allocate a block in the file system. * @@ -272,7 +277,8 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) if (bno > 0) { bp->b_blkno = fsbtodb(fs, bno); (void) vnode_pager_uncache(ITOV(ip)); - ffs_blkfree(ip, bprev, (long)osize); + if (!DOINGSOFTDEP(ITOV(ip))) + ffs_blkfree(ip, bprev, (long)osize); if (nsize < request) ffs_blkfree(ip, bno + numfrags(fs, nsize), (long)(request - nsize)); @@ -314,15 +320,10 @@ nospace: * Note that the error return is not reflected back to the user. Rather * the previous block allocation will be used. 
*/ -#ifdef DEBUG -#include <sys/sysctl.h> + int doasyncfree = 1; -struct ctldebug debug14 = { "doasyncfree", &doasyncfree }; +int doreallocblks = 1; int prtrealloc = 0; -struct ctldebug debug15 = { "prtrealloc", &prtrealloc }; -#else -#define doasyncfree 1 -#endif int ffs_reallocblks(v) @@ -343,6 +344,9 @@ ffs_reallocblks(v) int i, len, start_lvl, end_lvl, pref, ssize; struct timespec ts; + if (doreallocblks == 0) + return (ENOSPC); + vp = ap->a_vp; ip = VTOI(vp); fs = ip->i_fs; @@ -352,10 +356,22 @@ ffs_reallocblks(v) len = buflist->bs_nchildren; start_lbn = buflist->bs_children[0]->b_lblkno; end_lbn = start_lbn + len - 1; + #ifdef DIAGNOSTIC + for (i = 0; i < len; i++) + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 1"); + for (i = 1; i < len; i++) if (buflist->bs_children[i]->b_lblkno != start_lbn + i) - panic("ffs_reallocblks: non-cluster"); + panic("ffs_reallocblks: non-logical cluster"); + + blkno = buflist->bs_children[0]->b_blkno; + ssize = fsbtodb(fs, fs->fs_frag); + for (i = 1; i < len - 1; i++) + if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) + panic("ffs_reallocblks: non-physical cluster %d", i); #endif /* * If the latest allocation is in a new cylinder group, assume that @@ -422,9 +438,14 @@ ffs_reallocblks(v) #endif blkno = newblk; for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { - if (i == ssize) + if (i == ssize) { bap = ebap; + soff = -i; + } #ifdef DIAGNOSTIC + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 2"); if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) panic("ffs_reallocblks: alloc mismatch"); #endif @@ -432,6 +453,17 @@ ffs_reallocblks(v) if (prtrealloc) printf(" %d,", *bap); #endif + if (DOINGSOFTDEP(vp)) { + if (sbap == &ip->i_ffs_db[0] && i < ssize) + softdep_setup_allocdirect(ip, start_lbn + i, + blkno, *bap, fs->fs_bsize, 
fs->fs_bsize, + buflist->bs_children[i]); + else + softdep_setup_allocindir_page(ip, start_lbn + i, + i < ssize ? sbp : ebp, soff + i, blkno, + *bap, buflist->bs_children[i]); + } + *bap++ = blkno; } /* @@ -473,10 +505,15 @@ ffs_reallocblks(v) printf("\n\tnew:"); #endif for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { - ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize); + if (!DOINGSOFTDEP(vp)) + ffs_blkfree(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->fs_bsize); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef DEBUG + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 3"); if (prtrealloc) printf(" %d,", blkno); #endif @@ -815,6 +852,9 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) fs->fs_cs(fs, cg).cs_nffree--; } fs->fs_fmod = 1; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, bprev); + bdwrite(bp); return (bprev); } @@ -835,8 +875,8 @@ ffs_alloccg(ip, cg, bpref, size) register struct fs *fs; register struct cg *cgp; struct buf *bp; - register int i; - int error, bno, frags, allocsiz; + daddr_t bno, blkno; + int error, i, frags, allocsiz; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) @@ -855,7 +895,7 @@ ffs_alloccg(ip, cg, bpref, size) } cgp->cg_time = time.tv_sec; if (size == fs->fs_bsize) { - bno = ffs_alloccgblk(fs, cgp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref); bdwrite(bp); return (bno); } @@ -877,7 +917,7 @@ ffs_alloccg(ip, cg, bpref, size) brelse(bp); return (NULL); } - bno = ffs_alloccgblk(fs, cgp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref); bpref = dtogd(fs, bno); for (i = frags; i < fs->fs_frag; i++) setbit(cg_blksfree(cgp), bpref + i); @@ -904,8 +944,12 @@ ffs_alloccg(ip, cg, bpref, size) cgp->cg_frsum[allocsiz]--; if (frags != allocsiz) cgp->cg_frsum[allocsiz - frags]++; - bdwrite(bp); - return (cg * fs->fs_fpg + bno); + + blkno = cg * fs->fs_fpg + 
bno; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, blkno); + bdwrite(bp); + return ((u_long)blkno); } /* @@ -920,16 +964,20 @@ ffs_alloccg(ip, cg, bpref, size) * blocks may be fragmented by the routine that allocates them. */ static daddr_t -ffs_alloccgblk(fs, cgp, bpref) - register struct fs *fs; - register struct cg *cgp; +ffs_alloccgblk(ip, bp, bpref) + struct inode *ip; + struct buf *bp; daddr_t bpref; { + struct fs *fs; + struct cg *cgp; daddr_t bno, blkno; int cylno, pos, delta; short *cylbp; register int i; + fs = ip->i_fs; + cgp = (struct cg *)bp->b_data; if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { bpref = cgp->cg_rotor; goto norot; @@ -1020,7 +1068,10 @@ gotit: cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; cg_blktot(cgp)[cylno]--; fs->fs_fmod = 1; - return (cgp->cg_cgx * fs->fs_fpg + bno); + blkno = cgp->cg_cgx * fs->fs_fpg + bno; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, blkno); + return (blkno); } /* @@ -1040,7 +1091,7 @@ ffs_clusteralloc(ip, cg, bpref, len) register struct fs *fs; register struct cg *cgp; struct buf *bp; - int i, run, bno, bit, map; + int i, got, run, bno, bit, map; u_char *mapp; int32_t *lp; @@ -1094,7 +1145,7 @@ ffs_clusteralloc(ip, cg, bpref, len) mapp = &cg_clustersfree(cgp)[bpref / NBBY]; map = *mapp++; bit = 1 << (bpref % NBBY); - for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) { + for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { if ((map & bit) == 0) { run = 0; } else { @@ -1102,22 +1153,32 @@ ffs_clusteralloc(ip, cg, bpref, len) if (run == len) break; } - if ((i & (NBBY - 1)) != (NBBY - 1)) { + if ((got & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } - if (i >= cgp->cg_nclusterblks) + if (got >= cgp->cg_nclusterblks) goto fail; /* * Allocate the cluster that we have found. 
*/ - bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1); +#ifdef DIAGNOSTIC + for (i = 1; i <= len; i++) + if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i)) + panic("ffs_clusteralloc: map mismatch"); +#endif + bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); +#ifdef DIAGNOSTIC + if (dtog(fs, bno) != cg) + panic("ffs_clusteralloc: allocated out of group"); +#endif + len = blkstofrags(fs, len); for (i = 0; i < len; i += fs->fs_frag) - if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i) + if (ffs_alloccgblk(ip, bp, bno + i) != bno + i) panic("ffs_clusteralloc: lost block"); bdwrite(bp); return (bno); @@ -1195,6 +1256,9 @@ ffs_nodealloccg(ip, cg, ipref, mode) panic("ffs_nodealloccg: block not in map"); /* NOTREACHED */ gotit: + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); + setbit(cg_inosused(cgp), ipref); cgp->cg_cs.cs_nifree--; fs->fs_cstotal.cs_nifree--; @@ -1229,7 +1293,8 @@ ffs_blkfree(ip, bno, size) int i, error, cg, blk, frags, bbase; fs = ip->i_fs; - if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || + fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { printf("dev = 0x%x, bsize = %d, size = %ld, fs = %s\n", ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); panic("blkfree: bad size"); @@ -1255,7 +1320,7 @@ ffs_blkfree(ip, bno, size) bno = dtogd(fs, bno); if (size == fs->fs_bsize) { blkno = fragstoblks(fs, bno); - if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { + if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) { printf("dev = 0x%x, block = %d, fs = %s\n", ip->i_dev, bno, fs->fs_fsmnt); panic("blkfree: freeing free block"); @@ -1318,8 +1383,6 @@ ffs_blkfree(ip, bno, size) /* * Free an inode. - * - * The specified inode is placed back in the free map. 
*/ int ffs_vfree(v) @@ -1330,6 +1393,28 @@ ffs_vfree(v) ino_t a_ino; int a_mode; } */ *ap = v; + + + if (DOINGSOFTDEP(ap->a_pvp)) { + softdep_freefile(ap); + return (0); + } + + return (ffs_freefile(ap)); +} + +/* + * Do the actual free operation. + * The specified inode is placed back in the free map. + */ +int +ffs_freefile(ap) + struct vop_vfree_args /* { + struct vnode *a_pvp; + ino_t a_ino; + int a_mode; + } */ *ap; +{ register struct fs *fs; register struct cg *cgp; register struct inode *pip; @@ -1347,7 +1432,7 @@ ffs_vfree(v) (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); - return (0); + return (error); } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) { @@ -1378,6 +1463,60 @@ ffs_vfree(v) return (0); } +#ifdef DIAGNOSTIC +/* + * Verify allocation of a block or fragment. Returns true if block or + * fragment is allocated, false if it is free. + */ +int +ffs_checkblk(ip, bno, size) + struct inode *ip; + daddr_t bno; + long size; +{ + struct fs *fs; + struct cg *cgp; + struct buf *bp; + int i, error, frags, free; + + fs = ip->i_fs; + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { + printf("bsize = %d, size = %d, fs = %s\n", + fs->fs_bsize, size, fs->fs_fsmnt); + panic("checkblk: bad size"); + } + if ((u_int)bno >= fs->fs_size) + panic("checkblk: bad block %d", bno); + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + /* XXX -probably should pannic here */ + brelse(bp); + return (-1); + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + /* XXX -probably should pannic here */ + brelse(bp); + return (-1); + } + bno = dtogd(fs, bno); + if (size == fs->fs_bsize) { + free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno)); + } else { + frags = numfrags(fs, size); + for (free = 0, i = 0; i < frags; i++) + if (isset(cg_blksfree(cgp), bno + i)) + free++; + if (free != 0 && free != frags) + panic("checkblk: partially free fragment"); + } + brelse(bp); 
+ return (!free); +} +#endif /* DIAGNOSTIC */ + + /* * Find a block of the specified size in the specified cylinder group. * @@ -1550,3 +1689,4 @@ ffs_fserr(fs, uid, cp) log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp); } + diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index 5a7dc3afcc2..285ca5f2ca7 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_balloc.c,v 1.3 1997/05/30 08:34:19 downsj Exp $ */ +/* $OpenBSD: ffs_balloc.c,v 1.4 1997/10/06 15:26:29 csapuntz Exp $ */ /* $NetBSD: ffs_balloc.c,v 1.3 1996/02/09 22:22:21 christos Exp $ */ /* @@ -41,6 +41,7 @@ #include <sys/buf.h> #include <sys/proc.h> #include <sys/file.h> +#include <sys/mount.h> #include <sys/vnode.h> #include <vm/vm.h> @@ -58,27 +59,44 @@ * the inode and the logical block number in a file. */ int -ffs_balloc(ip, bn, size, cred, bpp, flags) - register struct inode *ip; - register daddr_t bn; +ffs_balloc(v) + void *v; +{ + struct vop_balloc_args /* { + struct vnode *a_vp; + off_t a_startpoint; + int a_size; + struct ucred *a_cred; + int a_flags; + struct buf *a_bpp; + } */ *ap = v; + + struct inode *ip; + daddr_t lbn; int size; struct ucred *cred; - struct buf **bpp; int flags; -{ - register struct fs *fs; - register daddr_t nb; + struct fs *fs; + daddr_t nb; struct buf *bp, *nbp; - struct vnode *vp = ITOV(ip); + struct vnode *vp; struct indir indirs[NIADDR + 2]; - daddr_t newb, lbn, *bap, pref; - int osize, nsize, num, i, error; + daddr_t newb, *bap, pref; + int deallocated, osize, nsize, num, i, error; + daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR+1]; - *bpp = NULL; - if (bn < 0) - return (EFBIG); + vp = ap->a_vp; + ip = VTOI(vp); fs = ip->i_fs; - lbn = bn; + lbn = lblkno(fs, ap->a_startoffset); + size = blkoff(fs, ap->a_startoffset) + ap->a_size; + if (size > fs->fs_bsize) + panic("ffs_balloc; blk too big"); + *ap->a_bpp = NULL; + if (lbn < 0) + return (EFBIG); + cred = ap->a_cred; + flags = ap->a_flags; /* * 
If the next write will extend the file into a new block, @@ -86,7 +104,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) * this fragment has to be extended to be a full block. */ nb = lblkno(fs, ip->i_ffs_size); - if (nb < NDADDR && nb < bn) { + if (nb < NDADDR && nb < lbn) { osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { error = ffs_realloccg(ip, nb, @@ -94,6 +112,11 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) osize, (int)fs->fs_bsize, cred, &bp); if (error) return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, nb, + dbtofsb(fs, bp->b_blkno), ip->i_ffs_db[nb], + fs->fs_bsize, osize, bp); + ip->i_ffs_size = (nb + 1) * fs->fs_bsize; vnode_pager_setsize(vp, (u_long)ip->i_ffs_size); ip->i_ffs_db[nb] = dbtofsb(fs, bp->b_blkno); @@ -107,15 +130,15 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) /* * The first NDADDR blocks are direct blocks */ - if (bn < NDADDR) { - nb = ip->i_ffs_db[bn]; - if (nb != 0 && ip->i_ffs_size >= (bn + 1) * fs->fs_bsize) { - error = bread(vp, bn, fs->fs_bsize, NOCRED, &bp); + if (lbn < NDADDR) { + nb = ip->i_ffs_db[lbn]; + if (nb != 0 && ip->i_ffs_size >= (lbn + 1) * fs->fs_bsize) { + error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } - *bpp = bp; + *ap->a_bpp = bp; return (0); } if (nb != 0) { @@ -125,43 +148,52 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) osize = fragroundup(fs, blkoff(fs, ip->i_ffs_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { - error = bread(vp, bn, osize, NOCRED, &bp); + error = bread(vp, lbn, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } } else { - error = ffs_realloccg(ip, bn, - ffs_blkpref(ip, bn, (int)bn, &ip->i_ffs_db[0]), + error = ffs_realloccg(ip, lbn, + ffs_blkpref(ip, lbn, (int)lbn, + &ip->i_ffs_db[0]), osize, nsize, cred, &bp); if (error) return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, + dbtofsb(fs, bp->b_blkno), nb, + nsize, osize, bp); } } else { - if (ip->i_ffs_size 
< (bn + 1) * fs->fs_bsize) + if (ip->i_ffs_size < (lbn + 1) * fs->fs_bsize) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; - error = ffs_alloc(ip, bn, - ffs_blkpref(ip, bn, (int)bn, &ip->i_ffs_db[0]), + error = ffs_alloc(ip, lbn, + ffs_blkpref(ip, lbn, (int)lbn, &ip->i_ffs_db[0]), nsize, cred, &newb); if (error) return (error); - bp = getblk(vp, bn, nsize, 0, 0); + bp = getblk(vp, lbn, nsize, 0, 0); bp->b_blkno = fsbtodb(fs, newb); if (flags & B_CLRBUF) clrbuf(bp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, newb, 0, + nsize, 0, bp); + } - ip->i_ffs_db[bn] = dbtofsb(fs, bp->b_blkno); + ip->i_ffs_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bpp = bp; + *ap->a_bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; - if ((error = ufs_getlbns(vp, bn, indirs, &num)) != 0) + if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) return(error); #ifdef DIAGNOSTIC if (num < 1) @@ -172,6 +204,9 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) */ --num; nb = ip->i_ffs_ib[indirs[0].in_off]; + + allocib = NULL; + allocblk = allociblk; if (nb == 0) { pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, @@ -179,18 +214,26 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) if (error) return (error); nb = newb; + + *allocblk++ = nb; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); - bp->b_blkno = fsbtodb(fs, newb); + bp->b_blkno = fsbtodb(fs, nb); clrbuf(bp); - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if ((error = bwrite(bp)) != 0) { - ffs_blkfree(ip, nb, fs->fs_bsize); - return (error); - } - ip->i_ffs_ib[indirs[0].in_off] = newb; + + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, + newb, 0, fs->fs_bsize, 0, bp); + bdwrite(bp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. 
+ */ + if ((error = bwrite(bp)) != 0) + goto fail; + } + allocib = &ip->i_ffs_ib[indirs[0].in_off]; + *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* @@ -201,7 +244,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); - return (error); + goto fail; } bap = (daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; @@ -218,20 +261,27 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) &newb); if (error) { brelse(bp); - return (error); + goto fail; } nb = newb; + *allocblk++ = nb; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); clrbuf(nbp); - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if ((error = bwrite(nbp)) != 0) { - ffs_blkfree(ip, nb, fs->fs_bsize); - brelse(bp); - return (error); + + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocindir_meta(nbp, ip, bp, + indirs[i - 1].in_off, nb); + bdwrite(nbp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. 
+ */ + if ((error = bwrite(nbp)) != 0) { + brelse(bp); + goto fail; + } } bap[indirs[i - 1].in_off] = nb; /* @@ -253,13 +303,17 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) &newb); if (error) { brelse(bp); - return (error); + goto fail; } nb = newb; + *allocblk++ = nb; nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & B_CLRBUF) clrbuf(nbp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocindir_page(ip, lbn, bp, + indirs[i].in_off, nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use @@ -270,7 +324,7 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) } else { bdwrite(bp); } - *bpp = nbp; + *ap->a_bpp = nbp; return (0); } brelse(bp); @@ -278,12 +332,36 @@ ffs_balloc(ip, bn, size, cred, bpp, flags) error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); if (error) { brelse(nbp); - return (error); + goto fail; } } else { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); } - *bpp = nbp; + *ap->a_bpp = nbp; return (0); + +fail: + /* + * If we have failed part way through block allocation, we + * have to deallocate any indirect blocks that we have allocated. + */ + for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { + ffs_blkfree(ip, *blkp, fs->fs_bsize); + deallocated += fs->fs_bsize; + } + if (allocib != NULL) + *allocib = 0; + if (deallocated) { +#ifdef QUOTA + /* + * Restore user's disk quota because allocation failed. 
+ */ + (void) chkdq(ip, (long)-btodb(deallocated), cred, FORCE); +#endif + ip->i_ffs_blocks -= btodb(deallocated); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + return (error); + } diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 94ca01ad634..3fe5a46bfa8 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_extern.h,v 1.2 1996/02/27 07:27:36 niklas Exp $ */ +/* $OpenBSD: ffs_extern.h,v 1.3 1997/10/06 15:26:29 csapuntz Exp $ */ /* $NetBSD: ffs_extern.h,v 1.4 1996/02/09 22:22:22 christos Exp $ */ /*- @@ -36,6 +36,21 @@ * @(#)ffs_extern.h 8.3 (Berkeley) 4/16/94 */ +#define FFS_CLUSTERREAD 1 /* cluster reading enabled */ +#define FFS_CLUSTERWRITE 2 /* cluster writing enabled */ +#define FFS_REALLOCBLKS 3 /* block reallocation enabled */ +#define FFS_ASYNCFREE 4 /* asynchronous block freeing enabled */ +#define FFS_MAXID 5 /* number of valid ffs ids */ + +#define FFS_NAMES { \ + { 0, 0 }, \ + { "doclusterread", CTLTYPE_INT }, \ + { "doclusterwrite", CTLTYPE_INT }, \ + { "doreallocblks", CTLTYPE_INT }, \ + { "doasyncfree", CTLTYPE_INT }, \ +} + + struct buf; struct fid; struct fs; @@ -47,6 +62,7 @@ struct statfs; struct timeval; struct ucred; struct ufsmount; +struct vfsconf; struct uio; struct vnode; struct mbuf; @@ -67,11 +83,10 @@ int ffs_vfree __P((void *)); void ffs_clusteracct __P((struct fs *, struct cg *, daddr_t, int)); /* ffs_balloc.c */ -int ffs_balloc __P((struct inode *, daddr_t, int, struct ucred *, - struct buf **, int)); +int ffs_balloc __P((void *)); /* ffs_inode.c */ -void ffs_init __P((void)); +int ffs_init __P((struct vfsconf *)); int ffs_update __P((void *)); int ffs_truncate __P((void *)); @@ -81,6 +96,8 @@ void ffs_fragacct __P((struct fs *, int, int32_t[], int)); #ifdef DIAGNOSTIC void ffs_checkoverlap __P((struct buf *, struct inode *)); #endif +int ffs_freefile __P((struct vop_vfree_args *)); +int ffs_isfreeblock __P((struct fs *, unsigned char *, daddr_t)); int ffs_isblock 
__P((struct fs *, unsigned char *, daddr_t)); void ffs_clrblock __P((struct fs *, u_char *, daddr_t)); void ffs_setblock __P((struct fs *, unsigned char *, daddr_t)); @@ -100,6 +117,8 @@ int ffs_vget __P((struct mount *, ino_t, struct vnode **)); int ffs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, struct vnode **, int *, struct ucred **)); int ffs_vptofh __P((struct vnode *, struct fid *)); +int ffs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, + struct proc *)); int ffs_sbupdate __P((struct ufsmount *, int)); int ffs_cgupdate __P((struct ufsmount *, int)); @@ -108,6 +127,38 @@ int ffs_read __P((void *)); int ffs_write __P((void *)); int ffs_fsync __P((void *)); int ffs_reclaim __P((void *)); + + +/* + * Soft dependency function prototypes. + */ + +struct vop_vfree_args; +struct vop_fsync_args; + +void softdep_initialize __P((void)); +int softdep_process_worklist __P((struct mount *)); +int softdep_mount __P((struct vnode *, struct mount *, struct fs *, + struct ucred *)); +int softdep_flushfiles __P((struct mount *, int, struct proc *)); +void softdep_update_inodeblock __P((struct inode *, struct buf *, int)); +void softdep_load_inodeblock __P((struct inode *)); +int softdep_fsync __P((struct vnode *)); +void softdep_freefile __P((struct vop_vfree_args *)); +void softdep_setup_freeblocks __P((struct inode *, off_t)); +void softdep_deallocate_dependencies __P((struct buf *)); +void softdep_setup_inomapdep __P((struct buf *, struct inode *, ino_t)); +void softdep_setup_blkmapdep __P((struct buf *, struct fs *, daddr_t)); +void softdep_setup_allocdirect __P((struct inode *, ufs_lbn_t, daddr_t, + daddr_t, long, long, struct buf *)); +void softdep_setup_allocindir_meta __P((struct buf *, struct inode *, + struct buf *, int, daddr_t)); +void softdep_setup_allocindir_page __P((struct inode *, ufs_lbn_t, + struct buf *, int, daddr_t, daddr_t, struct buf *)); +void softdep_disk_io_initiation __P((struct buf *)); +void softdep_disk_write_complete 
__P((struct buf *)); +int softdep_sync_metadata __P((struct vop_fsync_args *)); + __END_DECLS extern int (**ffs_vnodeop_p) __P((void *)); diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 488841b5e7f..ba1eb996cb9 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_inode.c,v 1.6 1997/05/30 08:34:21 downsj Exp $ */ +/* $OpenBSD: ffs_inode.c,v 1.7 1997/10/06 15:26:30 csapuntz Exp $ */ /* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */ /* @@ -61,10 +61,12 @@ static int ffs_indirtrunc __P((struct inode *, daddr_t, daddr_t, daddr_t, int, long *)); -void -ffs_init() +int +ffs_init(vfsp) + struct vfsconf *vfsp; { - ufs_init(); + softdep_initialize(); + return (ufs_init(vfsp)); } /* @@ -101,7 +103,8 @@ ffs_update(v) ip->i_flag &= ~IN_ACCESS; } if ((ip->i_flag & - (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + ap->a_waitfor != MNT_WAIT) return (0); if (ip->i_flag & IN_ACCESS) { ip->i_ffs_atime = ap->a_access->tv_sec; @@ -133,11 +136,17 @@ ffs_update(v) brelse(bp); return (error); } + + if (DOINGSOFTDEP(ap->a_vp)) + softdep_update_inodeblock(ip, bp, ap->a_waitfor); + else if (ip->i_effnlink != ip->i_ffs_nlink) + panic("ffs_update: bad link cnt"); + *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = ip->i_din.ffs_din; - if (ap->a_waitfor) + if (ap->a_waitfor && (ap->a_vp->v_mount->mnt_flag & MNT_ASYNC) == 0) { return (bwrite(bp)); - else { + } else { bdwrite(bp); return (0); } @@ -179,6 +188,8 @@ ffs_truncate(v) if (length < 0) return (EINVAL); oip = VTOI(ovp); + if (oip->i_ffs_size == length) + return (0); TIMEVAL_TO_TIMESPEC(&time, &ts); if (ovp->v_type == VLNK && (oip->i_ffs_size < ovp->v_mount->mnt_maxsymlinklen || @@ -202,8 +213,34 @@ ffs_truncate(v) return (error); #endif vnode_pager_setsize(ovp, (u_long)length); + ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0; + if (DOINGSOFTDEP(ovp)) 
{ + if (length > 0) { + /* + * If a file is only partially truncated, then + * we have to clean up the data structures + * describing the allocation past the truncation + * point. Finding and deallocating those structures + * is a lot of work. Since partial truncation occurs + * rarely, we solve the problem by syncing the file + * so that it will have no data structures left. + */ + if ((error = VOP_FSYNC(ovp, ap->a_cred, MNT_WAIT, + ap->a_p)) != 0) + return (error); + } else { +#ifdef QUOTA + (void) chkdq(oip, -oip->i_ffs_blocks, NOCRED, 0); +#endif + softdep_setup_freeblocks(oip, length); + (void) vinvalbuf(ovp, 0, ap->a_cred, ap->a_p, 0, 0); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &ts, &ts, 0)); + } + } + fs = oip->i_fs; - osize = oip->i_ffs_size; + osize = oip->i_ffs_size; /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest @@ -217,11 +254,12 @@ ffs_truncate(v) aflags = B_CLRBUF; if (ap->a_flags & IO_SYNC) aflags |= B_SYNC; - error = ffs_balloc(oip, lbn, offset + 1, ap->a_cred, &bp, - aflags); + error = VOP_BALLOC(ovp, length -1, 1, + ap->a_cred, aflags, &bp); if (error) return (error); oip->i_ffs_size = length; + vnode_pager_setsize(ovp, (u_long)length); (void) vnode_pager_uncache(ovp); if (aflags & B_SYNC) bwrite(bp); @@ -230,6 +268,8 @@ ffs_truncate(v) oip->i_flag |= IN_CHANGE | IN_UPDATE; return (VOP_UPDATE(ovp, &ts, &ts, 1)); } + vnode_pager_setsize(ovp, (u_long)length); + /* * Shorten the size of the file. 
If the file is not being * truncated to a block boundry, the contents of the @@ -245,7 +285,8 @@ ffs_truncate(v) aflags = B_CLRBUF; if (ap->a_flags & IO_SYNC) aflags |= B_SYNC; - error = ffs_balloc(oip, lbn, offset, ap->a_cred, &bp, aflags); + error = VOP_BALLOC(ovp, length - 1, 1, + ap->a_cred, aflags, &bp); if (error) return (error); oip->i_ffs_size = length; diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c index e5d0c350387..7e5e417cf53 100644 --- a/sys/ufs/ffs/ffs_subr.c +++ b/sys/ufs/ffs/ffs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_subr.c,v 1.3 1996/04/21 22:32:33 deraadt Exp $ */ +/* $OpenBSD: ffs_subr.c,v 1.4 1997/10/06 15:26:31 csapuntz Exp $ */ /* $NetBSD: ffs_subr.c,v 1.6 1996/03/17 02:16:23 christos Exp $ */ /* @@ -42,10 +42,10 @@ #ifdef _KERNEL #include <sys/systm.h> #include <sys/vnode.h> -#include <ufs/ffs/ffs_extern.h> #include <sys/buf.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> +#include <ufs/ffs/ffs_extern.h> /* * Return buffer with the contents of block "offset" from the beginning of @@ -240,3 +240,30 @@ ffs_setblock(fs, cp, h) panic("ffs_setblock"); } } + + +/* + * check if a block is free + */ +int +ffs_isfreeblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0); + case 4: + return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); + case 2: + return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); + case 1: + return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); + default: + panic("ffs_isfreeblock"); + } +} + + diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index b70f7b0db8d..7b5f8b2463a 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_vfsops.c,v 1.9 1997/06/20 14:04:32 kstailey Exp $ */ +/* $OpenBSD: ffs_vfsops.c,v 1.10 1997/10/06 15:26:31 csapuntz Exp $ */ /* $NetBSD: ffs_vfsops.c,v 1.19 1996/02/09 22:22:26 christos Exp $ */ /* @@ -51,6 +51,7 @@ #include <sys/ioctl.h> 
#include <sys/errno.h> #include <sys/malloc.h> +#include <sys/sysctl.h> #include <dev/rndvar.h> @@ -68,7 +69,6 @@ int ffs_sbupdate __P((struct ufsmount *, int)); struct vfsops ffs_vfsops = { - MOUNT_FFS, ffs_mount, ufs_start, ffs_unmount, @@ -80,61 +80,53 @@ struct vfsops ffs_vfsops = { ffs_fhtovp, ffs_vptofh, ffs_init, + ffs_sysctl }; extern u_long nextgennumber; /* * Called by main() when ufs is going to be mounted as root. - * - * Name is updated by mount(8) after booting. */ -#define ROOTNAME "root_device" int ffs_mountroot() { extern struct vnode *rootvp; - register struct fs *fs; - register struct mount *mp; + struct fs *fs; + struct mount *mp; struct proc *p = curproc; /* XXX */ struct ufsmount *ump; - size_t size; int error; /* * Get vnodes for swapdev and rootdev. */ - if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp)) - panic("ffs_mountroot: can't setup bdevvp's"); - - mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); - mp->mnt_op = &ffs_vfsops; - mp->mnt_flag = MNT_RDONLY; - if ((error = ffs_mountfs(rootvp, mp, p)) != 0) { - free(mp, M_MOUNT); + if ((error = bdevvp(swapdev, &swapdev_vp)) || + (error = bdevvp(rootdev, &rootvp))) { + printf("ffs_mountroot: can't setup bdevvp's"); return (error); } - if ((error = vfs_lock(mp)) != 0) { - (void)ffs_unmount(mp, 0, p); - free(mp, M_MOUNT); + + if ((error = vfs_rootmountalloc("ffs", "root_device", &mp)) != 0) return (error); - } + if ((error = ffs_mountfs(rootvp, mp, p)) != 0) { + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, p); + free(mp, M_MOUNT); + return (error); + } + simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - mp->mnt_vnodecovered = NULLVP; - ump = VFSTOUFS(mp); - fs = ump->um_fs; - bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); - fs->fs_fsmnt[0] = '/'; - bcopy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN); - (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, - &size); - 
bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); - (void)ffs_statfs(mp, &mp->mnt_stat, p); - vfs_unlock(mp); - inittodr(fs->fs_time); - return (0); + simple_unlock(&mountlist_slock); + ump = VFSTOUFS(mp); + fs = ump->um_fs; + (void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0); + (void)ffs_statfs(mp, &mp->mnt_stat, p); + + vfs_unbusy(mp, p); + inittodr(fs->fs_time); + return (0); } /* @@ -172,8 +164,6 @@ ffs_mount(mp, path, data, ndp, p) flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; - if (vfs_busy(mp)) - return (EBUSY); error = ffs_flushfiles(mp, flags, p); if (error == 0 && ffs_cgupdate(ump, MNT_WAIT) == 0 && @@ -181,7 +171,6 @@ ffs_mount(mp, path, data, ndp, p) fs->fs_clean = FS_ISCLEAN; (void) ffs_sbupdate(ump, MNT_WAIT); } - vfs_unbusy(mp); if (error) return (error); fs->fs_ronly = 1; @@ -198,18 +187,19 @@ ffs_mount(mp, path, data, ndp, p) */ if (p->p_ucred->cr_uid != 0) { devvp = ump->um_devvp; - VOP_LOCK(devvp); + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p); if (error) { - VOP_UNLOCK(devvp); + VOP_UNLOCK(devvp, 0, p); return (error); } - VOP_UNLOCK(devvp); + VOP_UNLOCK(devvp, 0, p); } fs->fs_ronly = 0; fs->fs_clean <<= 1; fs->fs_fmod = 1; + (void) ffs_sbupdate(ump, MNT_WAIT); } if (args.fspec == 0) { /* @@ -243,13 +233,13 @@ ffs_mount(mp, path, data, ndp, p) accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; - VOP_LOCK(devvp); + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) { vput(devvp); return (error); } - VOP_UNLOCK(devvp); + VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) error = ffs_mountfs(devvp, mp, p); @@ -317,8 +307,12 @@ ffs_reload(mountp, cred, p) * Step 1: invalidate all cached meta-data. 
*/ devvp = VFSTOUFS(mountp)->um_devvp; - if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, 0, cred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) panic("ffs_reload: dirty1"); + /* * Step 2: re-read superblock from disk. */ @@ -375,19 +369,26 @@ ffs_reload(mountp, cred, p) } loop: + simple_lock(&mntvnode_slock); for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { + if (vp->v_mount != mountp) { + simple_unlock(&mntvnode_slock); + goto loop; + } + nvp = vp->v_mntvnodes.le_next; /* * Step 4: invalidate all inactive vnodes. */ - if (vp->v_usecount == 0) { - vgone(vp); - continue; - } + if (vrecycle(vp, &mntvnode_slock, p)) + goto loop; + /* * Step 5: invalidate all cached file data. */ - if (vget(vp, 1)) + simple_lock(&vp->v_interlock); + simple_unlock(&mntvnode_slock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) goto loop; if (vinvalbuf(vp, 0, cred, p, 0, 0)) panic("ffs_reload: dirty2"); @@ -403,11 +404,12 @@ loop: } ip->i_din.ffs_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)); + ip->i_effnlink = ip->i_ffs_nlink; brelse(bp); vput(vp); - if (vp->v_mount != mountp) - goto loop; + simple_lock(&mntvnode_slock); } + simple_unlock(&mntvnode_slock); return (0); } @@ -426,8 +428,7 @@ ffs_mountfs(devvp, mp, p) dev_t dev; struct partinfo dpart; caddr_t base, space; - int blks; - int error, i, size, ronly; + int error, i, blks, size, ronly; int32_t *lp; struct ucred *cred; extern struct vnode *rootvp; @@ -445,7 +446,10 @@ ffs_mountfs(devvp, mp, p) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); - if ((error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) != 0) + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; @@ -484,10 +488,6 @@ ffs_mountfs(devvp, mp, p) bp = NULL; fs = ump->um_fs; fs->fs_ronly = ronly; - if 
(ronly == 0) { - fs->fs_clean <<= 1; - fs->fs_fmod = 1; - } size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) @@ -520,9 +520,8 @@ ffs_mountfs(devvp, mp, p) if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0) mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; else - mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_FFS); + mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; - mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; @@ -531,14 +530,24 @@ ffs_mountfs(devvp, mp, p) ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; ffs_oldfscompat(fs); ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */ maxfilesize = (u_int64_t)0x80000000 * fs->fs_bsize - 1; /* XXX */ if (fs->fs_maxfilesize > maxfilesize) /* XXX */ fs->fs_maxfilesize = maxfilesize; /* XXX */ + if (ronly == 0) { + if ((fs->fs_flags & FS_DOSOFTDEP) && + (error = softdep_mount(devvp, mp, fs, cred)) != 0) { + free(base, M_UFSMNT); + goto out; + } + fs->fs_clean = 0; + (void) ffs_sbupdate(ump, MNT_WAIT); + } return (0); out: + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? 
FREAD : FREAD|FWRITE, cred, p); @@ -595,8 +604,14 @@ ffs_unmount(mp, mntflags, p) flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; - if ((error = ffs_flushfiles(mp, flags, p)) != 0) - return (error); + if (mp->mnt_flag & MNT_SOFTDEP) { + if ((error = softdep_flushfiles(mp, flags, p)) != 0) + return (error); + } else { + if ((error = ffs_flushfiles(mp, flags, p)) != 0) + return (error); + } + ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_ronly == 0 && @@ -605,7 +620,7 @@ ffs_unmount(mp, mntflags, p) fs->fs_clean = FS_ISCLEAN; (void) ffs_sbupdate(ump, MNT_WAIT); } - ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + ump->um_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, NOCRED, p); vrele(ump->um_devvp); @@ -613,7 +628,6 @@ ffs_unmount(mp, mntflags, p) free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; - mp->mnt_flag &= ~MNT_LOCAL; return (error); } @@ -626,12 +640,9 @@ ffs_flushfiles(mp, flags, p) int flags; struct proc *p; { - extern int doforce; register struct ufsmount *ump; int error; - if (!doforce) - flags &= ~FORCECLOSE; ump = VFSTOUFS(mp); #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { @@ -649,7 +660,17 @@ ffs_flushfiles(mp, flags, p) */ } #endif - error = vflush(mp, NULLVP, flags); + /* + * Flush all the files. + */ + if ((error = vflush(mp, NULL, flags)) != 0) + return (error); + /* + * Flush filesystem metadata. 
+ */ + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p); + VOP_UNLOCK(ump->um_devvp, 0, p); return (error); } @@ -684,10 +705,11 @@ ffs_statfs(mp, sbp, p) sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree; if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } - strncpy(sbp->f_fstypename, mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } @@ -705,7 +727,7 @@ ffs_sync(mp, waitfor, cred, p) struct ucred *cred; struct proc *p; { - register struct vnode *vp; + register struct vnode *vp, *nvp; register struct inode *ip; register struct ufsmount *ump = VFSTOUFS(mp); register struct fs *fs; @@ -717,49 +739,71 @@ ffs_sync(mp, waitfor, cred, p) * Consistency check that the superblock * is still in the buffer cache. */ - if (fs->fs_fmod != 0) { - if (fs->fs_ronly != 0) { /* XXX */ - printf("fs = %s\n", fs->fs_fsmnt); - panic("update: rofs mod"); - } - fs->fs_fmod = 0; - fs->fs_time = time.tv_sec; - allerror = ffs_cgupdate(ump, waitfor); + if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { + printf("fs = %s\n", fs->fs_fsmnt); + panic("update: rofs mod"); } /* * Write back each (modified) inode. */ + simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; - vp = vp->v_mntvnodes.le_next) { + vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. 
*/ if (vp->v_mount != mp) goto loop; - if (VOP_ISLOCKED(vp)) - continue; + + simple_lock(&vp->v_interlock); + nvp = vp->v_mntvnodes.le_next; ip = VTOI(vp); - if ((ip->i_flag & - (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && - vp->v_dirtyblkhd.lh_first == NULL) + if (vp->v_type == VNON || ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + vp->v_dirtyblkhd.lh_first == NULL) || + waitfor == MNT_LAZY) { + simple_unlock(&vp->v_interlock); continue; - if (vget(vp, 1)) - goto loop; + } + simple_unlock(&mntvnode_slock); + error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); + if (error) { + simple_lock(&mntvnode_slock); + if (error == ENOENT) + goto loop; + continue; + } if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) allerror = error; - vput(vp); + VOP_UNLOCK(vp, 0, p); + vrele(vp); + simple_lock(&mntvnode_slock); } + simple_unlock(&mntvnode_slock); /* * Force stale file system control information to be flushed. */ - if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) - allerror = error; + if (waitfor != MNT_LAZY) { + if (ump->um_mountp->mnt_flag & MNT_SOFTDEP) + waitfor = MNT_NOWAIT; + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); + if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) + allerror = error; + VOP_UNLOCK(ump->um_devvp, 0, p); + } #ifdef QUOTA qsync(mp); #endif + /* + * Write back modified superblock. + */ + + if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) + allerror = error; + return (allerror); } @@ -796,6 +840,7 @@ ffs_vget(mp, ino, vpp) type = ump->um_devvp->v_tag == VT_MFS ? 
M_MFSNODE : M_FFSNODE; /* XXX */ MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK); bzero((caddr_t)ip, sizeof(struct inode)); + lockinit(&ip->i_lock, PINOD, "inode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_fs = fs = ump->um_fs; @@ -833,6 +878,10 @@ ffs_vget(mp, ino, vpp) return (error); } ip->i_din.ffs_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + if (DOINGSOFTDEP(vp)) + softdep_load_inodeblock(ip); + else + ip->i_effnlink = ip->i_ffs_nlink; brelse(bp); /* @@ -965,7 +1014,7 @@ ffs_cgupdate(mp, waitfor) struct ufsmount *mp; int waitfor; { - register struct fs *fs = mp->um_fs; + register struct fs *fs = mp->um_fs, *dfs; register struct buf *bp; int blks; caddr_t space; @@ -987,7 +1036,74 @@ ffs_cgupdate(mp, waitfor) else bawrite(bp); } - if (!allerror && error) + + /* + * Now write back the superblock itself. If any errors occurred + * up to this point, then fail so that the superblock avoids + * being written out as clean. + */ + if (allerror) + return (allerror); + bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); + fs->fs_fmod = 0; + fs->fs_time = time.tv_sec; + bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + /* Restore compatibility to old file systems. XXX */ + dfs = (struct fs *)bp->b_data; /* XXX */ + if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ + dfs->fs_nrpos = -1; /* XXX */ + if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ + int32_t *lp, tmp; /* XXX */ + /* XXX */ + lp = (int32_t *)&dfs->fs_qbmask; /* XXX */ + tmp = lp[4]; /* XXX */ + for (i = 4; i > 0; i--) /* XXX */ + lp[i] = lp[i-1]; /* XXX */ + lp[0] = tmp; /* XXX */ + } /* XXX */ + dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */ + if (waitfor != MNT_WAIT) + bawrite(bp); + else if ((error = bwrite(bp)) != 0) allerror = error; + return (allerror); } + +/* + * fast filesystem related variables. 
+ */ +int +ffs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + struct proc *p; +{ + extern int doclusterread, doclusterwrite, doreallocblks, doasyncfree; + + /* all sysctl names at this level are terminal */ + if (namelen != 1) + return (ENOTDIR); /* overloaded */ + + switch (name[0]) { + case FFS_CLUSTERREAD: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &doclusterread)); + case FFS_CLUSTERWRITE: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &doclusterwrite)); + case FFS_REALLOCBLKS: + return (sysctl_int(oldp, oldlenp, newp, newlen, + &doreallocblks)); + case FFS_ASYNCFREE: + return (sysctl_int(oldp, oldlenp, newp, newlen, &doasyncfree)); + default: + return (EOPNOTSUPP); + } + /* NOTREACHED */ +} + diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index e9462ff50be..088ba291a3a 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ffs_vnops.c,v 1.3 1996/05/22 11:47:18 deraadt Exp $ */ +/* $OpenBSD: ffs_vnops.c,v 1.4 1997/10/06 15:26:32 csapuntz Exp $ */ /* $NetBSD: ffs_vnops.c,v 1.7 1996/05/11 18:27:24 mycroft Exp $ */ /* @@ -82,6 +82,7 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_lease_desc, ufs_lease_check }, /* lease */ { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ { &vop_select_desc, ufs_select }, /* select */ + { &vop_revoke_desc, ufs_revoke }, /* revoke */ { &vop_mmap_desc, ufs_mmap }, /* mmap */ { &vop_fsync_desc, ffs_fsync }, /* fsync */ { &vop_seek_desc, ufs_seek }, /* seek */ @@ -106,6 +107,7 @@ struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_advlock_desc, ufs_advlock }, /* advlock */ { &vop_blkatoff_desc, ffs_blkatoff }, /* blkatoff */ { &vop_valloc_desc, ffs_valloc }, /* valloc */ + { &vop_balloc_desc, ffs_balloc }, /* balloc */ { &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */ { &vop_vfree_desc, ffs_vfree }, /* vfree */ { &vop_truncate_desc, 
ffs_truncate }, /* truncate */ @@ -132,6 +134,7 @@ struct vnodeopv_entry_desc ffs_specop_entries[] = { { &vop_lease_desc, spec_lease_check }, /* lease */ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ { &vop_select_desc, spec_select }, /* select */ + { &vop_revoke_desc, spec_revoke }, /* revoke */ { &vop_mmap_desc, spec_mmap }, /* mmap */ { &vop_fsync_desc, ffs_fsync }, /* fsync */ { &vop_seek_desc, spec_seek }, /* seek */ @@ -183,6 +186,7 @@ struct vnodeopv_entry_desc ffs_fifoop_entries[] = { { &vop_lease_desc, fifo_lease_check }, /* lease */ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ { &vop_select_desc, fifo_select }, /* select */ + { &vop_revoke_desc, fifo_revoke }, /* revoke */ { &vop_mmap_desc, fifo_mmap }, /* mmap */ { &vop_fsync_desc, ffs_fsync }, /* fsync */ { &vop_seek_desc, fifo_seek }, /* seek */ @@ -218,20 +222,11 @@ struct vnodeopv_desc ffs_fifoop_opv_desc = { &ffs_fifoop_p, ffs_fifoop_entries }; #endif /* FIFO */ -#ifdef DEBUG /* * Enabling cluster read/write operations. */ -#include <sys/sysctl.h> int doclusterread = 1; -struct ctldebug debug11 = { "doclusterread", &doclusterread }; int doclusterwrite = 1; -struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite }; -#else -/* XXX for ufs_readwrite */ -#define doclusterread 1 -#define doclusterwrite 1 -#endif #include <ufs/ufs/ufs_readwrite.c> @@ -249,12 +244,84 @@ ffs_fsync(v) int a_waitfor; struct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; + struct vnode *vp = ap->a_vp; + struct buf *bp, *nbp; struct timespec ts; + int s, error, passes, skipmeta; - vflushbuf(vp, ap->a_waitfor == MNT_WAIT); + /* + * Flush all dirty buffers associated with a vnode + */ + passes = NIADDR; + skipmeta = 0; + if (ap->a_waitfor == MNT_WAIT) + skipmeta = 1; +loop: + s = splbio(); +loop2: + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if ((bp->b_flags & B_BUSY)) + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("ffs_fsync: not dirty"); + if (skipmeta 
&& bp->b_lblkno < 0) + continue; + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + /* + * Wait for I/O associated with indirect blocks to complete, + * since there is no way to quickly wait for them below. + */ + if (bp->b_vp == vp || ap->a_waitfor != MNT_WAIT) + (void) bawrite(bp); + else if ((error = bwrite(bp)) != 0) + return (error); + goto loop; + } + if (skipmeta) { + skipmeta = 0; + goto loop2; + } + if (ap->a_waitfor == MNT_WAIT) { + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; + sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); + } + /* + * Ensure that any filesystem metadata associated + * with the vnode has been written. + */ + splx(s); + if ((error = softdep_sync_metadata(ap)) != 0) + return (error); + s = splbio(); + if (vp->v_dirtyblkhd.lh_first) { + /* + * Block devices associated with filesystems may + * have new I/O requests posted for them even if + * the vnode is locked, so no amount of trying will + * get them clean. Thus we give block devices a + * good effort, then just give up. For all other file + * types, go around and try again until it is clean. + */ + if (passes > 0) { + passes -= 1; + goto loop2; + } +#ifdef DIAGNOSTIC + if (vp->v_type != VBLK) + vprint("ffs_fsync: dirty", vp); +#endif + } + } + splx(s); TIMEVAL_TO_TIMESPEC(&time, &ts); - return (VOP_UPDATE(ap->a_vp, &ts, &ts, ap->a_waitfor == MNT_WAIT)); + if ((error = VOP_UPDATE(vp, &ts, &ts, ap->a_waitfor == MNT_WAIT)) != 0) return (error); + if (DOINGSOFTDEP(vp) && ap->a_waitfor == MNT_WAIT) + error = softdep_fsync(vp); + return (error); } /* @@ -266,11 +333,12 @@ ffs_reclaim(v) { struct vop_reclaim_args /* { struct vnode *a_vp; + struct proc *a_p; } */ *ap = v; register struct vnode *vp = ap->a_vp; int error; - if ((error = ufs_reclaim(vp)) != 0) + if ((error = ufs_reclaim(vp, ap->a_p)) != 0) return (error); FREE(vp->v_data, VFSTOUFS(vp->v_mount)->um_devvp->v_tag == VT_MFS ? 
M_MFSNODE : M_FFSNODE); diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h index e5a17da3a22..2979a3c4fe9 100644 --- a/sys/ufs/ffs/fs.h +++ b/sys/ufs/ffs/fs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: fs.h,v 1.4 1997/05/30 08:34:28 downsj Exp $ */ +/* $OpenBSD: fs.h,v 1.5 1997/10/06 15:26:32 csapuntz Exp $ */ /* $NetBSD: fs.h,v 1.6 1995/04/12 21:21:02 mycroft Exp $ */ /* @@ -221,7 +221,7 @@ struct fs { int8_t fs_fmod; /* super block modified flag */ int8_t fs_clean; /* file system is clean flag */ int8_t fs_ronly; /* mounted read-only flag */ - int8_t fs_flags; /* currently unused flag */ + int8_t fs_flags; /* see FS_ below */ u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ /* these fields retain the current block allocation info */ int32_t fs_cgrotor; /* last cg searched */ @@ -267,6 +267,12 @@ struct fs { #define FS_OPTTIME 0 /* minimize allocation time */ #define FS_OPTSPACE 1 /* minimize disk fragmentation */ +/* + * Filesystem flags. + */ +#define FS_UNCLEAN 0x01 /* filesystem not clean at mount */ +#define FS_DOSOFTDEP 0x02 /* filesystem using soft dependencies */ + /* * Rotational layout table format types */ @@ -490,6 +496,12 @@ struct ocg { ? (fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (dip)->di_size)))) +#define sblksize(fs, size, lbn) \ + (((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (size))))) + + /* * Number of disk sectors per block/fragment; assumes DEV_BSIZE byte * sector size. 
diff --git a/sys/ufs/mfs/mfs_extern.h b/sys/ufs/mfs/mfs_extern.h index bd14c23226d..3616acedf76 100644 --- a/sys/ufs/mfs/mfs_extern.h +++ b/sys/ufs/mfs/mfs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mfs_extern.h,v 1.2 1996/02/27 07:15:46 niklas Exp $ */ +/* $OpenBSD: mfs_extern.h,v 1.3 1997/10/06 15:27:12 csapuntz Exp $ */ /* $NetBSD: mfs_extern.h,v 1.4 1996/02/09 22:31:27 christos Exp $ */ /*- @@ -43,6 +43,7 @@ struct proc; struct statfs; struct ucred; struct vnode; +struct vfsconf; __BEGIN_DECLS /* mfs_vfsops.c */ @@ -53,7 +54,7 @@ int mfs_mount __P((struct mount *, char *, caddr_t, int mfs_start __P((struct mount *, int, struct proc *)); int mfs_statfs __P((struct mount *, struct statfs *, struct proc *)); -void mfs_init __P((void)); +int mfs_init __P((struct vfsconf *)); /* mfs_vnops.c */ int mfs_open __P((void *)); @@ -65,6 +66,7 @@ int mfs_close __P((void *)); int mfs_inactive __P((void *)); int mfs_reclaim __P((void *)); int mfs_print __P((void *)); +#define mfs_revoke vop_revoke int mfs_badop __P((void *)); __END_DECLS diff --git a/sys/ufs/mfs/mfs_vfsops.c b/sys/ufs/mfs/mfs_vfsops.c index 577325fe95b..dbd32e6ea2f 100644 --- a/sys/ufs/mfs/mfs_vfsops.c +++ b/sys/ufs/mfs/mfs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mfs_vfsops.c,v 1.2 1996/02/27 07:15:47 niklas Exp $ */ +/* $OpenBSD: mfs_vfsops.c,v 1.3 1997/10/06 15:27:12 csapuntz Exp $ */ /* $NetBSD: mfs_vfsops.c,v 1.10 1996/02/09 22:31:28 christos Exp $ */ /* @@ -69,7 +69,6 @@ extern int (**mfs_vnodeop_p) __P((void *)); * mfs vfs operations. */ struct vfsops mfs_vfsops = { - MOUNT_MFS, mfs_mount, mfs_start, ffs_unmount, @@ -81,37 +80,31 @@ struct vfsops mfs_vfsops = { ffs_fhtovp, ffs_vptofh, mfs_init, + ffs_sysctl }; /* * Called by main() when mfs is going to be mounted as root. - * - * Name is updated by mount(8) after booting. 
*/ -#define ROOTNAME "mfs_root" int mfs_mountroot() { extern struct vnode *rootvp; register struct fs *fs; - register struct mount *mp; + struct mount *mp; struct proc *p = curproc; /* XXX */ struct ufsmount *ump; struct mfsnode *mfsp; - size_t size; int error; - /* - * Get vnodes for swapdev and rootdev. - */ - if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp)) - panic("mfs_mountroot: can't setup bdevvp's"); - - mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); - mp->mnt_op = &mfs_vfsops; - mp->mnt_flag = MNT_RDONLY; + if ((error = bdevvp(swapdev, &swapdev_vp)) || + (error = bdevvp(rootdev, &rootvp))) { + printf("mfs_mountroot: can't setup bdevvp's"); + return (error); + } + if ((error = vfs_rootmountalloc("mfs", "mfs_root", &mp)) != 0) + return (error); mfsp = malloc(sizeof *mfsp, M_MFSNODE, M_WAITOK); rootvp->v_data = mfsp; rootvp->v_op = mfs_vnodeop_p; @@ -122,28 +115,20 @@ mfs_mountroot() mfsp->mfs_pid = p->p_pid; mfsp->mfs_buflist = (struct buf *)0; if ((error = ffs_mountfs(rootvp, mp, p)) != 0) { + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, p); free(mp, M_MOUNT); free(mfsp, M_MFSNODE); return (error); } - if ((error = vfs_lock(mp)) != 0) { - (void)ffs_unmount(mp, 0, p); - free(mp, M_MOUNT); - free(mfsp, M_MFSNODE); - return (error); - } + simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - mp->mnt_vnodecovered = NULLVP; + simple_unlock(&mountlist_slock); ump = VFSTOUFS(mp); fs = ump->um_fs; - bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); - fs->fs_fsmnt[0] = '/'; - bcopy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MNAMELEN); - (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, - &size); - bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void) copystr(mp->mnt_stat.f_mntonname, fs->fs_fsmnt, MNAMELEN - 1, 0); (void)ffs_statfs(mp, &mp->mnt_stat, p); - vfs_unlock(mp); + vfs_unbusy(mp, p); inittodr((time_t)0); return (0); } @@ -207,10 
+192,7 @@ mfs_mount(mp, path, data, ndp, p) flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; - if (vfs_busy(mp)) - return (EBUSY); error = ffs_flushfiles(mp, flags, p); - vfs_unbusy(mp); if (error) return (error); } @@ -272,7 +254,6 @@ mfs_start(mp, flags, p) register struct mfsnode *mfsp = VTOMFS(vp); register struct buf *bp; register caddr_t base; - int error = 0; base = mfsp->mfs_baseoff; while (mfsp->mfs_buflist != (struct buf *)-1) { @@ -289,13 +270,11 @@ mfs_start(mp, flags, p) * otherwise we will loop here, as tsleep will always return * EINTR/ERESTART. */ - if ((error = tsleep((caddr_t)vp, mfs_pri, "mfsidl", 0)) != 0) { - DOIO(); - if (dounmount(mp, 0, p) != 0) - CLRSIG(p, CURSIG(p)); - } + if (tsleep((caddr_t)vp, mfs_pri, "mfsidl", 0) && + dounmount(mp, 0, p) != 0) + CLRSIG(p, CURSIG(p)); } - return (error); + return (0); } /* @@ -311,10 +290,10 @@ mfs_statfs(mp, sbp, p) error = ffs_statfs(mp, sbp, p); #ifdef COMPAT_09 - sbp->f_type = 3; + sbp->f_type = mp->mnt_vfc->vfc_typenum; #else sbp->f_type = 0; #endif - strncpy(&sbp->f_fstypename[0], mp->mnt_op->vfs_name, MFSNAMELEN); + strncpy(&sbp->f_fstypename[0], mp->mnt_vfc->vfc_name, MFSNAMELEN); return (error); } diff --git a/sys/ufs/mfs/mfs_vnops.c b/sys/ufs/mfs/mfs_vnops.c index 63b20a029bf..84a5ed3d368 100644 --- a/sys/ufs/mfs/mfs_vnops.c +++ b/sys/ufs/mfs/mfs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mfs_vnops.c,v 1.4 1996/04/21 22:32:49 deraadt Exp $ */ +/* $OpenBSD: mfs_vnops.c,v 1.5 1997/10/06 15:27:13 csapuntz Exp $ */ /* $NetBSD: mfs_vnops.c,v 1.8 1996/03/17 02:16:32 christos Exp $ */ /* @@ -72,6 +72,7 @@ struct vnodeopv_entry_desc mfs_vnodeop_entries[] = { { &vop_write_desc, mfs_write }, /* write */ { &vop_ioctl_desc, mfs_ioctl }, /* ioctl */ { &vop_select_desc, mfs_select }, /* select */ + { &vop_revoke_desc, mfs_revoke }, /* revoke */ { &vop_mmap_desc, mfs_mmap }, /* mmap */ { &vop_fsync_desc, spec_fsync }, /* fsync */ { &vop_seek_desc, mfs_seek }, /* seek */ @@ -231,6 +232,9 @@ 
mfs_bmap(v) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; + if (ap->a_runp != NULL) + *ap->a_runp = 0; + return (0); } @@ -294,12 +298,14 @@ mfs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + struct proc *a_p; } */ *ap = v; register struct mfsnode *mfsp = VTOMFS(ap->a_vp); if (mfsp->mfs_buflist && mfsp->mfs_buflist != (struct buf *)(-1)) panic("mfs_inactive: not inactive (mfs_buflist %p)", mfsp->mfs_buflist); + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } @@ -352,8 +358,9 @@ mfs_badop(v) /* * Memory based filesystem initialization. */ -void -mfs_init() +int +mfs_init(vfsp) + struct vfsconf *vfsp; { - + return (0); } diff --git a/sys/ufs/mfs/mfsnode.h b/sys/ufs/mfs/mfsnode.h index d37f7ba4e68..29c290c4e09 100644 --- a/sys/ufs/mfs/mfsnode.h +++ b/sys/ufs/mfs/mfsnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mfsnode.h,v 1.3 1996/06/11 03:25:15 tholo Exp $ */ +/* $OpenBSD: mfsnode.h,v 1.4 1997/10/06 15:27:13 csapuntz Exp $ */ /* $NetBSD: mfsnode.h,v 1.3 1996/02/09 22:31:31 christos Exp $ */ /* @@ -76,9 +76,9 @@ struct mfsnode { #define mfs_readdir mfs_badop #define mfs_readlink mfs_badop #define mfs_abortop mfs_badop -#define mfs_lock nullop -#define mfs_unlock nullop -#define mfs_islocked nullop +#define mfs_lock vop_nolock +#define mfs_unlock vop_nounlock +#define mfs_islocked vop_noislocked #define mfs_pathconf mfs_badop #define mfs_advlock mfs_badop #define mfs_blkatoff mfs_badop diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index 9dcc48697f1..0a9a7a24151 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: inode.h,v 1.6 1997/05/30 15:18:49 downsj Exp $ */ +/* $OpenBSD: inode.h,v 1.7 1997/10/06 15:27:36 csapuntz Exp $ */ /* $NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $ */ /* @@ -45,6 +45,8 @@ #include <ufs/ufs/dir.h> #include <ufs/ext2fs/ext2fs_dinode.h> +typedef long ufs_lbn_t; + /* * Per-filesystem inode extensions. 
*/ @@ -63,13 +65,13 @@ struct ext2fs_inode_ext { * active, and is put back when the file is no longer being used. */ struct inode { - struct inode *i_next; /* Hash chain forward. */ - struct inode **i_prev; /* Hash chain back. */ + LIST_ENTRY(inode) i_hash; /* Hash chain */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct vnode *i_devvp;/* Vnode for block I/O. */ u_int32_t i_flag; /* flags, see below */ dev_t i_dev; /* Device associated with the inode. */ ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ union { /* Associated filesystem. */ struct fs *fs; /* FFS */ @@ -83,8 +85,8 @@ struct inode { struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ struct lockf *i_lockf;/* Head of byte-level lock list. */ - pid_t i_lockholder; /* DEBUG: holder of inode lock. */ - pid_t i_lockwaiter; /* DEBUG: latest blocked for inode lock. */ + struct lock i_lock; /* Inode lock */ + /* * Side effects; used during directory lookup. */ @@ -180,14 +182,11 @@ struct inode { /* These flags are kept in i_flag. */ #define IN_ACCESS 0x0001 /* Access time update request. */ #define IN_CHANGE 0x0002 /* Inode change time update request. */ -#define IN_EXLOCK 0x0004 /* File has exclusive lock. */ -#define IN_LOCKED 0x0008 /* Inode lock. */ -#define IN_LWAIT 0x0010 /* Process waiting on file lock. */ -#define IN_MODIFIED 0x0020 /* Inode has been modified. */ -#define IN_RENAME 0x0040 /* Inode is being renamed. */ -#define IN_SHLOCK 0x0080 /* File has shared lock. */ -#define IN_UPDATE 0x0100 /* Modification time update request. */ -#define IN_WANTED 0x0200 /* Inode is wanted by a process. */ +#define IN_UPDATE 0x0004 /* Modification time update request */ +#define IN_MODIFIED 0x0008 /* Inode has been modified. */ +#define IN_RENAME 0x0010 /* Inode is being renamed. */ +#define IN_SHLOCK 0x0020 /* File has shared lock. 
*/ +#define IN_EXLOCK 0x0040 /* File has exclusive lock. */ #ifdef _KERNEL /* @@ -242,6 +241,9 @@ struct indir { } \ } +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) + /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index 4dbeed61a92..166d8f43684 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_extern.h,v 1.2 1996/02/27 07:21:25 niklas Exp $ */ +/* $OpenBSD: ufs_extern.h,v 1.3 1997/10/06 15:27:36 csapuntz Exp $ */ /* $NetBSD: ufs_extern.h,v 1.5 1996/02/09 22:36:03 christos Exp $ */ /*- @@ -54,6 +54,7 @@ struct ufs_args; struct ufsmount; struct uio; struct vattr; +struct vfsconf; struct vnode; __BEGIN_DECLS @@ -86,6 +87,7 @@ int ufs_readdir __P((void *)); int ufs_readlink __P((void *)); int ufs_remove __P((void *)); int ufs_rename __P((void *)); +#define ufs_revoke vop_revoke int ufs_rmdir __P((void *)); int ufs_seek __P((void *)); int ufs_select __P((void *)); @@ -117,19 +119,19 @@ void ufs_ihashins __P((struct inode *)); void ufs_ihashrem __P((struct inode *)); /* ufs_inode.c */ -void ufs_init __P((void)); -int ufs_reclaim __P((struct vnode *)); +int ufs_init __P((struct vfsconf *)); +int ufs_reclaim __P((struct vnode *, struct proc *)); /* ufs_lookup.c */ void ufs_dirbad __P((struct inode *, doff_t, char *)); int ufs_dirbadentry __P((struct vnode *, struct direct *, int)); -int ufs_direnter __P((struct inode *, struct vnode *, - struct componentname *)); -int ufs_direnter2 __P((struct vnode *, struct direct *, struct ucred *, - struct proc *)); -int ufs_dirremove __P((struct vnode *, struct componentname *)); +void ufs_makedirentry __P((struct inode *, struct componentname *, + struct direct *)); +int ufs_direnter __P((struct vnode *, struct direct *, + struct componentname *, struct buf *)); +int ufs_dirremove 
__P((struct vnode *, struct inode *, int, int)); int ufs_dirrewrite __P((struct inode *, struct inode *, - struct componentname *)); + ino_t, int, int)); int ufs_dirempty __P((struct inode *, ino_t, struct ucred *)); int ufs_checkpath __P((struct inode *, struct inode *, struct ucred *)); @@ -165,4 +167,19 @@ int ufs_vinit __P((struct mount *, int (**) __P((void *)), int (**) __P((void *)), struct vnode **)); int ufs_makeinode __P((int, struct vnode *, struct vnode **, struct componentname *)); + + +/* + * Soft dependency function prototypes. + */ +void softdep_setup_directory_add __P((struct buf *, struct inode *, off_t, + long, struct buf *)); +void softdep_change_directoryentry_offset __P((struct inode *, caddr_t, + caddr_t, caddr_t, int)); +void softdep_setup_remove __P((struct buf *,struct inode *, struct inode *, + int)); +void softdep_setup_directory_change __P((struct buf *, struct inode *, + struct inode *, long, int)); +void softdep_increase_linkcnt __P((struct inode *)); + __END_DECLS diff --git a/sys/ufs/ufs/ufs_ihash.c b/sys/ufs/ufs/ufs_ihash.c index a9b7227942d..84ff51b8b39 100644 --- a/sys/ufs/ufs/ufs_ihash.c +++ b/sys/ufs/ufs/ufs_ihash.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_ihash.c,v 1.2 1996/02/27 07:21:26 niklas Exp $ */ +/* $OpenBSD: ufs_ihash.c,v 1.3 1997/10/06 15:27:37 csapuntz Exp $ */ /* $NetBSD: ufs_ihash.c,v 1.3 1996/02/09 22:36:04 christos Exp $ */ /* @@ -49,9 +49,10 @@ /* * Structures associated with inode cacheing. */ -struct inode **ihashtbl; +LIST_HEAD(ihashhead, inode) *ihashtbl; u_long ihash; /* size of hash table - 1 */ -#define INOHASH(device, inum) (((device) + (inum)) & ihash) +#define INOHASH(device, inum) (&ihashtbl[((device) + (inum)) & ihash]) +struct simplelock ufs_ihash_slock; /* * Initialize inode hash table. @@ -61,6 +62,7 @@ ufs_ihashinit() { ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash); + simple_lock_init(&ufs_ihash_slock); } /* @@ -68,19 +70,21 @@ ufs_ihashinit() * to it. 
If it is in core, return it, even if it is locked. */ struct vnode * -ufs_ihashlookup(device, inum) - dev_t device; +ufs_ihashlookup(dev, inum) + dev_t dev; ino_t inum; { - register struct inode *ip; + struct inode *ip; - for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) { - if (ip == NULL) - return (NULL); - if (inum == ip->i_number && device == ip->i_dev) - return (ITOV(ip)); - } - /* NOTREACHED */ + simple_lock(&ufs_ihash_slock); + for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) + if (inum == ip->i_number && dev == ip->i_dev) + break; + simple_unlock(&ufs_ihash_slock); + + if (ip) + return (ITOV(ip)); + return (NULLVP); } /* @@ -88,30 +92,28 @@ ufs_ihashlookup(device, inum) * to it. If it is in core, but locked, wait for it. */ struct vnode * -ufs_ihashget(device, inum) - dev_t device; +ufs_ihashget(dev, inum) + dev_t dev; ino_t inum; { - register struct inode *ip; + struct proc *p = curproc; + struct inode *ip; struct vnode *vp; +loop: + simple_lock(&ufs_ihash_slock); + for (ip = INOHASH(dev, inum)->lh_first; ip; ip = ip->i_hash.le_next) { + if (inum == ip->i_number && dev == ip->i_dev) { + vp = ITOV(ip); + simple_lock(&vp->v_interlock); + simple_unlock(&ufs_ihash_slock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) + goto loop; + return (vp); + } - for (;;) - for (ip = ihashtbl[INOHASH(device, inum)];; ip = ip->i_next) { - if (ip == NULL) - return (NULL); - if (inum == ip->i_number && device == ip->i_dev) { - if (ip->i_flag & IN_LOCKED) { - ip->i_flag |= IN_WANTED; - sleep(ip, PINOD); - break; - } - vp = ITOV(ip); - if (!vget(vp, 1)) - return (vp); - break; - } - } - /* NOTREACHED */ + } + simple_unlock(&ufs_ihash_slock); + return (NULL); } /* @@ -121,21 +123,16 @@ void ufs_ihashins(ip) struct inode *ip; { - struct inode **ipp, *iq; + struct proc *p = curproc; /* XXX */ + struct ihashhead *ipp; - ipp = &ihashtbl[INOHASH(ip->i_dev, ip->i_number)]; - if ((iq = *ipp) != NULL) - iq->i_prev = &ip->i_next; - ip->i_next = iq; - 
ip->i_prev = ipp; - *ipp = ip; - if (ip->i_flag & IN_LOCKED) - panic("ufs_ihashins: already locked"); - if (curproc) - ip->i_lockholder = curproc->p_pid; - else - ip->i_lockholder = -1; - ip->i_flag |= IN_LOCKED; + /* lock the inode, then put it on the appropriate hash list */ + lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct simplelock *)0, p); + + simple_lock(&ufs_ihash_slock); + ipp = INOHASH(ip->i_dev, ip->i_number); + LIST_INSERT_HEAD(ipp, ip, i_hash); + simple_unlock(&ufs_ihash_slock); } /* @@ -143,15 +140,14 @@ ufs_ihashins(ip) */ void ufs_ihashrem(ip) - register struct inode *ip; + struct inode *ip; { - register struct inode *iq; + simple_lock(&ufs_ihash_slock); + LIST_REMOVE(ip, i_hash); + #ifdef DIAGNOSTIC + ip->i_hash.le_next = NULL; + ip->i_hash.le_prev = NULL; + #endif + simple_unlock(&ufs_ihash_slock); - if ((iq = ip->i_next) != NULL) - iq->i_prev = ip->i_prev; - *ip->i_prev = iq; -#ifdef DIAGNOSTIC - ip->i_next = NULL; - ip->i_prev = NULL; -#endif } diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c index eed08b7f2cf..31437cd4bfd 100644 --- a/sys/ufs/ufs/ufs_inode.c +++ b/sys/ufs/ufs/ufs_inode.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_inode.c,v 1.4 1997/05/30 08:35:04 downsj Exp $ */ +/* $OpenBSD: ufs_inode.c,v 1.5 1997/10/06 15:27:37 csapuntz Exp $ */ /* $NetBSD: ufs_inode.c,v 1.7 1996/05/11 18:27:52 mycroft Exp $ */ /* @@ -57,6 +57,7 @@ u_long nextgennumber; /* Next generation number to assign. */ +#if 0 void ufs_init() { @@ -71,7 +72,7 @@ ufs_init() #endif return; } - +#endif /* * Last reference to an inode. If necessary, write or delete it. 
*/ @@ -81,39 +82,29 @@ ufs_inactive(v) { struct vop_inactive_args /* { struct vnode *a_vp; + struct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct proc *p = ap->a_p; struct timespec ts; - int mode, error; + int mode, error = 0; extern int prtactive; if (prtactive && vp->v_usecount != 0) vprint("ffs_inactive: pushing active", vp); - /* Get rid of inodes related to stale file handles. */ - if (ip->i_ffs_mode == 0) { - if ((vp->v_flag & VXLOCK) == 0) - vgone(vp); - return (0); - } - - error = 0; -#ifdef DIAGNOSTIC - if (VOP_ISLOCKED(vp)) - panic("ffs_inactive: locked inode"); - if (curproc) - ip->i_lockholder = curproc->p_pid; - else - ip->i_lockholder = -1; -#endif - ip->i_flag |= IN_LOCKED; + /* + * Ignore inodes related to stale file handles. + */ + if (ip->i_ffs_mode == 0) + goto out; if (ip->i_ffs_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { #ifdef QUOTA if (!getinoquota(ip)) (void)chkiq(ip, -1, NOCRED, 0); #endif - error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, NULL); + error = VOP_TRUNCATE(vp, (off_t)0, 0, NOCRED, p); ip->i_ffs_rdev = 0; mode = ip->i_ffs_mode; ip->i_ffs_mode = 0; @@ -124,13 +115,14 @@ ufs_inactive(v) TIMEVAL_TO_TIMESPEC(&time, &ts); VOP_UPDATE(vp, &ts, &ts, 0); } - VOP_UNLOCK(vp); +out: + VOP_UNLOCK(vp, 0, p); /* * If we are done with the inode, reclaim it * so that it can be reused immediately. */ - if (vp->v_usecount == 0 && ip->i_ffs_mode == 0) - vgone(vp); + if (ip->i_ffs_mode == 0) + vrecycle(vp, (struct simplelock *)0, p); return (error); } @@ -138,8 +130,9 @@ ufs_inactive(v) * Reclaim an inode so that it can be used for other purposes. 
*/ int -ufs_reclaim(vp) +ufs_reclaim(vp, p) register struct vnode *vp; + struct proc *p; { register struct inode *ip; extern int prtactive; diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index 38d828b987e..47587cdd00a 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_lookup.c,v 1.4 1997/05/30 08:35:08 downsj Exp $ */ +/* $OpenBSD: ufs_lookup.c,v 1.5 1997/10/06 15:27:38 csapuntz Exp $ */ /* $NetBSD: ufs_lookup.c,v 1.7 1996/02/09 22:36:06 christos Exp $ */ /* @@ -43,12 +43,16 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/kernel.h> #include <sys/namei.h> #include <sys/buf.h> #include <sys/file.h> +#include <sys/stat.h> #include <sys/mount.h> #include <sys/vnode.h> +#include <vm/vm.h> + #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> #include <ufs/ufs/dir.h> @@ -131,6 +135,7 @@ ufs_lookup(v) struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; + struct proc *p = cnp->cn_proc; bp = NULL; slotoffset = -1; @@ -148,6 +153,10 @@ ufs_lookup(v) if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0) return (error); + if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) + return (EROFS); + /* * We now have a segment name to search for, and a directory to search. 
* @@ -173,14 +182,14 @@ ufs_lookup(v) VREF(vdp); error = 0; } else if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp); - error = vget(vdp, 1); + VOP_UNLOCK(pdp, 0, p); + error = vget(vdp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) - error = VOP_LOCK(pdp); + error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); } else { - error = vget(vdp, 1); + error = vget(vdp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); } /* * Check that the capability number did not change @@ -191,13 +200,14 @@ ufs_lookup(v) return (0); vput(vdp); if (lockparent && pdp != vdp && (flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); } - if ((error = VOP_LOCK(pdp)) != 0) + *vpp = NULL; + + if ((error = vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p)) != 0) return (error); vdp = pdp; dp = VTOI(pdp); - *vpp = NULL; } /* @@ -396,7 +406,7 @@ notfound: (nameiop == DELETE && (ap->a_cnp->cn_flags & DOWHITEOUT) && (ap->a_cnp->cn_flags & ISWHITEOUT))) && - (flags & ISLASTCN) && dp->i_ffs_nlink != 0) { + (flags & ISLASTCN) && dp->i_effnlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. 
@@ -446,7 +456,7 @@ notfound: */ cnp->cn_flags |= SAVENAME; if (!lockparent) - VOP_UNLOCK(vdp); + VOP_UNLOCK(vdp, 0, p); return (EJUSTRETURN); } /* @@ -524,7 +534,7 @@ found: } *vpp = tdp; if (!lockparent) - VOP_UNLOCK(vdp); + VOP_UNLOCK(vdp, 0, p); return (0); } @@ -551,7 +561,7 @@ found: *vpp = tdp; cnp->cn_flags |= SAVENAME; if (!lockparent) - VOP_UNLOCK(vdp); + VOP_UNLOCK(vdp, 0, p); return (0); } @@ -576,14 +586,14 @@ found: */ pdp = vdp; if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp); /* race to get the inode */ + VOP_UNLOCK(pdp, 0, p); /* race to get the inode */ error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) { - VOP_LOCK(pdp); + vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } if (lockparent && (flags & ISLASTCN) && - (error = VOP_LOCK(pdp))) { + (error = vn_lock(pdp, LK_EXCLUSIVE, p))) { vput(tdp); return (error); } @@ -596,7 +606,7 @@ found: if (error) return (error); if (!lockparent || !(flags & ISLASTCN)) - VOP_UNLOCK(pdp); + VOP_UNLOCK(pdp, 0, p); *vpp = tdp; } @@ -671,108 +681,130 @@ bad: } /* - * Write a directory entry after a call to namei, using the parameters - * that it left in nameidata. The argument ip is the inode which the new - * directory entry will refer to. Dvp is a pointer to the directory to - * be written, which was left locked by namei. Remaining parameters - * (dp->i_offset, dp->i_count) indicate how the space for the new - * entry is to be obtained. + * Construct a new directory entry after a call to namei, using the + * parameters that it left in the componentname argument cnp. The + * argument ip is the inode to which the new directory entry will refer. 
*/ -int -ufs_direnter(ip, dvp, cnp) - struct inode *ip; - struct vnode *dvp; - register struct componentname *cnp; +void +ufs_makedirentry(ip, cnp, newdirp) + struct inode *ip; + struct componentname *cnp; + struct direct *newdirp; { - register struct inode *dp; - struct direct newdir; - + #ifdef DIAGNOSTIC - if ((cnp->cn_flags & SAVENAME) == 0) - panic("direnter: missing name"); + if ((cnp->cn_flags & SAVENAME) == 0) + panic("ufs_makedirentry: missing name"); #endif - dp = VTOI(dvp); - newdir.d_ino = ip->i_number; - newdir.d_namlen = cnp->cn_namelen; - bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); - if (dvp->v_mount->mnt_maxsymlinklen > 0) - newdir.d_type = IFTODT(ip->i_ffs_mode); - else { - newdir.d_type = 0; + newdirp->d_ino = ip->i_number; + newdirp->d_namlen = cnp->cn_namelen; + bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); + if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) + newdirp->d_type = IFTODT(ip->i_ffs_mode); + else { + newdirp->d_type = 0; # if (BYTE_ORDER == LITTLE_ENDIAN) - { u_char tmp = newdir.d_namlen; - newdir.d_namlen = newdir.d_type; - newdir.d_type = tmp; } + { u_char tmp = newdirp->d_namlen; + newdirp->d_namlen = newdirp->d_type; + newdirp->d_type = tmp; } # endif - } - return (ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc)); + } } - + /* - * Common entry point for directory entry removal used by ufs_direnter - * and ufs_whiteout + * Write a directory entry after a call to namei, using the parameters + * that it left in nameidata. The argument dirp is the new directory + * entry contents. Dvp is a pointer to the directory to be written, + * which was left locked by namei. Remaining parameters (dp->i_offset, + * dp->i_count) indicate how the space for the new entry is to be obtained. + * Non-null bp indicates that a directory is being created (for the + * soft dependency code). 
*/ int -ufs_direnter2(dvp, dirp, cr, p) - struct vnode *dvp; - struct direct *dirp; - struct ucred *cr; - struct proc *p; +ufs_direnter(dvp, dirp, cnp, newdirbp) + struct vnode *dvp; + struct direct *dirp; + struct componentname *cnp; + struct buf *newdirbp; { - int newentrysize; - struct inode *dp; - struct buf *bp; - struct iovec aiov; - struct uio auio; - u_int dsize; - struct direct *ep, *nep; - int error, loc, spacefree; - char *dirbuf; + struct ucred *cr; + struct proc *p; + int newentrysize; + struct inode *dp; + struct buf *bp; + u_int dsize; + struct direct *ep, *nep; + int error, ret, blkoff, loc, spacefree, flags; + char *dirbuf; + struct timespec ts; - dp = VTOI(dvp); - newentrysize = DIRSIZ(FSFMT(dvp), dirp); + error = 0; + cr = cnp->cn_cred; + p = cnp->cn_proc; + dp = VTOI(dvp); + newentrysize = DIRSIZ(FSFMT(dvp), dirp); if (dp->i_count == 0) { /* * If dp->i_count is 0, then namei could find no * space in the directory. Here, dp->i_offset will * be on a directory block boundary and we will write the - * new entry into a fresh block. - */ - if (dp->i_offset & (DIRBLKSIZ - 1)) - panic("ufs_direnter2: newblk"); - auio.uio_offset = dp->i_offset; - dirp->d_reclen = DIRBLKSIZ; - auio.uio_resid = newentrysize; - aiov.iov_len = newentrysize; - aiov.iov_base = (caddr_t)dirp; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = (struct proc *)0; - error = VOP_WRITE(dvp, &auio, IO_SYNC, cr); - if (DIRBLKSIZ > - VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) - /* XXX should grow with balloc() */ - panic("ufs_direnter2: frag size"); - else if (!error) { - dp->i_ffs_size = roundup(dp->i_ffs_size, DIRBLKSIZ); - dp->i_flag |= IN_CHANGE; + * new entry into a fresh block. 
+ */ + if (dp->i_offset & (DIRBLKSIZ - 1)) + panic("ufs_direnter: newblk"); + flags = B_CLRBUF; + if (!DOINGSOFTDEP(dvp)) + flags |= B_SYNC; + if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, + cr, flags, &bp)) != 0) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); + return (error); } - return (error); - } - - /* - * If dp->i_count is non-zero, then namei found space - * for the new entry in the range dp->i_offset to - * dp->i_offset + dp->i_count in the directory. - * To use this space, we may have to compact the entries located - * there, by copying them together towards the beginning of the - * block, leaving the free space in one usable chunk at the end. - */ - - /* + dp->i_ffs_size = dp->i_offset + DIRBLKSIZ; + dp->i_flag |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(dvp, (u_long)dp->i_ffs_size); + dirp->d_reclen = DIRBLKSIZ; + blkoff = dp->i_offset & + (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); + bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); + if (DOINGSOFTDEP(dvp)) { + /* + * Ensure that the entire newly allocated block is a + * valid directory so that future growth within the + * block does not have to ensure that the block is + * written before the inode. + */ + blkoff += DIRBLKSIZ; + while (blkoff < bp->b_bcount) { + ((struct direct *) + (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; + blkoff += DIRBLKSIZ; + } + softdep_setup_directory_add(bp, dp, dp->i_offset, + dirp->d_ino, newdirbp); + bdwrite(bp); + } else { + error = VOP_BWRITE(bp); + } + TIMEVAL_TO_TIMESPEC(&time, &ts); + ret = VOP_UPDATE(dvp, &ts, &ts, !DOINGSOFTDEP(dvp)); + if (error == 0) + return (ret); + return (error); + } + + /* + * If dp->i_count is non-zero, then namei found space for the new + * entry in the range dp->i_offset to dp->i_offset + dp->i_count + * in the directory. 
To use this space, we may have to compact + * the entries located there, by copying them together towards the + * beginning of the block, leaving the free space in one usable + * chunk at the end. + */ + + /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZE. @@ -784,15 +816,17 @@ ufs_direnter2(dvp, dirp, cr, p) /* * Get the block containing the space for the new directory entry. */ - error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); - if (error) - return (error); + if ((error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp)) + != 0) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); + return (error); + } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region dp->i_offset to - * dp->i_offset + dp->i_count would yield the - * space. + * dp->i_offset + dp->i_count would yield the space. 
*/ ep = (struct direct *)dirbuf; dsize = DIRSIZ(FSFMT(dvp), ep); @@ -810,7 +844,11 @@ ufs_direnter2(dvp, dirp, cr, p) dsize = DIRSIZ(FSFMT(dvp), nep); spacefree += nep->d_reclen - dsize; loc += nep->d_reclen; - bcopy((caddr_t)nep, (caddr_t)ep, dsize); + if (DOINGSOFTDEP(dvp)) + softdep_change_directoryentry_offset(dp, dirbuf, + (caddr_t)nep, (caddr_t)ep, dsize); + else + bcopy((caddr_t)nep, (caddr_t)ep, dsize); } /* * Update the pointer fields in the previous entry (if any), @@ -820,19 +858,26 @@ ufs_direnter2(dvp, dirp, cr, p) (ep->d_ino == WINO && bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { if (spacefree + dsize < newentrysize) - panic("ufs_direnter2: compact1"); + panic("ufs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) - panic("ufs_direnter2: compact2"); + panic("ufs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); - error = VOP_BWRITE(bp); + + if (DOINGSOFTDEP(dvp)) { + softdep_setup_directory_add(bp, dp, + dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp); + bdwrite(bp); + } else { + error = VOP_BWRITE(bp); + } dp->i_flag |= IN_CHANGE | IN_UPDATE; - if (!error && dp->i_endoff && dp->i_endoff < dp->i_ffs_size) + if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_ffs_size) error = VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); return (error); } @@ -850,18 +895,20 @@ ufs_direnter2(dvp, dirp, cr, p) * to the size of the previous entry. */ int -ufs_dirremove(dvp, cnp) +ufs_dirremove(dvp, ip, flags, isrmdir) struct vnode *dvp; - struct componentname *cnp; + struct inode *ip; + int flags; + int isrmdir; { - register struct inode *dp; + struct inode *dp; struct direct *ep; struct buf *bp; int error; dp = VTOI(dvp); - if (cnp->cn_flags & DOWHITEOUT) { + if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to WINO. 
*/ @@ -871,33 +918,39 @@ ufs_dirremove(dvp, cnp) return (error); ep->d_ino = WINO; ep->d_type = DT_WHT; - error = VOP_BWRITE(bp); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); + goto out; } + if ((error = VOP_BLKATOFF(dvp, + (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) + return (error); + if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. */ - error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, - &bp); - if (error) - return (error); ep->d_ino = 0; + } else { + /* + * Collapse new free space into previous entry. + */ + ep->d_reclen += dp->i_reclen; + } +out: + if (ip) { + ip->i_effnlink--; + ip->i_flag |= IN_CHANGE; + } + if (DOINGSOFTDEP(dvp)) { + if (ip) + softdep_setup_remove(bp, dp, ip, isrmdir); + bdwrite(bp); + } else { + if (ip) + ip->i_ffs_nlink--; /* XXX */ + error = VOP_BWRITE(bp); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); } - /* - * Collapse new free space into previous entry. - */ - error = VOP_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count), - (char **)&ep, &bp); - if (error) - return (error); - ep->d_reclen += dp->i_reclen; - error = VOP_BWRITE(bp); dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } @@ -908,9 +961,11 @@ ufs_dirremove(dvp, cnp) * set up by a call to namei. 
*/ int -ufs_dirrewrite(dp, ip, cnp) - struct inode *dp, *ip; - struct componentname *cnp; +ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) + struct inode *dp, *oip; + ino_t newinum; + int newtype; + int isrmdir; { struct buf *bp; struct direct *ep; @@ -920,10 +975,18 @@ ufs_dirrewrite(dp, ip, cnp) error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); - ep->d_ino = ip->i_number; + ep->d_ino = newinum; if (vdp->v_mount->mnt_maxsymlinklen > 0) - ep->d_type = IFTODT(ip->i_ffs_mode); - error = VOP_BWRITE(bp); + ep->d_type = newtype; + oip->i_effnlink--; + oip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vdp)) { + softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); + bdwrite(bp); + } else { + oip->i_ffs_nlink--; /* XXX */ + error = VOP_BWRITE(bp); + } dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } @@ -983,7 +1046,7 @@ ufs_dirempty(ip, parentino, cred) * 1 implies ".", 2 implies ".." if second * char is also "." */ - if (namlen == 1) + if (namlen == 1 && dp->d_ino == ip->i_number) continue; if (dp->d_name[1] == '.' 
&& dp->d_ino == parentino) continue; diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index f6ea0606058..bc295d57e26 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_quota.c,v 1.3 1997/05/30 08:35:10 downsj Exp $ */ +/* $OpenBSD: ufs_quota.c,v 1.4 1997/10/06 15:27:38 csapuntz Exp $ */ /* $NetBSD: ufs_quota.c,v 1.8 1996/02/09 22:36:09 christos Exp $ */ /* @@ -376,15 +376,11 @@ quotaon(p, mp, type, fname) if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) return (error); vp = nd.ni_vp; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (vp->v_type != VREG) { (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p); return (EACCES); } - if (vfs_busy(mp)) { - (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p); - return (EBUSY); - } if (*vpp != vp) quotaoff(p, mp, type); ump->um_qflags[type] |= QTF_OPENING; @@ -414,9 +410,9 @@ quotaon(p, mp, type, fname) again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { nextvp = vp->v_mntvnodes.le_next; - if (vp->v_writecount == 0) + if (vp->v_type == VNON || vp->v_writecount == 0) continue; - if (vget(vp, 1)) + if (vget(vp, LK_EXCLUSIVE, p)) goto again; if ((error = getinoquota(VTOI(vp))) != 0) { vput(vp); @@ -429,7 +425,6 @@ again: ump->um_qflags[type] &= ~QTF_OPENING; if (error) quotaoff(p, mp, type); - vfs_unbusy(mp); return (error); } @@ -449,8 +444,6 @@ quotaoff(p, mp, type) register struct inode *ip; int error; - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("quotaoff: not busy"); if ((qvp = ump->um_quotas[type]) == NULLVP) return (0); ump->um_qflags[type] |= QTF_CLOSING; @@ -461,7 +454,9 @@ quotaoff(p, mp, type) again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { nextvp = vp->v_mntvnodes.le_next; - if (vget(vp, 1)) + if (vp->v_type == VNON) + continue; + if (vget(vp, LK_EXCLUSIVE, p)) goto again; ip = VTOI(vp); dq = ip->i_dquot[type]; @@ -621,16 +616,16 @@ qsync(mp) struct mount *mp; { struct ufsmount *ump = VFSTOUFS(mp); + struct proc *p 
= curproc; register struct vnode *vp, *nextvp; register struct dquot *dq; register int i; + int error = 0; /* * Check if the mount point has any quotas. * If not, simply return. */ - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("qsync: not busy"); for (i = 0; i < MAXQUOTAS; i++) if (ump->um_quotas[i] != NULLVP) break; @@ -640,22 +635,34 @@ qsync(mp) * Search vnodes associated with this mount point, * synchronizing any modified dquot structures. */ + simple_lock(&mntvnode_slock); again: - for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { - nextvp = vp->v_mntvnodes.le_next; - if (VOP_ISLOCKED(vp)) - continue; - if (vget(vp, 1)) + for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { + if (vp->v_mount != mp) goto again; + nextvp = vp->v_mntvnodes.le_next; + if (vp->v_type == VNON) + continue; + simple_lock(&vp->v_interlock); + simple_unlock(&mntvnode_slock); + error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); + if (error) { + simple_lock(&mntvnode_slock); + if (error == ENOENT) + goto again; + continue; + } for (i = 0; i < MAXQUOTAS; i++) { dq = VTOI(vp)->i_dquot[i]; if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) dqsync(vp, dq); } vput(vp); - if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) - goto again; - } + simple_lock(&mntvnode_slock); + if (vp->v_mntvnodes.le_next != nextvp) + goto again; + } + simple_unlock(&mntvnode_slock); return (0); } @@ -697,6 +704,7 @@ dqget(vp, id, ump, type, dqp) register int type; struct dquot **dqp; { + struct proc *p = curproc; register struct dquot *dq; struct dqhash *dqh; register struct vnode *dqvp; @@ -752,7 +760,7 @@ dqget(vp, id, ump, type, dqp) * Initialize the contents of the dquot structure. 
*/ if (vp != dqvp) - VOP_LOCK(dqvp); + vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); LIST_INSERT_HEAD(dqh, dq, dq_hash); DQREF(dq); dq->dq_flags = DQ_LOCK; @@ -772,7 +780,7 @@ dqget(vp, id, ump, type, dqp) if (auio.uio_resid == sizeof(struct dqblk) && error == 0) bzero((caddr_t)&dq->dq_dqb, sizeof(struct dqblk)); if (vp != dqvp) - VOP_UNLOCK(dqvp); + VOP_UNLOCK(dqvp, 0, p); if (dq->dq_flags & DQ_WANT) wakeup((caddr_t)dq); dq->dq_flags = 0; @@ -844,6 +852,7 @@ dqsync(vp, dq) struct vnode *vp; register struct dquot *dq; { + struct proc *p = curproc; struct vnode *dqvp; struct iovec aiov; struct uio auio; @@ -856,13 +865,13 @@ dqsync(vp, dq) if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP) panic("dqsync: file"); if (vp != dqvp) - VOP_LOCK(dqvp); + vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; sleep((caddr_t)dq, PINOD+2); if ((dq->dq_flags & DQ_MOD) == 0) { if (vp != dqvp) - VOP_UNLOCK(dqvp); + VOP_UNLOCK(dqvp, 0, p); return (0); } } @@ -883,7 +892,7 @@ dqsync(vp, dq) wakeup((caddr_t)dq); dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT); if (vp != dqvp) - VOP_UNLOCK(dqvp); + VOP_UNLOCK(dqvp, 0, p); return (error); } diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index 604c16fcb90..25148b78f61 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_readwrite.c,v 1.9 1997/05/30 08:35:13 downsj Exp $ */ +/* $OpenBSD: ufs_readwrite.c,v 1.10 1997/10/06 15:27:39 csapuntz Exp $ */ /* $NetBSD: ufs_readwrite.c,v 1.9 1996/05/11 18:27:57 mycroft Exp $ */ /*- @@ -242,19 +242,13 @@ WRITE(v) xfersize = fs->fs_bsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; -#ifdef LFS_READWRITE - (void)lfs_check(vp, lbn); - error = lfs_balloc(vp, blkoffset, xfersize, lbn, &bp); -#else if (fs->fs_bsize > xfersize) flags |= B_CLRBUF; else flags &= ~B_CLRBUF; - error = ffs_balloc(ip, - lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); -#endif 
- if (error) + if ((error = VOP_BALLOC(vp, uio->uio_offset, xfersize, + ap->a_cred, flags, &bp)) != 0) break; if (uio->uio_offset + xfersize > ip->i_ffs_size) { ip->i_ffs_size = uio->uio_offset + xfersize; diff --git a/sys/ufs/ufs/ufs_vfsops.c b/sys/ufs/ufs/ufs_vfsops.c index 0e308fd39dd..11dfa3086c4 100644 --- a/sys/ufs/ufs/ufs_vfsops.c +++ b/sys/ufs/ufs/ufs_vfsops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_vfsops.c,v 1.3 1997/05/30 08:35:15 downsj Exp $ */ +/* $OpenBSD: ufs_vfsops.c,v 1.4 1997/10/06 15:27:39 csapuntz Exp $ */ /* $NetBSD: ufs_vfsops.c,v 1.4 1996/02/09 22:36:12 christos Exp $ */ /* @@ -125,39 +125,64 @@ ufs_quotactl(mp, cmds, uid, arg, p) if ((u_int)type >= MAXQUOTAS) return (EINVAL); + if (vfs_busy(mp, LK_NOWAIT, 0, p)) + return (0); + + switch (cmd) { case Q_QUOTAON: - return (quotaon(p, mp, type, arg)); + error = quotaon(p, mp, type, arg); + break; case Q_QUOTAOFF: - if (vfs_busy(mp)) - return (0); error = quotaoff(p, mp, type); - vfs_unbusy(mp); - return (error); + break; case Q_SETQUOTA: - return (setquota(mp, uid, type, arg)); + error = setquota(mp, uid, type, arg) ; + break; case Q_SETUSE: - return (setuse(mp, uid, type, arg)); + error = setuse(mp, uid, type, arg); + break; case Q_GETQUOTA: - return (getquota(mp, uid, type, arg)); + error = getquota(mp, uid, type, arg); + break; case Q_SYNC: - if (vfs_busy(mp)) - return (0); error = qsync(mp); - vfs_unbusy(mp); - return (error); + break; default: - return (EINVAL); + error = EINVAL; + break; } - /* NOTREACHED */ + + vfs_unbusy(mp, p); + return (error); +#endif +} + + +/* + * Initial UFS filesystems, done only once. 
+ */ +int +ufs_init(vfsp) + struct vfsconf *vfsp; +{ + static int done; + + if (done) + return (0); + done = 1; + ufs_ihashinit(); +#ifdef QUOTA + dqinit(); #endif + return (0); } /* diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index fe58d6e899e..12245ddece3 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ufs_vnops.c,v 1.10 1997/07/03 17:49:49 deraadt Exp $ */ +/* $OpenBSD: ufs_vnops.c,v 1.11 1997/10/06 15:27:40 csapuntz Exp $ */ /* $NetBSD: ufs_vnops.c,v 1.18 1996/05/11 18:28:04 mycroft Exp $ */ /* @@ -90,6 +90,19 @@ union _qcvt { (q) = tmp.qcvt; \ } + +/* + * A virgin directory (no blushing please). + */ +static struct dirtemplate mastertemplate = { + 0, 12, DT_DIR, 1, ".", + 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." +}; +static struct odirtemplate omastertemplate = { + 0, 12, 1, ".", + 0, DIRBLKSIZ - 12, 2, ".." +}; + /* * Create a regular file */ @@ -117,19 +130,19 @@ ufs_mknod(v) void *v; { struct vop_mknod_args /* { - struct vnode *a_dvp; - struct vnode **a_vpp; - struct componentname *a_cnp; - struct vattr *a_vap; - } */ *ap = v; - register struct vattr *vap = ap->a_vap; - register struct vnode **vpp = ap->a_vpp; - register struct inode *ip; + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap = v; + struct vattr *vap = ap->a_vap; + struct vnode **vpp = ap->a_vpp; + struct inode *ip; int error; if ((error = - ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), - ap->a_dvp, vpp, ap->a_cnp)) != 0) + ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), + ap->a_dvp, vpp, ap->a_cnp)) != 0) return (error); ip = VTOI(*vpp); ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; @@ -163,11 +176,11 @@ ufs_open(v) void *v; { struct vop_open_args /* { - struct vnode *a_vp; - int a_mode; - struct ucred *a_cred; - struct proc *a_p; - } */ *ap = v; + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap = v; /* * Files marked 
append-only must be opened for appending. @@ -194,11 +207,13 @@ ufs_close(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); - if (vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED)) + simple_lock(&vp->v_interlock); + if (vp->v_usecount > 1) ITIMES(ip, &time, &time); + simple_unlock(&vp->v_interlock); return (0); } @@ -212,25 +227,27 @@ ufs_access(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); mode_t mode = ap->a_mode; -#ifdef DIAGNOSTIC - if (!VOP_ISLOCKED(vp)) { - vprint("ufs_access: not locked", vp); - panic("ufs_access: not locked"); - } -#endif -#ifdef QUOTA - if (mode & VWRITE) + /* + * Disallow write attempts on read-only file systems; + * unless the file is a socket, fifo, or a block or + * character device resident on the file system. + */ + if (mode & VWRITE) { switch (vp->v_type) { int error; case VDIR: case VLNK: case VREG: + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); +#ifdef QUOTA if ((error = getinoquota(ip)) != 0) return (error); +#endif break; case VBAD: case VBLK: @@ -239,8 +256,9 @@ ufs_access(v) case VFIFO: case VNON: break; + } -#endif + } /* If immutable bit set, nobody gets to write it. 
*/ if ((mode & VWRITE) && (ip->i_ffs_flags & IMMUTABLE)) @@ -261,9 +279,9 @@ ufs_getattr(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); - register struct vattr *vap = ap->a_vap; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct vattr *vap = ap->a_vap; ITIMES(ip, &time, &time); /* @@ -272,7 +290,7 @@ ufs_getattr(v) vap->va_fsid = ip->i_dev; vap->va_fileid = ip->i_number; vap->va_mode = ip->i_ffs_mode & ~IFMT; - vap->va_nlink = ip->i_ffs_nlink; + vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_ffs_uid; vap->va_gid = ip->i_ffs_gid; vap->va_rdev = (dev_t)ip->i_ffs_rdev; @@ -311,11 +329,11 @@ ufs_setattr(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - register struct vattr *vap = ap->a_vap; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); - register struct ucred *cred = ap->a_cred; - register struct proc *p = ap->a_p; + struct vattr *vap = ap->a_vap; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct ucred *cred = ap->a_cred; + struct proc *p = ap->a_p; int error; /* @@ -328,6 +346,8 @@ ufs_setattr(v) return (EINVAL); } if (vap->va_flags != VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); if (cred->cr_uid != ip->i_ffs_uid && (error = suser(cred, &p->p_acflag))) return (error); @@ -337,7 +357,8 @@ ufs_setattr(v) return (EPERM); ip->i_ffs_flags = vap->va_flags; } else { - if (ip->i_ffs_flags & (SF_IMMUTABLE | SF_APPEND)) + if (ip->i_ffs_flags & (SF_IMMUTABLE | SF_APPEND) || + (vap->va_flags & UF_SETTABLE) != vap->va_flags) return (EPERM); ip->i_ffs_flags &= SF_SETTABLE; ip->i_ffs_flags |= (vap->va_flags & UF_SETTABLE); @@ -352,19 +373,36 @@ ufs_setattr(v) * Go through the fields and update if not VNOVAL. 
*/ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p); if (error) return (error); } if (vap->va_size != VNOVAL) { - if (vp->v_type == VDIR) - return (EISDIR); - error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p); - if (error) - return (error); + /* + * Disallow write attempts on read-only file systems; + * unless the file is a socket, fifo, or a block or + * character device resident on the file system. + */ + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VLNK: + case VREG: + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + break; + default: + break; + } + if ((error = VOP_TRUNCATE(vp, vap->va_size, 0, cred, p)) != 0) + return (error); } ip = VTOI(vp); if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); if (cred->cr_uid != ip->i_ffs_uid && (error = suser(cred, &p->p_acflag)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || @@ -374,13 +412,16 @@ ufs_setattr(v) ip->i_flag |= IN_ACCESS; if (vap->va_mtime.tv_sec != VNOVAL) ip->i_flag |= IN_CHANGE | IN_UPDATE; - error = VOP_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 1); + error = VOP_UPDATE(vp, &vap->va_atime, &vap->va_mtime, 0); if (error) return (error); } error = 0; - if (vap->va_mode != (mode_t)VNOVAL) + if (vap->va_mode != (mode_t)VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); error = ufs_chmod(vp, (int)vap->va_mode, cred, p); + } return (error); } @@ -390,12 +431,12 @@ ufs_setattr(v) */ static int ufs_chmod(vp, mode, cred, p) - register struct vnode *vp; - register int mode; - register struct ucred *cred; + struct vnode *vp; + int mode; + struct ucred *cred; struct proc *p; { - register struct inode *ip = VTOI(vp); + struct inode *ip = VTOI(vp); int error; if (cred->cr_uid != ip->i_ffs_uid && @@ -421,18 +462,18 @@ ufs_chmod(vp, mode, cred, p) */ static int 
ufs_chown(vp, uid, gid, cred, p) - register struct vnode *vp; + struct vnode *vp; uid_t uid; gid_t gid; struct ucred *cred; struct proc *p; { - register struct inode *ip = VTOI(vp); + struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; #ifdef QUOTA - register int i; + int i; long change; #endif @@ -614,9 +655,9 @@ ufs_remove(v) struct vnode *a_vp; struct componentname *a_cnp; } */ *ap = v; - register struct inode *ip; - register struct vnode *vp = ap->a_vp; - register struct vnode *dvp = ap->a_dvp; + struct inode *ip; + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; int error; ip = VTOI(vp); @@ -625,10 +666,8 @@ ufs_remove(v) error = EPERM; goto out; } - if ((error = ufs_dirremove(dvp, ap->a_cnp)) == 0) { - ip->i_ffs_nlink--; - ip->i_flag |= IN_CHANGE; - } + if ((error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0)) != 0) + goto out; out: if (dvp == vp) vrele(vp); @@ -650,10 +689,12 @@ ufs_link(v) struct vnode *a_vp; struct componentname *a_cnp; } */ *ap = v; - register struct vnode *dvp = ap->a_dvp; - register struct vnode *vp = ap->a_vp; - register struct componentname *cnp = ap->a_cnp; - register struct inode *ip; + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + struct inode *ip; + struct direct newdir; struct timespec ts; int error; @@ -671,7 +712,7 @@ ufs_link(v) error = EXDEV; goto out2; } - if (dvp != vp && (error = VOP_LOCK(vp))) { + if (dvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { VOP_ABORTOP(dvp, cnp); goto out2; } @@ -686,20 +727,25 @@ ufs_link(v) error = EPERM; goto out1; } + ip->i_effnlink++; ip->i_ffs_nlink++; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vp)) + softdep_increase_linkcnt(ip); TIMEVAL_TO_TIMESPEC(&time, &ts); - error = VOP_UPDATE(vp, &ts, &ts, 1); - if (!error) - error = ufs_direnter(ip, dvp, cnp); + if ((error = VOP_UPDATE(vp, &ts, &ts, !DOINGSOFTDEP(vp))) == 0) { + ufs_makedirentry(ip, cnp, &newdir); + 
error = ufs_direnter(dvp, &newdir, cnp, NULL); + } if (error) { + ip->i_effnlink--; ip->i_ffs_nlink--; ip->i_flag |= IN_CHANGE; } FREE(cnp->cn_pnbuf, M_NAMEI); out1: if (dvp != vp) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); out2: vput(dvp); return (error); @@ -742,7 +788,7 @@ ufs_whiteout(v) newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; - error = ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc); + error = ufs_direnter(dvp, &newdir, cnp, NULL); break; case DELETE: @@ -753,8 +799,11 @@ ufs_whiteout(v) #endif cnp->cn_flags &= ~DOWHITEOUT; - error = ufs_dirremove(dvp, cnp); + error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; + default: + panic("ufs_whiteout: unknown op"); + /* NOTREACHED */ } if (cnp->cn_flags & HASBUF) { FREE(cnp->cn_pnbuf, M_NAMEI); @@ -801,17 +850,17 @@ ufs_rename(v) struct componentname *a_tcnp; } */ *ap = v; struct vnode *tvp = ap->a_tvp; - register struct vnode *tdvp = ap->a_tdvp; + struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; - register struct vnode *fdvp = ap->a_fdvp; - register struct componentname *tcnp = ap->a_tcnp; - register struct componentname *fcnp = ap->a_fcnp; - register struct inode *ip, *xp, *dp; - struct dirtemplate dirbuf; + struct vnode *fdvp = ap->a_fdvp; + struct componentname *tcnp = ap->a_tcnp; + struct componentname *fcnp = ap->a_fcnp; + struct proc *p = fcnp->cn_proc; + struct inode *ip, *xp, *dp; + struct direct newdir; struct timespec ts; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; - u_char namlen; #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || @@ -868,13 +917,13 @@ abortit: (void) relookup(fdvp, &fvp, fcnp); return (VOP_REMOVE(fdvp, fvp, fcnp)); } - if ((error = VOP_LOCK(fvp)) != 0) + if ((error = vn_lock(fvp, LK_EXCLUSIVE, p)) != 0) goto abortit; dp = VTOI(fdvp); ip = VTOI(fvp); if ((ip->i_ffs_flags & (IMMUTABLE | APPEND)) || (dp->i_ffs_flags & APPEND)) { - 
VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); error = EPERM; goto abortit; } @@ -883,7 +932,7 @@ abortit: if (!error && tvp) error = VOP_ACCESS(tvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); if (error) { - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); error = EACCES; goto abortit; } @@ -895,7 +944,7 @@ abortit: (fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) || (ip->i_flag & IN_RENAME)) { - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } @@ -920,11 +969,14 @@ abortit: * completing our work, the link count * may be wrong, but correctable. */ + ip->i_effnlink++; ip->i_ffs_nlink++; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(fvp)) + softdep_increase_linkcnt(ip); TIMEVAL_TO_TIMESPEC(&time, &ts); - if ((error = VOP_UPDATE(fvp, &ts, &ts, 1)) != 0) { - VOP_UNLOCK(fvp); + if ((error = VOP_UPDATE(fvp, &ts, &ts, !DOINGSOFTDEP(fvp))) != 0) { + VOP_UNLOCK(fvp, 0, p); goto bad; } @@ -939,7 +991,7 @@ abortit: * call to checkpath(). */ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); - VOP_UNLOCK(fvp); + VOP_UNLOCK(fvp, 0, p); if (oldparent != dp->i_number) newparent = dp->i_number; if (doingdirectory && newparent) { @@ -978,13 +1030,19 @@ abortit: error = EMLINK; goto bad; } + dp->i_effnlink++; dp->i_ffs_nlink++; dp->i_flag |= IN_CHANGE; - if ((error = VOP_UPDATE(tdvp, &ts, &ts, 1)) != 0) + if (DOINGSOFTDEP(tdvp)) + softdep_increase_linkcnt(dp); + if ((error = VOP_UPDATE(tdvp, &ts, &ts, + !DOINGSOFTDEP(tdvp))) != 0) goto bad; } - if ((error = ufs_direnter(ip, tdvp, tcnp)) != 0) { + ufs_makedirentry(ip, tcnp, &newdir); + if ((error = ufs_direnter(tdvp, &newdir, tcnp, NULL)) != 0) { if (doingdirectory && newparent) { + dp->i_effnlink--; dp->i_ffs_nlink--; dp->i_flag |= IN_CHANGE; (void)VOP_UPDATE(tdvp, &ts, &ts, 1); @@ -1018,8 +1076,8 @@ abortit: * (both directories, or both not directories). 
*/ if ((xp->i_ffs_mode & IFMT) == IFDIR) { - if (!ufs_dirempty(xp, dp->i_number, tcnp->cn_cred) || - xp->i_ffs_nlink > 2) { + if (xp->i_effnlink > 2 || + !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } @@ -1032,37 +1090,35 @@ abortit: error = EISDIR; goto bad; } - if ((error = ufs_dirrewrite(dp, ip, tcnp)) != 0) - goto bad; - /* - * If the target directory is in the same - * directory as the source directory, - * decrement the link count on the parent - * of the target directory. - */ - if (doingdirectory && !newparent) { - dp->i_ffs_nlink--; - dp->i_flag |= IN_CHANGE; - } - vput(tdvp); - /* - * Adjust the link count of the target to - * reflect the dirrewrite above. If this is - * a directory it is empty and there are - * no links to it, so we can squash the inode and - * any space associated with it. We disallowed - * renaming over top of a directory with links to - * it above, as the remaining link would point to - * a directory without "." or ".." entries. - */ - xp->i_ffs_nlink--; + + if ((error = ufs_dirrewrite(dp, xp, ip->i_number, + IFTODT(ip->i_ffs_mode), doingdirectory)) != 0) + goto bad; if (doingdirectory) { - if (--xp->i_ffs_nlink != 0) - panic("rename: linked directory"); - error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC, - tcnp->cn_cred, tcnp->cn_proc); + dp->i_effnlink--; + dp->i_flag |= IN_CHANGE; + xp->i_effnlink--; + xp->i_flag |= IN_CHANGE; } - xp->i_flag |= IN_CHANGE; + if (doingdirectory && !DOINGSOFTDEP(tvp)) { + /* + * Truncate inode. The only stuff left in the directory + * is "." and "..". The "." reference is inconsequential + * since we are quashing it. We have removed the "." + * reference and the reference in the parent directory, + * but there may be other hard links. The soft + * dependency code will arrange to do these operations + * after the parent directory entry has been deleted on + * disk, so when running with that code we avoid doing + * them now. 
+ */ + dp->i_ffs_nlink--; + xp->i_ffs_nlink--; + if ((error = VOP_TRUNCATE(tvp, (off_t)0, IO_SYNC, + tcnp->cn_cred, tcnp->cn_proc)) != 0) + goto bad; + } + vput(tdvp); vput(tvp); xp = NULL; } @@ -1092,10 +1148,9 @@ abortit: * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source - * is a directory then it cannot have been rmdir'ed; its link - * count of three would cause a rmdir to fail with ENOTEMPTY. - * The IRENAME flag ensures that it cannot be moved by another - * rename. + * is a directory then it cannot have been rmdir'ed; the IN_RENAME + * flag ensures that it cannot be moved by another rename or removed + * by a rmdir. */ if (xp != ip) { if (doingdirectory) @@ -1108,44 +1163,11 @@ abortit: * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { - dp->i_ffs_nlink--; - dp->i_flag |= IN_CHANGE; - error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, - sizeof (struct dirtemplate), (off_t)0, - UIO_SYSSPACE, IO_NODELOCKED, - tcnp->cn_cred, (int *)0, (struct proc *)0); - if (error == 0) { -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (fvp->v_mount->mnt_maxsymlinklen <= 0) - namlen = dirbuf.dotdot_type; - else - namlen = dirbuf.dotdot_namlen; -# else - namlen = dirbuf.dotdot_namlen; -# endif - if (namlen != 2 || - dirbuf.dotdot_name[0] != '.' 
|| - dirbuf.dotdot_name[1] != '.') { - ufs_dirbad(xp, (doff_t)12, - "rename: mangled dir"); - } else { - dirbuf.dotdot_ino = newparent; - (void) vn_rdwr(UIO_WRITE, fvp, - (caddr_t)&dirbuf, - sizeof (struct dirtemplate), - (off_t)0, UIO_SYSSPACE, - IO_NODELOCKED|IO_SYNC, - tcnp->cn_cred, (int *)0, - (struct proc *)0); - cache_purge(fdvp); - } - } - } - error = ufs_dirremove(fdvp, fcnp); - if (!error) { - xp->i_ffs_nlink--; - xp->i_flag |= IN_CHANGE; + xp->i_offset = mastertemplate.dot_reclen; + ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); + cache_purge(fdvp); } + error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); xp->i_flag &= ~IN_RENAME; } if (dp) @@ -1162,7 +1184,8 @@ bad: out: if (doingdirectory) ip->i_flag &= ~IN_RENAME; - if (VOP_LOCK(fvp) == 0) { + if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { + ip->i_effnlink--; ip->i_ffs_nlink--; ip->i_flag |= IN_CHANGE; vput(fvp); @@ -1172,18 +1195,6 @@ out: } /* - * A virgin directory (no blushing please). - */ -static struct dirtemplate mastertemplate = { - 0, 12, DT_DIR, 1, ".", - 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." -}; -static struct odirtemplate omastertemplate = { - 0, 12, 1, ".", - 0, DIRBLKSIZ - 12, 2, ".." -}; - -/* * Mkdir system call */ int @@ -1196,11 +1207,13 @@ ufs_mkdir(v) struct componentname *a_cnp; struct vattr *a_vap; } */ *ap = v; - register struct vnode *dvp = ap->a_dvp; - register struct vattr *vap = ap->a_vap; - register struct componentname *cnp = ap->a_cnp; - register struct inode *ip, *dp; + struct vnode *dvp = ap->a_dvp; + struct vattr *vap = ap->a_vap; + struct componentname *cnp = ap->a_cnp; + struct inode *ip, *dp; struct vnode *tvp; + struct buf *bp; + struct direct newdir; struct dirtemplate dirtemplate, *dtp; struct timespec ts; int error, dmode; @@ -1239,24 +1252,31 @@ ufs_mkdir(v) ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_ffs_mode = dmode; tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). 
*/ + ip->i_effnlink = 2; ip->i_ffs_nlink = 2; + if (DOINGSOFTDEP(tvp)) + softdep_increase_linkcnt(ip); + if (cnp->cn_flags & ISWHITEOUT) ip->i_ffs_flags |= UF_OPAQUE; - TIMEVAL_TO_TIMESPEC(&time, &ts); - error = VOP_UPDATE(tvp, &ts, &ts, 1); /* - * Bump link count in parent directory - * to reflect work done below. Should - * be done before reference is created - * so reparation is possible if we crash. + * Bump link count in parent directory to reflect work done below. + * Should be done before reference is create so cleanup is + * possible if we crash. */ + dp->i_effnlink++; dp->i_ffs_nlink++; dp->i_flag |= IN_CHANGE; - if ((error = VOP_UPDATE(dvp, &ts, &ts, 1)) != 0) + if (DOINGSOFTDEP(dvp)) + softdep_increase_linkcnt(dp); + TIMEVAL_TO_TIMESPEC(&time, &ts); + if ((error = VOP_UPDATE(dvp, &ts, &ts, !DOINGSOFTDEP(dvp))) != 0) goto bad; - /* Initialize directory with "." and ".." from static template. */ + /* + * Initialize directory with "." and ".." from static template. + */ if (dvp->v_mount->mnt_maxsymlinklen > 0) dtp = &mastertemplate; else @@ -1264,40 +1284,56 @@ ufs_mkdir(v) dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; - error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, - sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE, - IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0); - if (error) { - dp->i_ffs_nlink--; - dp->i_flag |= IN_CHANGE; + + if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, + B_CLRBUF, &bp)) != 0) + goto bad; + ip->i_ffs_size = DIRBLKSIZ; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(tvp, (u_long)ip->i_ffs_size); + bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); + if ((error = VOP_UPDATE(tvp, &ts, &ts, !DOINGSOFTDEP(tvp))) != 0) { + (void)VOP_BWRITE(bp); goto bad; - } - if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) - panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */ - else { - ip->i_ffs_size 
= DIRBLKSIZ; - ip->i_flag |= IN_CHANGE; } - /* Directory set up, now install it's entry in the parent directory. */ - if ((error = ufs_direnter(ip, dvp, cnp)) != 0) { - dp->i_ffs_nlink--; - dp->i_flag |= IN_CHANGE; - } -bad: /* - * No need to do an explicit VOP_TRUNCATE here, vrele will do this - * for us because we set the link count to 0. + * Directory set up, now install it's entry in the parent directory. + * + * If we are not doing soft dependencies, then we must write out the + * buffer containing the new directory body before entering the new + * name in the parent. If we are doing soft dependencies, then the + * buffer containing the new directory body will be passed to and + * released in the soft dependency code after the code has attached + * an appropriate ordering dependency to the buffer which ensures that + * the buffer is written before the new name is written in the parent. */ - if (error) { - ip->i_ffs_nlink = 0; - ip->i_flag |= IN_CHANGE; + if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp)) != 0)) + goto bad; + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(dvp, &newdir, cnp, bp); + +bad: + if (error == 0) { + *ap->a_vpp = tvp; + } else { + dp->i_effnlink--; + dp->i_ffs_nlink--; + dp->i_flag |= IN_CHANGE; + /* + * No need to do an explicit VOP_TRUNCATE here, vrele will + * do this for us because we set the link count to 0. 
+ */ + ip->i_effnlink = 0; + ip->i_ffs_nlink = 0; + ip->i_flag |= IN_CHANGE; + vput(tvp); - } else - *ap->a_vpp = tvp; + } out: FREE(cnp->cn_pnbuf, M_NAMEI); vput(dvp); + return (error); } @@ -1313,10 +1349,10 @@ ufs_rmdir(v) struct vnode *a_vp; struct componentname *a_cnp; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct vnode *dvp = ap->a_dvp; - register struct componentname *cnp = ap->a_cnp; - register struct inode *ip, *dp; + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + struct inode *ip, *dp; int error; ip = VTOI(vp); @@ -1330,14 +1366,17 @@ ufs_rmdir(v) return (EINVAL); } /* - * Verify the directory is empty (and valid). - * (Rmdir ".." won't be valid since - * ".." will contain a reference to - * the current directory and thus be - * non-empty.) + * Do not remove a directory that is in the process of being renamed. + * Verify the directory is empty (and valid). Rmdir ".." will not be + * valid since ".." will contain a reference to the current directory + * and thus be non-empty. */ error = 0; - if (ip->i_ffs_nlink != 2 || + if (ip->i_flag & IN_RENAME) { + error = EINVAL; + goto out; + } + if (ip->i_effnlink != 2 || !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; @@ -1352,31 +1391,33 @@ ufs_rmdir(v) * inode. If we crash in between, the directory * will be reattached to lost+found, */ - if ((error = ufs_dirremove(dvp, cnp)) != 0) + if ((error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1)) != 0) goto out; - dp->i_ffs_nlink--; - dp->i_flag |= IN_CHANGE; cache_purge(dvp); - vput(dvp); - dvp = NULL; - /* - * Truncate inode. The only stuff left - * in the directory is "." and "..". The - * "." reference is inconsequential since - * we're quashing it. The ".." reference - * has already been adjusted above. We've - * removed the "." 
reference and the reference - * in the parent directory, but there may be - * other hard links so decrement by 2 and - * worry about them later. + /* + * Truncate inode. The only stuff left in the directory is "." and + * "..". The "." reference is inconsequential since we are quashing + * it. We have removed the "." reference and the reference in the + * parent directory, but there may be other hard links. So, + * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no + * new entries are made. The soft dependency code will arrange to + * do these operations after the parent directory entry has been + * deleted on disk, so when running with that code we avoid doing + * them now. */ - ip->i_ffs_nlink -= 2; - error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, - cnp->cn_proc); - cache_purge(ITOV(ip)); + dp->i_effnlink--; + dp->i_flag |= IN_CHANGE; + ip->i_effnlink--; + ip->i_flag |= IN_CHANGE; + if (!DOINGSOFTDEP(vp)) { + dp->i_ffs_nlink--; + ip->i_ffs_nlink--; + error = VOP_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, + cnp->cn_proc); + } + cache_purge(vp); out: - if (dvp) - vput(dvp); + vput(dvp); vput(vp); return (error); } @@ -1395,8 +1436,8 @@ ufs_symlink(v) struct vattr *a_vap; char *a_target; } */ *ap = v; - register struct vnode *vp, **vpp = ap->a_vpp; - register struct inode *ip; + struct vnode *vp, **vpp = ap->a_vpp; + struct inode *ip; int len, error; error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, @@ -1436,10 +1477,10 @@ ufs_readdir(v) struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; - u_long *a_cookies; - int ncookies; + u_long **a_cookies; + int *ncookies; } */ *ap = v; - register struct uio *uio = ap->a_uio; + struct uio *uio = ap->a_uio; int error; size_t count, lost; off_t off = uio->uio_offset; @@ -1495,9 +1536,10 @@ ufs_readdir(v) error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); # endif if (!error && ap->a_ncookies) { - register struct dirent *dp; - register u_long *cookies = ap->a_cookies; - register int ncookies 
= ap->a_ncookies; + struct dirent *dp, *dpstart; + off_t offstart; + u_long *cookies; + int ncookies; /* * Only the NFS server and emulations use cookies, and they @@ -1506,17 +1548,28 @@ ufs_readdir(v) */ if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) panic("ufs_readdir: lost in space"); - dp = (struct dirent *) - (uio->uio_iov->iov_base - (uio->uio_offset - off)); - while (ncookies-- && off < uio->uio_offset) { - if (dp->d_reclen == 0) - break; + + dpstart = (struct dirent *) + (uio->uio_iov->iov_base - (uio->uio_offset - off)); + offstart = off; + for (dp = dpstart, ncookies = 0; off < uio->uio_offset; ) { + if (dp->d_reclen == 0) + break; + off += dp->d_reclen; + ncookies++; + dp = (struct dirent *)((caddr_t)dp + dp->d_reclen); + } + lost += uio->uio_offset - off; + uio->uio_offset = off; + MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, + M_WAITOK); + *ap->a_ncookies = ncookies; + *ap->a_cookies = cookies; + for (off = offstart, dp = dpstart; off < uio->uio_offset; ) { + *(cookies++) = off; off += dp->d_reclen; - *(cookies++) = off; - dp = (struct dirent *)((caddr_t)dp + dp->d_reclen); + dp = (struct dirent *)((caddr_t)dp + dp->d_reclen); } - lost += uio->uio_offset - off; - uio->uio_offset = off; } uio->uio_resid += lost; *ap->a_eofflag = VTOI(ap->a_vp)->i_ffs_size <= uio->uio_offset; @@ -1535,8 +1588,8 @@ ufs_readlink(v) struct uio *a_uio; struct ucred *a_cred; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); int isize; isize = ip->i_ffs_size; @@ -1575,82 +1628,31 @@ ufs_lock(v) { struct vop_lock_args /* { struct vnode *a_vp; + int a_flags; + struct proc *a_p; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip; -#ifdef DIAGNOSTIC - struct proc *p = curproc; /* XXX */ -#endif + struct vnode *vp = ap->a_vp; -start: - while (vp->v_flag & VXLOCK) { - vp->v_flag |= VXWANT; - sleep((caddr_t)vp, PINOD);
- } - if (vp->v_tag == VT_NON) - return (ENOENT); - ip = VTOI(vp); - if (ip->i_flag & IN_LOCKED) { - ip->i_flag |= IN_WANTED; -#ifdef DIAGNOSTIC - if (p) { - if (p->p_pid == ip->i_lockholder) - panic("locking against myself"); - ip->i_lockwaiter = p->p_pid; - } else - ip->i_lockwaiter = -1; -#endif - (void) sleep((caddr_t)ip, PINOD); - goto start; - } -#ifdef DIAGNOSTIC - ip->i_lockwaiter = 0; - if (ip->i_lockholder != 0) - panic("lockholder (%d) != 0", ip->i_lockholder); - if (p && p->p_pid == 0) - printf("locking by process 0\n"); - if (p) - ip->i_lockholder = p->p_pid; - else - ip->i_lockholder = -1; -#endif - ip->i_flag |= IN_LOCKED; - return (0); + return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags, &vp->v_interlock, + ap->a_p)); } /* * Unlock an inode. If WANT bit is on, wakeup. */ -int lockcount = 90; int ufs_unlock(v) void *v; { struct vop_unlock_args /* { struct vnode *a_vp; + int a_flags; + struct proc *a_p; } */ *ap = v; - register struct inode *ip = VTOI(ap->a_vp); -#ifdef DIAGNOSTIC - struct proc *p = curproc; /* XXX */ -#endif + struct vnode *vp = ap->a_vp; -#ifdef DIAGNOSTIC - if ((ip->i_flag & IN_LOCKED) == 0) { - vprint("ufs_unlock: unlocked inode", ap->a_vp); - panic("ufs_unlock NOT LOCKED"); - } - if (p && p->p_pid != ip->i_lockholder && p->p_pid > -1 && - ip->i_lockholder > -1 && lockcount++ < 100) - panic("unlocker (%d) != lock holder (%d)", - p->p_pid, ip->i_lockholder); - ip->i_lockholder = 0; -#endif - ip->i_flag &= ~IN_LOCKED; - if (ip->i_flag & IN_WANTED) { - ip->i_flag &= ~IN_WANTED; - wakeup((caddr_t)ip); - } - return (0); + return (lockmgr(&VTOI(vp)->i_lock, ap->a_flags | LK_RELEASE, + &vp->v_interlock, ap->a_p)); } /* @@ -1664,9 +1666,7 @@ ufs_islocked(v) struct vnode *a_vp; } */ *ap = v; - if (VTOI(ap->a_vp)->i_flag & IN_LOCKED) - return (1); - return (0); + return (lockstatus(&VTOI(ap->a_vp)->i_lock)); } /* @@ -1680,9 +1680,9 @@ ufs_strategy(v) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap = v; - register struct buf *bp = 
ap->a_bp; - register struct vnode *vp = bp->b_vp; - register struct inode *ip; + struct buf *bp = ap->a_bp; + struct vnode *vp = bp->b_vp; + struct inode *ip; int error; ip = VTOI(vp); @@ -1720,8 +1720,8 @@ ufs_print(v) struct vop_print_args /* { struct vnode *a_vp; } */ *ap = v; - register struct vnode *vp = ap->a_vp; - register struct inode *ip = VTOI(vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); printf("tag VT_UFS, ino %d, on dev %d, %d", ip->i_number, major(ip->i_dev), minor(ip->i_dev)); @@ -1729,12 +1729,7 @@ ufs_print(v) if (vp->v_type == VFIFO) fifo_printinfo(vp); #endif /* FIFO */ - printf("%s\n", (ip->i_flag & IN_LOCKED) ? " (LOCKED)" : ""); - if (ip->i_lockholder == 0) - return (0); - printf("\towner pid %d", ip->i_lockholder); - if (ip->i_lockwaiter) - printf(" waiting pid %d", ip->i_lockwaiter); + lockmgr_printinfo(&ip->i_lock); printf("\n"); return (0); } @@ -1796,10 +1791,12 @@ ufsspec_close(v) struct ucred *a_cred; struct proc *a_p; } */ *ap = v; - register struct inode *ip = VTOI(ap->a_vp); + struct inode *ip = VTOI(ap->a_vp); - if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED)) + simple_lock(&vp->v_interlock); + if (ap->a_vp->v_usecount > 1) ITIMES(ip, &time, &time); + simple_unlock(&vp->v_interlock); return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap)); } @@ -1864,10 +1861,13 @@ ufsfifo_close(v) struct proc *a_p; } */ *ap = v; extern int (**fifo_vnodeop_p) __P((void *)); - register struct inode *ip = VTOI(ap->a_vp); + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); - if (ap->a_vp->v_usecount > 1 && !(ip->i_flag & IN_LOCKED)) + simple_lock(&vp->v_interlock); + if (ap->a_vp->v_usecount > 1) ITIMES(ip, &time, &time); + simple_unlock(&vp->v_interlock); return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap)); } #endif /* FIFO */ @@ -1924,7 +1924,7 @@ ufs_advlock(v) struct flock *a_fl; int a_flags; } */ *ap = v; - register struct inode *ip = VTOI(ap->a_vp); + struct inode *ip = VTOI(ap->a_vp); return 
(lf_advlock(&ip->i_lockf, ip->i_ffs_size, ap->a_id, ap->a_op, ap->a_fl, ap->a_flags)); @@ -1953,9 +1953,9 @@ ufs_vinit(mntp, specops, fifoops, vpp) if ((nvp = checkalias(vp, ip->i_ffs_rdev, mntp)) != NULL) { /* * Discard unneeded vnode, but save its inode. + * Note that the lock is carried over in the inode + * to the replacement vnode. */ - ufs_ihashrem(ip); - VOP_UNLOCK(vp); nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; @@ -1966,7 +1966,6 @@ ufs_vinit(mntp, specops, fifoops, vpp) */ vp = nvp; ip->i_vnode = vp; - ufs_ihashins(ip); } break; case VFIFO: @@ -2005,7 +2004,8 @@ ufs_makeinode(mode, dvp, vpp, cnp) struct vnode **vpp; struct componentname *cnp; { - register struct inode *ip, *pdir; + struct inode *ip, *pdir; + struct direct newdir; struct timespec ts; struct vnode *tvp; int error; @@ -2040,7 +2040,10 @@ ufs_makeinode(mode, dvp, vpp, cnp) ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_ffs_mode = mode; tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ + ip->i_effnlink = 1; ip->i_ffs_nlink = 1; + if (DOINGSOFTDEP(tvp)) + softdep_increase_linkcnt(ip); if ((ip->i_ffs_mode & ISGID) && !groupmember(ip->i_ffs_gid, cnp->cn_cred) && suser(cnp->cn_cred, NULL)) @@ -2053,10 +2056,13 @@ ufs_makeinode(mode, dvp, vpp, cnp) * Make sure inode goes to disk before directory entry. */ TIMEVAL_TO_TIMESPEC(&time, &ts); - if ((error = VOP_UPDATE(tvp, &ts, &ts, 1)) != 0) + if ((error = VOP_UPDATE(tvp, &ts, &ts, !DOINGSOFTDEP(tvp))) != 0) goto bad; - if ((error = ufs_direnter(ip, dvp, cnp)) != 0) + + ufs_makedirentry(ip, cnp, &newdir); + if ((error = ufs_direnter(dvp, &newdir, cnp, NULL)) != 0) goto bad; + if ((cnp->cn_flags & SAVESTART) == 0) FREE(cnp->cn_pnbuf, M_NAMEI); vput(dvp); @@ -2070,8 +2076,12 @@ bad: */ free(cnp->cn_pnbuf, M_NAMEI); vput(dvp); + ip->i_effnlink = 0; ip->i_ffs_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); + return (error); } + + |