summaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorVisa Hankala <visa@cvs.openbsd.org>2018-07-02 14:36:34 +0000
committerVisa Hankala <visa@cvs.openbsd.org>2018-07-02 14:36:34 +0000
commitbf4fc1ecc3106af6b6b5b5414756523c0655cd38 (patch)
treea20898c30f34f84dfd60dbe2c8473cc471522882 /sys
parent47dee901c91c08e3c90eabdfbd83a836267638d3 (diff)
Update the file reference count field `f_count' using atomic operations
instead of using a mutex for update serialization. Use a per-fdp mutex to manage updating of file instance pointers in the `fd_ofiles' array to let fd_getfile() acquire file references safely with concurrent file reference releases. OK mpi@
Diffstat (limited to 'sys')
-rw-r--r--sys/kern/kern_descrip.c94
-rw-r--r--sys/kern/kern_sysctl.c4
-rw-r--r--sys/kern/uipc_usrreq.c16
-rw-r--r--sys/sys/file.h24
-rw-r--r--sys/sys/filedesc.h5
5 files changed, 84 insertions, 59 deletions
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index c50fd3ac64b..2d37d5725e5 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_descrip.c,v 1.174 2018/07/02 14:21:45 visa Exp $ */
+/* $OpenBSD: kern_descrip.c,v 1.175 2018/07/02 14:36:33 visa Exp $ */
/* $NetBSD: kern_descrip.c,v 1.42 1996/03/30 22:24:38 christos Exp $ */
/*
@@ -198,6 +198,7 @@ struct file *
fd_iterfile(struct file *fp, struct proc *p)
{
struct file *nfp;
+ unsigned int count;
mtx_enter(&fhdlk);
if (fp == NULL)
@@ -206,10 +207,15 @@ fd_iterfile(struct file *fp, struct proc *p)
nfp = LIST_NEXT(fp, f_list);
/* don't refcount when f_count == 0 to avoid race in fdrop() */
- while (nfp != NULL && nfp->f_count == 0)
- nfp = LIST_NEXT(nfp, f_list);
- if (nfp != NULL)
- nfp->f_count++;
+ while (nfp != NULL) {
+ count = nfp->f_count;
+ if (count == 0) {
+ nfp = LIST_NEXT(nfp, f_list);
+ continue;
+ }
+ if (atomic_cas_uint(&nfp->f_count, count, count + 1) == count)
+ break;
+ }
mtx_leave(&fhdlk);
if (fp != NULL)
@@ -228,11 +234,11 @@ fd_getfile(struct filedesc *fdp, int fd)
if ((u_int)fd >= fdp->fd_nfiles)
return (NULL);
- mtx_enter(&fhdlk);
+ mtx_enter(&fdp->fd_fplock);
fp = fdp->fd_ofiles[fd];
if (fp != NULL)
- fp->f_count++;
- mtx_leave(&fhdlk);
+ atomic_inc_int(&fp->f_count);
+ mtx_leave(&fdp->fd_fplock);
return (fp);
}
@@ -654,7 +660,7 @@ finishdup(struct proc *p, struct file *fp, int old, int new,
fdpassertlocked(fdp);
KASSERT(fp->f_iflags & FIF_INSERTED);
- if (fp->f_count == LONG_MAX-2) {
+ if (fp->f_count >= FDUP_MAX_COUNT) {
FRELE(fp, p);
return (EDEADLK);
}
@@ -668,7 +674,14 @@ finishdup(struct proc *p, struct file *fp, int old, int new,
fd_used(fdp, new);
}
+ /*
+ * Use `fd_fplock' to synchronize with fd_getfile() so that
+ * the function no longer creates a new reference to the old file.
+ */
+ mtx_enter(&fdp->fd_fplock);
fdp->fd_ofiles[new] = fp;
+ mtx_leave(&fdp->fd_fplock);
+
fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] & ~UF_EXCLOSE;
*retval = new;
@@ -696,18 +709,31 @@ fdinsert(struct filedesc *fdp, int fd, int flags, struct file *fp)
LIST_INSERT_HEAD(&filehead, fp, f_list);
}
}
+ mtx_leave(&fhdlk);
+
+ mtx_enter(&fdp->fd_fplock);
KASSERT(fdp->fd_ofiles[fd] == NULL);
fdp->fd_ofiles[fd] = fp;
+ mtx_leave(&fdp->fd_fplock);
+
fdp->fd_ofileflags[fd] |= (flags & UF_EXCLOSE);
- mtx_leave(&fhdlk);
}
void
fdremove(struct filedesc *fdp, int fd)
{
fdpassertlocked(fdp);
+
+ /*
+ * Use `fd_fplock' to synchronize with fd_getfile() so that
+ * the function no longer creates a new reference to the file.
+ */
+ mtx_enter(&fdp->fd_fplock);
fdp->fd_ofiles[fd] = NULL;
+ mtx_leave(&fdp->fd_fplock);
+
fdp->fd_ofileflags[fd] = 0;
+
fd_unused(fdp, fd);
}
@@ -890,14 +916,17 @@ void
fdexpand(struct proc *p)
{
struct filedesc *fdp = p->p_fd;
- int nfiles;
+ int nfiles, oldnfiles;
size_t copylen;
- struct file **newofile;
+ struct file **newofile, **oldofile;
char *newofileflags;
u_int *newhimap, *newlomap;
fdpassertlocked(fdp);
+ oldnfiles = fdp->fd_nfiles;
+ oldofile = fdp->fd_ofiles;
+
/*
* No space in current array.
*/
@@ -931,9 +960,6 @@ fdexpand(struct proc *p)
memcpy(newofileflags, fdp->fd_ofileflags, copylen);
memset(newofileflags + copylen, 0, nfiles * sizeof(char) - copylen);
- if (fdp->fd_nfiles > NDFILE)
- free(fdp->fd_ofiles, M_FILEDESC, fdp->fd_nfiles * OFILESIZE);
-
if (NDHISLOTS(nfiles) > NDHISLOTS(fdp->fd_nfiles)) {
copylen = NDHISLOTS(fdp->fd_nfiles) * sizeof(u_int);
memcpy(newhimap, fdp->fd_himap, copylen);
@@ -954,9 +980,16 @@ fdexpand(struct proc *p)
fdp->fd_himap = newhimap;
fdp->fd_lomap = newlomap;
}
+
+ mtx_enter(&fdp->fd_fplock);
fdp->fd_ofiles = newofile;
+ mtx_leave(&fdp->fd_fplock);
+
fdp->fd_ofileflags = newofileflags;
- fdp->fd_nfiles = nfiles;
+ fdp->fd_nfiles = nfiles;
+
+ if (oldnfiles > NDFILE)
+ free(oldofile, M_FILEDESC, oldnfiles * OFILESIZE);
}
/*
@@ -1023,9 +1056,7 @@ fnew(struct proc *p)
fp->f_cred = p->p_ucred;
crhold(fp->f_cred);
- mtx_enter(&fhdlk);
- fp->f_count++;
- mtx_leave(&fhdlk);
+ FREF(fp);
return (fp);
}
@@ -1040,6 +1071,7 @@ fdinit(void)
newfdp = pool_get(&fdesc_pool, PR_WAITOK|PR_ZERO);
rw_init(&newfdp->fd_fd.fd_lock, "fdlock");
+ mtx_init(&newfdp->fd_fd.fd_fplock, IPL_MPFLOOR);
/* Create the file descriptor table. */
newfdp->fd_fd.fd_refcnt = 1;
@@ -1117,7 +1149,6 @@ fdcopy(struct process *pr)
newfdp->fd_flags = fdp->fd_flags;
newfdp->fd_cmask = fdp->fd_cmask;
- mtx_enter(&fhdlk);
for (i = 0; i <= fdp->fd_lastfile; i++) {
struct file *fp = fdp->fd_ofiles[i];
@@ -1130,17 +1161,16 @@ fdcopy(struct process *pr)
* tied to the process that opened them to enforce
* their internal consistency, so close them here.
*/
- if (fp->f_count == LONG_MAX-2 ||
+ if (fp->f_count >= FDUP_MAX_COUNT ||
fp->f_type == DTYPE_KQUEUE)
continue;
- fp->f_count++;
+ FREF(fp);
newfdp->fd_ofiles[i] = fp;
newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
fd_used(newfdp, i);
}
}
- mtx_leave(&fhdlk);
fdpunlock(fdp);
return (newfdp);
@@ -1200,11 +1230,9 @@ closef(struct file *fp, struct proc *p)
if (fp == NULL)
return (0);
- KASSERTMSG(fp->f_count >= 2, "count (%ld) < 2", fp->f_count);
+ KASSERTMSG(fp->f_count >= 2, "count (%u) < 2", fp->f_count);
- mtx_enter(&fhdlk);
- fp->f_count--;
- mtx_leave(&fhdlk);
+ atomic_dec_int(&fp->f_count);
/*
* POSIX record locking dictates that any close releases ALL
@@ -1236,10 +1264,9 @@ fdrop(struct file *fp, struct proc *p)
{
int error;
- MUTEX_ASSERT_LOCKED(&fhdlk);
-
- KASSERTMSG(fp->f_count == 0, "count (%ld) != 0", fp->f_count);
+ KASSERTMSG(fp->f_count == 0, "count (%u) != 0", fp->f_count);
+ mtx_enter(&fhdlk);
if (fp->f_iflags & FIF_INSERTED)
LIST_REMOVE(fp, f_list);
mtx_leave(&fhdlk);
@@ -1376,13 +1403,18 @@ dupfdopen(struct proc *p, int indx, int mode)
FRELE(wfp, p);
return (EACCES);
}
- if (wfp->f_count == LONG_MAX-2) {
+ if (wfp->f_count >= FDUP_MAX_COUNT) {
FRELE(wfp, p);
return (EDEADLK);
}
KASSERT(wfp->f_iflags & FIF_INSERTED);
+
+ mtx_enter(&fdp->fd_fplock);
+ KASSERT(fdp->fd_ofiles[indx] == NULL);
fdp->fd_ofiles[indx] = wfp;
+ mtx_leave(&fdp->fd_fplock);
+
fdp->fd_ofileflags[indx] = (fdp->fd_ofileflags[indx] & UF_EXCLOSE) |
(fdp->fd_ofileflags[dupfd] & ~UF_EXCLOSE);
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index 3106bbb1de3..50e6e6ac6ae 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kern_sysctl.c,v 1.344 2018/07/01 16:33:15 visa Exp $ */
+/* $OpenBSD: kern_sysctl.c,v 1.345 2018/07/02 14:36:33 visa Exp $ */
/* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */
/*-
@@ -1077,9 +1077,7 @@ fill_file(struct kinfo_file *kf, struct file *fp, struct filedesc *fdp,
kf->f_flag = fp->f_flag;
kf->f_iflags = fp->f_iflags;
kf->f_type = fp->f_type;
- mtx_enter(&fhdlk);
kf->f_count = fp->f_count;
- mtx_leave(&fhdlk);
if (show_pointers)
kf->f_ucred = PTRTOINT64(fp->f_cred);
kf->f_uid = fp->f_cred->cr_uid;
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 8bea7cde97c..1df15175168 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: uipc_usrreq.c,v 1.132 2018/07/01 16:33:15 visa Exp $ */
+/* $OpenBSD: uipc_usrreq.c,v 1.133 2018/07/02 14:36:33 visa Exp $ */
/* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */
/*
@@ -744,12 +744,16 @@ restart:
* fdalloc() works properly.. We finalize it all
* in the loop below.
*/
+ mtx_enter(&fdp->fd_fplock);
+ KASSERT(fdp->fd_ofiles[fds[i]] == NULL);
fdp->fd_ofiles[fds[i]] = rp->fp;
- fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
- rp++;
+ mtx_leave(&fdp->fd_fplock);
+ fdp->fd_ofileflags[fds[i]] = (rp->flags & UF_PLEDGED);
if (flags & MSG_CMSG_CLOEXEC)
fdp->fd_ofileflags[fds[i]] |= UF_EXCLOSE;
+
+ rp++;
}
/*
@@ -847,7 +851,7 @@ morespace:
error = EBADF;
goto fail;
}
- if (fp->f_count == LONG_MAX-2) {
+ if (fp->f_count >= FDUP_MAX_COUNT) {
error = EDEADLK;
goto fail;
}
@@ -927,7 +931,6 @@ unp_gc(void *arg __unused)
do {
nunref = 0;
LIST_FOREACH(unp, &unp_head, unp_link) {
- mtx_enter(&fhdlk);
fp = unp->unp_file;
if (unp->unp_flags & UNP_GCDEFER) {
/*
@@ -939,7 +942,6 @@ unp_gc(void *arg __unused)
unp_defer--;
} else if (unp->unp_flags & UNP_GCMARK) {
/* marked as live in previous pass */
- mtx_leave(&fhdlk);
continue;
} else if (fp == NULL) {
/* not being passed, so can't be in loop */
@@ -958,11 +960,9 @@ unp_gc(void *arg __unused)
if (fp->f_count == unp->unp_msgcount) {
nunref++;
unp->unp_flags |= UNP_GCDEAD;
- mtx_leave(&fhdlk);
continue;
}
}
- mtx_leave(&fhdlk);
/*
* This is the first time we've seen this socket on
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 45052025d02..114c192d766 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: file.h,v 1.50 2018/06/25 22:29:16 kettenis Exp $ */
+/* $OpenBSD: file.h,v 1.51 2018/07/02 14:36:33 visa Exp $ */
/* $NetBSD: file.h,v 1.11 1995/03/26 20:24:13 jtc Exp $ */
/*
@@ -66,11 +66,12 @@ struct fileops {
* Locks used to protect struct members in this file:
* I immutable after creation
* F global `fhdlk' mutex
+ * a atomic operations
* f per file `f_mtx'
* k kernel lock
*/
struct file {
- LIST_ENTRY(file) f_list;/* [k] list of active files */
+ LIST_ENTRY(file) f_list;/* [F] list of active files */
struct mutex f_mtx;
short f_flag; /* [k] see fcntl.h */
#define DTYPE_VNODE 1 /* file */
@@ -79,7 +80,7 @@ struct file {
#define DTYPE_KQUEUE 4 /* event queue */
#define DTYPE_DMABUF 5 /* DMA buffer (for DRM) */
short f_type; /* [I] descriptor type */
- long f_count; /* [F] reference count */
+ u_int f_count; /* [a] reference count */
struct ucred *f_cred; /* [I] credentials associated with descriptor */
struct fileops *f_ops; /* [I] file operation pointers */
off_t f_offset; /* [k] */
@@ -99,25 +100,16 @@ struct file {
do { \
extern void vfs_stall_barrier(void); \
vfs_stall_barrier(); \
- mtx_enter(&fhdlk); \
- (fp)->f_count++; \
- mtx_leave(&fhdlk); \
+ atomic_inc_int(&(fp)->f_count); \
} while (0)
#define FRELE(fp,p) \
-({ \
- int rv = 0; \
- mtx_enter(&fhdlk); \
- if (--(fp)->f_count == 0) \
- rv = fdrop(fp, p); \
- else \
- mtx_leave(&fhdlk); \
- rv; \
-})
+ (atomic_dec_int_nv(&fp->f_count) == 0 ? fdrop(fp, p) : 0)
+
+#define FDUP_MAX_COUNT (UINT_MAX - 2 * MAXCPUS)
int fdrop(struct file *, struct proc *);
-extern struct mutex fhdlk; /* protects `filehead' and f_count */
LIST_HEAD(filelist, file);
extern int maxfiles; /* kernel limit on number of open files */
extern int numfiles; /* actual number of open files */
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
index d92bbd65197..aaadd17ebbf 100644
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: filedesc.h,v 1.40 2018/06/25 09:36:28 mpi Exp $ */
+/* $OpenBSD: filedesc.h,v 1.41 2018/07/02 14:36:33 visa Exp $ */
/* $NetBSD: filedesc.h,v 1.14 1996/04/09 20:55:28 cgd Exp $ */
/*
@@ -32,6 +32,7 @@
* @(#)filedesc.h 8.1 (Berkeley) 6/2/93
*/
+#include <sys/mutex.h>
#include <sys/rwlock.h>
/*
* This structure is used for the management of descriptors. It may be
@@ -72,6 +73,8 @@ struct filedesc {
struct rwlock fd_lock; /* lock for the file descs; must be */
/* held when writing to fd_ofiles, */
/* fd_ofileflags, or fd_{hi,lo}map */
+ struct mutex fd_fplock; /* lock for reading fd_ofiles without
+ * fd_lock */
int fd_flags; /* flags on the file descriptor table */
};