diff options
author | anton <anton@cvs.openbsd.org> | 2019-01-21 18:09:22 +0000 |
---|---|---|
committer | anton <anton@cvs.openbsd.org> | 2019-01-21 18:09:22 +0000 |
commit | bc327c97b94e27819444bf325d33c2b5e5708a4f (patch) | |
tree | 97cc661b5437aac51b4bad9155bb1d3ce02530f5 | |
parent | b1b40e2217c9f88f38bcc644233882a46076587f (diff) |
Introduce a dedicated entry point data structure for file locks. This new data
structure allows for better tracking of pending lock operations, which is
essential to prevent a use-after-free once the underlying vnode is
gone.
Inspired by the lockf implementation in FreeBSD.
ok visa@
Reported-by: syzbot+d5540a236382f50f1dac@syzkaller.appspotmail.com
-rw-r--r-- | sys/kern/vfs_lockf.c | 162 | ||||
-rw-r--r-- | sys/kern/vfs_subr.c | 4 | ||||
-rw-r--r-- | sys/msdosfs/denode.h | 4 | ||||
-rw-r--r-- | sys/nfs/nfsnode.h | 4 | ||||
-rw-r--r-- | sys/sys/fcntl.h | 3 | ||||
-rw-r--r-- | sys/sys/lockf.h | 16 | ||||
-rw-r--r-- | sys/sys/specdev.h | 4 | ||||
-rw-r--r-- | sys/tmpfs/tmpfs.h | 4 | ||||
-rw-r--r-- | sys/ufs/ufs/inode.h | 4 |
9 files changed, 153 insertions, 52 deletions
diff --git a/sys/kern/vfs_lockf.c b/sys/kern/vfs_lockf.c index 5ee98ddd38c..7fa6d9248c7 100644 --- a/sys/kern/vfs_lockf.c +++ b/sys/kern/vfs_lockf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_lockf.c,v 1.31 2018/11/10 21:21:15 anton Exp $ */ +/* $OpenBSD: vfs_lockf.c,v 1.32 2019/01/21 18:09:21 anton Exp $ */ /* $NetBSD: vfs_lockf.c,v 1.7 1996/02/04 02:18:21 christos Exp $ */ /* @@ -45,7 +45,8 @@ #include <sys/lockf.h> #include <sys/unistd.h> -struct pool lockfpool; +struct pool lockf_state_pool; +struct pool lockf_pool; /* * This variable controls the maximum number of processes that will @@ -75,16 +76,42 @@ int lockf_debug = DEBUG_SETLOCK|DEBUG_CLEARLOCK|DEBUG_WAKELOCK; #define LFPRINT(args, level) #endif +void ls_ref(struct lockf_state *); +void ls_rele(struct lockf_state *); + void lf_init(void) { - pool_init(&lockfpool, sizeof(struct lockf), 0, IPL_NONE, PR_WAITOK, - "lockfpl", NULL); + pool_init(&lockf_state_pool, sizeof(struct lockf_state), 0, IPL_NONE, + PR_WAITOK, "lockfspl", NULL); + pool_init(&lockf_pool, sizeof(struct lockf), 0, IPL_NONE, + PR_WAITOK, "lockfpl", NULL); } struct lockf *lf_alloc(uid_t, int); void lf_free(struct lockf *); +void +ls_ref(struct lockf_state *ls) +{ + ls->ls_refs++; +} + +void +ls_rele(struct lockf_state *ls) +{ + if (--ls->ls_refs > 0) + return; + +#ifdef LOCKF_DIAGNOSTIC + KASSERT(ls->ls_head == NULL); + KASSERT(ls->ls_pending == 0); +#endif + + *ls->ls_owner = NULL; + pool_put(&lockf_state_pool, ls); +} + /* * We enforce a limit on locks by uid, so that a single user cannot * run the kernel out of memory. For now, the limit is pretty coarse. 
@@ -115,7 +142,7 @@ lf_alloc(uid_t uid, int allowfail) } uip->ui_lockcnt++; uid_release(uip); - lock = pool_get(&lockfpool, PR_WAITOK); + lock = pool_get(&lockf_pool, PR_WAITOK); lock->lf_uid = uid; return (lock); } @@ -127,14 +154,14 @@ lf_free(struct lockf *lock) LFPRINT(("lf_free", lock), DEBUG_LINK); - if (*lock->lf_head == lock) { + if (lock->lf_state->ls_head == lock) { LFPRINT(("lf_free: head", lock->lf_next), DEBUG_LINK); #ifdef LOCKF_DIAGNOSTIC KASSERT(lock->lf_prev == NULL); #endif /* LOCKF_DIAGNOSTIC */ - *lock->lf_head = lock->lf_next; + lock->lf_state->ls_head = lock->lf_next; } #ifdef LOCKF_DIAGNOSTIC @@ -142,11 +169,12 @@ lf_free(struct lockf *lock) #endif /* LOCKF_DIAGNOSTIC */ lf_unlink(lock); + ls_rele(lock->lf_state); uip = uid_find(lock->lf_uid); uip->ui_lockcnt--; uid_release(uip); - pool_put(&lockfpool, lock); + pool_put(&lockf_pool, lock); } void @@ -176,18 +204,14 @@ lf_link(struct lockf *lock1, struct lockf *lock2) } lock2->lf_prev = lock1; - if (*lock1->lf_head == NULL) { + if (lock1->lf_state->ls_head == NULL) { LFPRINT(("lf_link: head", lock1), DEBUG_LINK); -#ifdef LOCKF_DIAGNOSTIC - KASSERT(*lock2->lf_head == NULL); -#endif /* LOCKF_DIAGNOSTIC */ - - *lock1->lf_head = lock1; - } else if (*lock2->lf_head == lock2) { + lock1->lf_state->ls_head = lock1; + } else if (lock1->lf_state->ls_head == lock2) { LFPRINT(("lf_link: swap head", lock1), DEBUG_LINK); - *lock1->lf_head = lock1; + lock1->lf_state->ls_head = lock1; } } @@ -207,10 +231,11 @@ lf_unlink(struct lockf *lock) * Do an advisory lock operation. 
*/ int -lf_advlock(struct lockf **head, off_t size, caddr_t id, int op, +lf_advlock(struct lockf_state **state, off_t size, caddr_t id, int op, struct flock *fl, int flags) { struct proc *p = curproc; + struct lockf_state *ls; struct lockf *lock; off_t start, end; int error; @@ -248,23 +273,39 @@ lf_advlock(struct lockf **head, off_t size, caddr_t id, int op, end = -1; } + if (*state == NULL) { + ls = pool_get(&lockf_state_pool, PR_WAITOK | PR_ZERO); + /* pool_get() can sleep, ensure the race was won. */ + if (*state == NULL) { + *state = ls; + ls->ls_owner = state; + } else { + pool_put(&lockf_state_pool, ls); + } + } + ls = *state; + ls_ref(ls); + /* * Avoid the common case of unlocking when inode has no locks. */ - if (*head == NULL) { + if (ls->ls_head == NULL) { if (op != F_SETLK) { + ls_rele(ls); fl->l_type = F_UNLCK; return (0); } } lock = lf_alloc(p->p_ucred->cr_uid, op == F_SETLK ? 1 : 2); - if (!lock) + if (!lock) { + ls_rele(ls); return (ENOLCK); + } lock->lf_start = start; lock->lf_end = end; lock->lf_id = id; - lock->lf_head = head; + lock->lf_state = ls; lock->lf_type = fl->l_type; lock->lf_prev = NULL; lock->lf_next = NULL; @@ -297,7 +338,6 @@ int lf_setlock(struct lockf *lock) { struct lockf *block; - struct lockf **head = lock->lf_head; struct lockf *prev, *overlap, *ltmp; static char lockstr[] = "lockf"; int ovcase, priority, needtolink, error; @@ -369,7 +409,10 @@ lf_setlock(struct lockf *lock) LFPRINT(("lf_setlock", lock), DEBUG_SETLOCK); LFPRINT(("lf_setlock: blocking on", block), DEBUG_SETLOCK); TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block); + lock->lf_state->ls_pending++; error = tsleep(lock, priority, lockstr, 0); + lock->lf_state->ls_pending--; + wakeup_one(lock->lf_state); if (lock->lf_next != NULL) { TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block); lock->lf_next = NULL; @@ -378,6 +421,10 @@ lf_setlock(struct lockf *lock) lf_free(lock); return (error); } + if (lock->lf_flags & F_INTR) { + lf_free(lock); + return (EINTR); + } } 
/* * No blocks!! Add the lock. Note that we will @@ -387,7 +434,7 @@ lf_setlock(struct lockf *lock) * Skip over locks owned by other processes. * Handle any locks that overlap and are owned by ourselves. */ - block = *head; + block = lock->lf_state->ls_head; prev = NULL; overlap = NULL; needtolink = 1; @@ -411,8 +458,12 @@ lf_setlock(struct lockf *lock) lf_link(lock, overlap); else if (prev) /* last lock in list */ lf_link(prev, lock); - else /* first lock in list */ - *head = lock; + else { /* first lock in list */ +#ifdef LOCKF_DIAGNOSTIC + KASSERT(lock->lf_state->ls_head == NULL); +#endif + lock->lf_state->ls_head = lock; + } } break; case 1: /* overlap == lock */ @@ -422,7 +473,7 @@ lf_setlock(struct lockf *lock) */ if (lock->lf_type == F_RDLCK && overlap->lf_type == F_WRLCK) - lf_wakelock(overlap); + lf_wakelock(overlap, 0); overlap->lf_type = lock->lf_type; lf_free(lock); lock = overlap; /* for debug output below */ @@ -443,7 +494,7 @@ lf_setlock(struct lockf *lock) overlap->lf_start = lock->lf_end + 1; } else lf_split(overlap, lock); - lf_wakelock(overlap); + lf_wakelock(overlap, 0); break; case 3: /* lock contains overlap */ /* @@ -452,7 +503,7 @@ lf_setlock(struct lockf *lock) */ if (lock->lf_type == F_RDLCK && overlap->lf_type == F_WRLCK) { - lf_wakelock(overlap); + lf_wakelock(overlap, 0); } else { while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) { @@ -480,7 +531,7 @@ lf_setlock(struct lockf *lock) lf_unlink(lock); lf_link(overlap, lock); overlap->lf_end = lock->lf_start - 1; - lf_wakelock(overlap); + lf_wakelock(overlap, 0); needtolink = 0; continue; case 5: /* overlap ends after lock */ @@ -490,7 +541,7 @@ lf_setlock(struct lockf *lock) if (needtolink) lf_link(lock, overlap); overlap->lf_start = lock->lf_end + 1; - lf_wakelock(overlap); + lf_wakelock(overlap, 0); break; } break; @@ -508,8 +559,7 @@ lf_setlock(struct lockf *lock) int lf_clearlock(struct lockf *lock) { - struct lockf **head = lock->lf_head; - struct lockf *lf = *head; + struct lockf *lf 
= lock->lf_state->ls_head; struct lockf *overlap, *prev; int ovcase; @@ -517,7 +567,7 @@ lf_clearlock(struct lockf *lock) return (0); LFPRINT(("lf_clearlock", lock), DEBUG_CLEARLOCK); while ((ovcase = lf_findoverlap(lf, lock, SELF, &prev, &overlap))) { - lf_wakelock(overlap); + lf_wakelock(overlap, 0); switch (ovcase) { case 1: /* overlap == lock */ @@ -582,7 +632,7 @@ lf_getblock(struct lockf *lock) { struct lockf *prev, *overlap, *lf; - lf = *lock->lf_head; + lf = lock->lf_state->ls_head; while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) { /* * We've found an overlap, see if it blocks us @@ -682,6 +732,43 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, } /* + * Purge all locks associated with the given lock state. + */ +void +lf_purgelocks(struct lockf_state *ls) +{ + struct lockf *lock; + + if (ls == NULL) + return; + + ls_ref(ls); + + /* Interrupt blocked locks and wait for all of them to finish. */ + for (lock = ls->ls_head; lock != NULL; lock = lock->lf_next) { + LFPRINT(("lf_purgelocks: wakeup", lock), DEBUG_SETLOCK); + lf_wakelock(lock, F_INTR); + } + while (ls->ls_pending > 0) { + tsleep(ls, PLOCK, "lockfp", 0); + } + + /* + * Any remaining locks cannot block other locks at this point and can + * safely be removed. + */ + while ((lock = ls->ls_head) != NULL) { + lf_free(lock); + } + + /* This is the last expected thread to hold a lock state reference. */ +#ifdef LOCKF_DIAGNOSTIC + KASSERT(ls->ls_refs == 1); +#endif + ls_rele(ls); +} + +/* * Split a lock and a contained region into * two or three locks as necessary. 
*/ @@ -712,6 +799,7 @@ lf_split(struct lockf *lock1, struct lockf *lock2) */ splitlock = lf_alloc(lock1->lf_uid, 0); memcpy(splitlock, lock1, sizeof(*splitlock)); + ls_ref(splitlock->lf_state); splitlock->lf_prev = NULL; splitlock->lf_next = NULL; splitlock->lf_start = lock2->lf_end + 1; @@ -727,13 +815,14 @@ lf_split(struct lockf *lock1, struct lockf *lock2) * Wakeup a blocklist */ void -lf_wakelock(struct lockf *lock) +lf_wakelock(struct lockf *lock, int flags) { struct lockf *wakelock; while ((wakelock = TAILQ_FIRST(&lock->lf_blkhd))) { TAILQ_REMOVE(&lock->lf_blkhd, wakelock, lf_block); wakelock->lf_next = NULL; + wakelock->lf_flags |= flags; wakeup_one(wakelock); } } @@ -764,7 +853,8 @@ lf_print(char *tag, struct lockf *lock) lock->lf_type == F_WRLCK ? "exclusive" : lock->lf_type == F_UNLCK ? "unlock" : "unknown", lock->lf_start, lock->lf_end); - printf(", prev %p, next %p", lock->lf_prev, lock->lf_next); + printf(", prev %p, next %p, state %p", + lock->lf_prev, lock->lf_next, lock->lf_state); block = TAILQ_FIRST(&lock->lf_blkhd); if (block) printf(", block"); @@ -782,7 +872,7 @@ lf_printlist(char *tag, struct lockf *lock) #endif /* LOCKF_DIAGNOSTIC */ printf("%s: Lock list:\n", tag); - for (lf = *lock->lf_head; lf; lf = lf->lf_next) { + for (lf = lock->lf_state->ls_head; lf; lf = lf->lf_next) { if (lock == lf) printf(" * "); else diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 4e888b98f27..a15481c73f9 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vfs_subr.c,v 1.284 2018/12/23 10:46:51 natano Exp $ */ +/* $OpenBSD: vfs_subr.c,v 1.285 2019/01/21 18:09:21 anton Exp $ */ /* $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $ */ /* @@ -52,6 +52,7 @@ #include <sys/conf.h> #include <sys/vnode.h> #include <sys/lock.h> +#include <sys/lockf.h> #include <sys/stat.h> #include <sys/acct.h> #include <sys/namei.h> @@ -1150,6 +1151,7 @@ vgonel(struct vnode *vp, struct proc *p) vx->v_flag &= ~VALIASED; 
vp->v_flag &= ~VALIASED; } + lf_purgelocks(vp->v_speclockf); free(vp->v_specinfo, M_VNODE, sizeof(struct specinfo)); vp->v_specinfo = NULL; } diff --git a/sys/msdosfs/denode.h b/sys/msdosfs/denode.h index 5fe1d8862cd..3a3f629df52 100644 --- a/sys/msdosfs/denode.h +++ b/sys/msdosfs/denode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: denode.h,v 1.33 2018/05/07 14:43:01 mpi Exp $ */ +/* $OpenBSD: denode.h,v 1.34 2019/01/21 18:09:21 anton Exp $ */ /* $NetBSD: denode.h,v 1.24 1997/10/17 11:23:39 ws Exp $ */ /*- @@ -148,7 +148,7 @@ struct denode { int de_fndcnt; /* number of slots before de_fndoffset */ long de_refcnt; /* reference count */ struct msdosfsmount *de_pmp; /* addr of our mount struct */ - struct lockf *de_lockf; /* byte level lock list */ + struct lockf_state *de_lockf; /* byte level lock list */ struct rrwlock de_lock; /* denode lock */ u_char de_Name[11]; /* name, from DOS directory entry */ u_char de_Attributes; /* attributes, from directory entry */ diff --git a/sys/nfs/nfsnode.h b/sys/nfs/nfsnode.h index 6a074a686db..1348060595f 100644 --- a/sys/nfs/nfsnode.h +++ b/sys/nfs/nfsnode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: nfsnode.h,v 1.40 2018/05/05 11:54:11 mpi Exp $ */ +/* $OpenBSD: nfsnode.h,v 1.41 2019/01/21 18:09:21 anton Exp $ */ /* $NetBSD: nfsnode.h,v 1.16 1996/02/18 11:54:04 fvdl Exp $ */ /* @@ -78,7 +78,7 @@ struct nfsnode { time_t n_ctime; /* Prev create time. 
*/ nfsfh_t *n_fhp; /* NFS File Handle */ struct vnode *n_vnode; /* associated vnode */ - struct lockf *n_lockf; /* Locking record of file */ + struct lockf_state *n_lockf; /* Locking record of file */ struct rrwlock n_lock; /* NFSnode lock */ int n_error; /* Save write error value */ union { diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index b08dcc72ff8..e964ea49dde 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -1,4 +1,4 @@ -/* $OpenBSD: fcntl.h,v 1.21 2015/05/17 01:22:01 deraadt Exp $ */ +/* $OpenBSD: fcntl.h,v 1.22 2019/01/21 18:09:21 anton Exp $ */ /* $NetBSD: fcntl.h,v 1.8 1995/03/26 20:24:12 jtc Exp $ */ /*- @@ -170,6 +170,7 @@ #define F_WAIT 0x010 /* Wait until lock is granted */ #define F_FLOCK 0x020 /* Use flock(2) semantics for lock */ #define F_POSIX 0x040 /* Use POSIX semantics for lock */ +#define F_INTR 0x080 /* Lock operation interrupted */ #endif /* diff --git a/sys/sys/lockf.h b/sys/sys/lockf.h index a822bcd32aa..90212a32e1e 100644 --- a/sys/sys/lockf.h +++ b/sys/sys/lockf.h @@ -1,4 +1,4 @@ -/* $OpenBSD: lockf.h,v 1.12 2018/11/02 07:15:03 anton Exp $ */ +/* $OpenBSD: lockf.h,v 1.13 2019/01/21 18:09:21 anton Exp $ */ /* $NetBSD: lockf.h,v 1.5 1994/06/29 06:44:33 cgd Exp $ */ /* @@ -49,7 +49,7 @@ struct lockf { off_t lf_start; /* The byte # of the start of the lock */ off_t lf_end; /* The byte # of the end of the lock (-1=EOF)*/ caddr_t lf_id; /* The id of the resource holding the lock */ - struct lockf **lf_head; /* Back pointer to the head of lockf list */ + struct lockf_state *lf_state; /* State associated with the lock */ struct lockf *lf_prev; /* A pointer to the previous lock on this inode */ struct lockf *lf_next; /* A pointer to the next lock on this inode */ struct locklist lf_blkhd; /* The list of blocked locks */ @@ -58,13 +58,20 @@ struct lockf { pid_t lf_pid; /* POSIX - owner pid */ }; +struct lockf_state { + struct lockf_state **ls_owner; /* owner */ + struct lockf *ls_head; /* linked list of locks */ + int ls_refs; /* 
reference counter */ + int ls_pending; /* pending lock operations */ +}; + /* Maximum length of sleep chains to traverse to try and detect deadlock. */ #define MAXDEPTH 50 #ifdef _KERNEL __BEGIN_DECLS void lf_init(void); -int lf_advlock(struct lockf **, +int lf_advlock(struct lockf_state **, off_t, caddr_t, int, struct flock *, int); int lf_clearlock(struct lockf *); int lf_findoverlap(struct lockf *, @@ -73,8 +80,9 @@ struct lockf * lf_getblock(struct lockf *); int lf_getlock(struct lockf *, struct flock *); int lf_setlock(struct lockf *); +void lf_purgelocks(struct lockf_state *); void lf_split(struct lockf *, struct lockf *); -void lf_wakelock(struct lockf *); +void lf_wakelock(struct lockf *, int); void lf_link(struct lockf *, struct lockf *); void lf_unlink(struct lockf *); __END_DECLS diff --git a/sys/sys/specdev.h b/sys/sys/specdev.h index c85e95d14db..870aec0d324 100644 --- a/sys/sys/specdev.h +++ b/sys/sys/specdev.h @@ -1,4 +1,4 @@ -/* $OpenBSD: specdev.h,v 1.37 2016/04/05 19:26:15 natano Exp $ */ +/* $OpenBSD: specdev.h,v 1.38 2019/01/21 18:09:21 anton Exp $ */ /* $NetBSD: specdev.h,v 1.12 1996/02/13 13:13:01 mycroft Exp $ */ /* @@ -42,7 +42,7 @@ struct specinfo { struct vnode *si_specnext; struct mount *si_mountpoint; dev_t si_rdev; - struct lockf *si_lockf; + struct lockf_state *si_lockf; daddr_t si_lastr; union { struct vnode *ci_parent; /* pointer back to parent device */ diff --git a/sys/tmpfs/tmpfs.h b/sys/tmpfs/tmpfs.h index 8646ca36446..0c5b1583327 100644 --- a/sys/tmpfs/tmpfs.h +++ b/sys/tmpfs/tmpfs.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tmpfs.h,v 1.8 2016/06/19 11:54:33 natano Exp $ */ +/* $OpenBSD: tmpfs.h,v 1.9 2019/01/21 18:09:21 anton Exp $ */ /* $NetBSD: tmpfs.h,v 1.45 2011/09/27 01:10:43 christos Exp $ */ /* @@ -122,7 +122,7 @@ typedef struct tmpfs_node { struct timespec tn_birthtime; /* Head of byte-level lock list (used by tmpfs_advlock). */ - struct lockf * tn_lockf; + struct lockf_state * tn_lockf; union { /* Type case: VBLK or VCHR. 
*/ diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index 3f2c7081d3c..4675dc7e38d 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -1,4 +1,4 @@ -/* $OpenBSD: inode.h,v 1.50 2016/06/19 11:54:34 natano Exp $ */ +/* $OpenBSD: inode.h,v 1.51 2019/01/21 18:09:21 anton Exp $ */ /* $NetBSD: inode.h,v 1.8 1995/06/15 23:22:50 cgd Exp $ */ /* @@ -85,7 +85,7 @@ struct inode { struct cluster_info i_ci; struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ - struct lockf *i_lockf;/* Head of byte-level lock list. */ + struct lockf_state *i_lockf; /* Byte-level lock state. */ struct rrwlock i_lock;/* Inode lock */ /* |