diff options
author | Paul Irofti <pirofti@cvs.openbsd.org> | 2018-06-08 13:53:02 +0000 |
---|---|---|
committer | Paul Irofti <pirofti@cvs.openbsd.org> | 2018-06-08 13:53:02 +0000 |
commit | 4a152d480f9947fdd104aee21e4842cdc30d0cb3 (patch) | |
tree | 0039f306a822416c7951036416eb5581120d923b /lib/librthread | |
parent | 522d400896b03589ce78f8cd82d94a9942d2e197 (diff) |
New semaphore implementation making sem_post async-safe.
POSIX dictates that sem_post() needs to be async-safe here[0] and is
thus included in the list of safe functions to call from within a signal
handler here[1].
The old semaphore implementation is using spinlocks and __thrsleep to
synchronize between threads.
Let's say there are two threads: T0 and T1 and the semaphore has V=0.
T1 calls sem_wait() and it will now sleep (spinlock) until someone else
sem_post()'s. Let's say T0 sends a signal to T1 and exits.
The signal handler calls sem_post() which is meant to unblock T1 by
incrementing V. With the old semaphore implementation we we are now in a
deadlock as sem_post spinlocks on the same lock.
The new implementation does not suffer from this defect as it
uses futexes to resolve locking and thus sem_post does not need to spin.
Besides fixing this defect and making us POSIX compliant, this should
also improve performance as there should be less context switching and
thus less time spent in the kernel.
For architectures that do not provied futexes and atomic operations,
the old implementation will be used and it is now being renamed to
rthread_sem_compat as discussed with mpi@.
[0] -- http://pubs.opengroup.org/onlinepubs/9699919799/functions/sem_post.html
[1] -- http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html
OK visa@, mpi@, guenther@
Diffstat (limited to 'lib/librthread')
-rw-r--r-- | lib/librthread/Makefile | 13 | ||||
-rw-r--r-- | lib/librthread/rthread_sem.c | 108 | ||||
-rw-r--r-- | lib/librthread/rthread_sem_compat.c | 439 | ||||
-rw-r--r-- | lib/librthread/synch.h | 61 |
4 files changed, 560 insertions, 61 deletions
diff --git a/lib/librthread/Makefile b/lib/librthread/Makefile index 4c3e127491d..7d188db71da 100644 --- a/lib/librthread/Makefile +++ b/lib/librthread/Makefile @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile,v 1.49 2017/10/15 23:40:33 guenther Exp $ +# $OpenBSD: Makefile,v 1.50 2018/06/08 13:53:01 pirofti Exp $ LIB=pthread LIBCSRCDIR= ${.CURDIR}/../libc @@ -30,12 +30,21 @@ SRCS= rthread.c \ rthread_rwlock.c \ rthread_rwlockattr.c \ rthread_sched.c \ - rthread_sem.c \ rthread_sig.c \ rthread_stack.c \ rthread_spin_lock.c \ sched_prio.c +# Architectures that implement atomics +.if ${MACHINE_ARCH} == "aarch64" || \ + ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386" || \ + ${MACHINE_ARCH} == "mips64" || ${MACHINE_ARCH} == "mips64el" || \ + ${MACHINE_ARCH} == "sparc64" +SRCS+= rthread_sem.c +.else +SRCS+= rthread_sem_compat.c +.endif + SRCDIR= ${.CURDIR}/../libpthread .include "${SRCDIR}/man/Makefile.inc" .include <bsd.lib.mk> diff --git a/lib/librthread/rthread_sem.c b/lib/librthread/rthread_sem.c index 66b9f917abf..7de0e832363 100644 --- a/lib/librthread/rthread_sem.c +++ b/lib/librthread/rthread_sem.c @@ -1,6 +1,7 @@ -/* $OpenBSD: rthread_sem.c,v 1.28 2018/04/27 06:47:34 guenther Exp $ */ +/* $OpenBSD: rthread_sem.c,v 1.29 2018/06/08 13:53:01 pirofti Exp $ */ /* * Copyright (c) 2004,2005,2013 Ted Unangst <tedu@openbsd.org> + * Copyright (c) 2018 Paul Irofti <pirofti@openbsd.org> * All Rights Reserved. * * Permission to use, copy, modify, and distribute this software for any @@ -19,6 +20,9 @@ #include <sys/types.h> #include <sys/mman.h> #include <sys/stat.h> +#include <sys/atomic.h> +#include <sys/time.h> +#include <sys/futex.h> #include <errno.h> #include <fcntl.h> @@ -33,6 +37,7 @@ #include "rthread.h" #include "cancel.h" /* in libc/include */ +#include "synch.h" #define SHARED_IDENT ((void *)-1) @@ -56,53 +61,41 @@ int _sem_wait(sem_t sem, int can_eintr, const struct timespec *abstime, int *delayed_cancel) { - void *ident = (void *)&sem->waitcount; - int r; - - if (sem->shared) - ident = SHARED_IDENT; - - _spinlock(&sem->lock); - if (sem->value) { - sem->value--; - r = 0; - } else { - sem->waitcount++; - do { - r = __thrsleep(ident, CLOCK_REALTIME, abstime, - &sem->lock, delayed_cancel); - _spinlock(&sem->lock); - /* ignore interruptions other than cancelation */ - if ((r == ECANCELED && *delayed_cancel == 0) || - (r == EINTR && !can_eintr)) - r = 0; - } while (r == 0 && sem->value == 0); - sem->waitcount--; - if (r == 0) - sem->value--; + int r = 0; + int v, ov; + + atomic_inc_int(&sem->waitcount); + for (;;) { + while ((v = sem->value) > 0) { + ov = atomic_cas_uint(&sem->value, v, v - 1); + if (ov == v) { + membar_enter_after_atomic(); + atomic_dec_int(&sem->waitcount); + return 0; + } + } + if (r) + break; + + r = _twait(&sem->value, 0, CLOCK_REALTIME, abstime); + /* ignore interruptions other than cancelation */ + if ((r == ECANCELED && *delayed_cancel == 0) || + (r == EINTR && !can_eintr) || r == EAGAIN) + r = 0; } - _spinunlock(&sem->lock); - return (r); + atomic_dec_int(&sem->waitcount); + + return r; } /* always increment count */ int _sem_post(sem_t sem) { - void *ident = (void *)&sem->waitcount; - int rv = 0; - - if (sem->shared) - ident = SHARED_IDENT; - - _spinlock(&sem->lock); - sem->value++; - if (sem->waitcount) { - __thrwakeup(ident, 1); - rv = 1; - } - _spinunlock(&sem->lock); - return (rv); + membar_exit_before_atomic(); + atomic_inc_int(&sem->value); + _wake(&sem->value, 1); + return 0; } /* @@ -158,7 +151,6 @@ sem_init(sem_t *semp, int pshared, unsigned int value) errno = ENOSPC; return (-1); } - sem->lock = _SPINLOCK_UNLOCKED; sem->value = value; *semp = sem; @@ -205,9 +197,7 @@ sem_getvalue(sem_t *semp, int *sval) return (-1); } - _spinlock(&sem->lock); *sval = sem->value; - _spinunlock(&sem->lock); return (0); } @@ -251,6 +241,8 @@ sem_wait(sem_t *semp) if (r) { errno = r; + _rthread_debug(1, "%s: v=%d errno=%d\n", __func__, + sem->value, errno); return (-1); } @@ -267,7 +259,7 @@ sem_timedwait(sem_t *semp, const struct timespec *abstime) PREP_CANCEL_POINT(tib); if (!semp || !(sem = *semp) || abstime == NULL || - abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) { + abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) { errno = EINVAL; return (-1); } @@ -282,6 +274,8 @@ sem_timedwait(sem_t *semp, const struct timespec *abstime) if (r) { errno = r == EWOULDBLOCK ? ETIMEDOUT : r; + _rthread_debug(1, "%s: v=%d errno=%d\n", __func__, + sem->value, errno); return (-1); } @@ -292,27 +286,24 @@ int sem_trywait(sem_t *semp) { sem_t sem; - int r; + int v, ov; if (!semp || !(sem = *semp)) { errno = EINVAL; return (-1); } - _spinlock(&sem->lock); - if (sem->value) { - sem->value--; - r = 0; - } else - r = EAGAIN; - _spinunlock(&sem->lock); - - if (r) { - errno = r; - return (-1); + while ((v = sem->value) > 0) { + ov = atomic_cas_uint(&sem->value, v, v - 1); + if (ov == v) { + membar_enter_after_atomic(); + return (0); + } } - return (0); + errno = EAGAIN; + _rthread_debug(1, "%s: v=%d errno=%d\n", __func__, sem->value, errno); + return (-1); } @@ -403,7 +394,6 @@ sem_open(const char *name, int oflag, ...) return (SEM_FAILED); } if (created) { - sem->lock = _SPINLOCK_UNLOCKED; sem->value = value; sem->shared = 1; } diff --git a/lib/librthread/rthread_sem_compat.c b/lib/librthread/rthread_sem_compat.c new file mode 100644 index 00000000000..81309d5f28e --- /dev/null +++ b/lib/librthread/rthread_sem_compat.c @@ -0,0 +1,439 @@ +/* $OpenBSD: rthread_sem_compat.c,v 1.1 2018/06/08 13:53:01 pirofti Exp $ */ +/* + * Copyright (c) 2004,2005,2013 Ted Unangst <tedu@openbsd.org> + * All Rights Reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include <errno.h> +#include <fcntl.h> +#include <sha2.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include <pthread.h> + +#include "rthread.h" +#include "cancel.h" /* in libc/include */ + +#define SHARED_IDENT ((void *)-1) + +/* SHA256_DIGEST_STRING_LENGTH includes nul */ +/* "/tmp/" + sha256 + ".sem" */ +#define SEM_PATH_SIZE (5 + SHA256_DIGEST_STRING_LENGTH + 4) + +/* long enough to be hard to guess */ +#define SEM_RANDOM_NAME_LEN 10 + +/* + * Size of memory to be mmap()'ed by named semaphores. + * Should be >= SEM_PATH_SIZE and page-aligned. + */ +#define SEM_MMAP_SIZE _thread_pagesize + +/* + * Internal implementation of semaphores + */ +int +_sem_wait(sem_t sem, int can_eintr, const struct timespec *abstime, + int *delayed_cancel) +{ + void *ident = (void *)&sem->waitcount; + int r; + + if (sem->shared) + ident = SHARED_IDENT; + + _spinlock(&sem->lock); + if (sem->value) { + sem->value--; + r = 0; + } else { + sem->waitcount++; + do { + r = __thrsleep(ident, CLOCK_REALTIME, abstime, + &sem->lock, delayed_cancel); + _spinlock(&sem->lock); + /* ignore interruptions other than cancelation */ + if ((r == ECANCELED && *delayed_cancel == 0) || + (r == EINTR && !can_eintr)) + r = 0; + } while (r == 0 && sem->value == 0); + sem->waitcount--; + if (r == 0) + sem->value--; + } + _spinunlock(&sem->lock); + return (r); +} + +/* always increment count */ +int +_sem_post(sem_t sem) +{ + void *ident = (void *)&sem->waitcount; + int rv = 0; + + if (sem->shared) + ident = SHARED_IDENT; + + _spinlock(&sem->lock); + sem->value++; + if (sem->waitcount) { + __thrwakeup(ident, 1); + rv = 1; + } + _spinunlock(&sem->lock); + return (rv); +} + +/* + * exported semaphores + */ +int +sem_init(sem_t *semp, int pshared, unsigned int value) +{ + sem_t sem; + + if (value > SEM_VALUE_MAX) { + errno = EINVAL; + return (-1); + } + + if (pshared) { + errno = EPERM; + return (-1); +#ifdef notyet + char name[SEM_RANDOM_NAME_LEN]; + sem_t *sempshared; + int i; + + for (;;) { + for (i = 0; i < SEM_RANDOM_NAME_LEN - 1; i++) + name[i] = arc4random_uniform(255) + 1; + name[SEM_RANDOM_NAME_LEN - 1] = '\0'; + sempshared = sem_open(name, O_CREAT | O_EXCL, 0, value); + if (sempshared != SEM_FAILED) + break; + if (errno == EEXIST) + continue; + if (errno != EPERM) + errno = ENOSPC; + return (-1); + } + + /* unnamed semaphore should not be opened twice */ + if (sem_unlink(name) == -1) { + sem_close(sempshared); + errno = ENOSPC; + return (-1); + } + + *semp = *sempshared; + free(sempshared); + return (0); +#endif + } + + sem = calloc(1, sizeof(*sem)); + if (!sem) { + errno = ENOSPC; + return (-1); + } + sem->lock = _SPINLOCK_UNLOCKED; + sem->value = value; + *semp = sem; + + return (0); +} + +int +sem_destroy(sem_t *semp) +{ + sem_t sem; + + if (!_threads_ready) /* for SEM_MMAP_SIZE */ + _rthread_init(); + + if (!semp || !(sem = *semp)) { + errno = EINVAL; + return (-1); + } + + if (sem->waitcount) { +#define MSG "sem_destroy on semaphore with waiters!\n" + write(2, MSG, sizeof(MSG) - 1); +#undef MSG + errno = EBUSY; + return (-1); + } + + *semp = NULL; + if (sem->shared) + munmap(sem, SEM_MMAP_SIZE); + else + free(sem); + + return (0); +} + +int +sem_getvalue(sem_t *semp, int *sval) +{ + sem_t sem; + + if (!semp || !(sem = *semp)) { + errno = EINVAL; + return (-1); + } + + _spinlock(&sem->lock); + *sval = sem->value; + _spinunlock(&sem->lock); + + return (0); +} + +int +sem_post(sem_t *semp) +{ + sem_t sem; + + if (!semp || !(sem = *semp)) { + errno = EINVAL; + return (-1); + } + + _sem_post(sem); + + return (0); +} + +int +sem_wait(sem_t *semp) +{ + struct tib *tib = TIB_GET(); + pthread_t self; + sem_t sem; + int r; + PREP_CANCEL_POINT(tib); + + if (!_threads_ready) + _rthread_init(); + self = tib->tib_thread; + + if (!semp || !(sem = *semp)) { + errno = EINVAL; + return (-1); + } + + ENTER_DELAYED_CANCEL_POINT(tib, self); + r = _sem_wait(sem, 1, NULL, &self->delayed_cancel); + LEAVE_CANCEL_POINT_INNER(tib, r); + + if (r) { + errno = r; + return (-1); + } + + return (0); +} + +int +sem_timedwait(sem_t *semp, const struct timespec *abstime) +{ + struct tib *tib = TIB_GET(); + pthread_t self; + sem_t sem; + int r; + PREP_CANCEL_POINT(tib); + + if (!semp || !(sem = *semp) || abstime == NULL || + abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) { + errno = EINVAL; + return (-1); + } + + if (!_threads_ready) + _rthread_init(); + self = tib->tib_thread; + + ENTER_DELAYED_CANCEL_POINT(tib, self); + r = _sem_wait(sem, 1, abstime, &self->delayed_cancel); + LEAVE_CANCEL_POINT_INNER(tib, r); + + if (r) { + errno = r == EWOULDBLOCK ? ETIMEDOUT : r; + return (-1); + } + + return (0); +} + +int +sem_trywait(sem_t *semp) +{ + sem_t sem; + int r; + + if (!semp || !(sem = *semp)) { + errno = EINVAL; + return (-1); + } + + _spinlock(&sem->lock); + if (sem->value) { + sem->value--; + r = 0; + } else + r = EAGAIN; + _spinunlock(&sem->lock); + + if (r) { + errno = r; + return (-1); + } + + return (0); +} + + +static void +makesempath(const char *origpath, char *sempath, size_t len) +{ + char buf[SHA256_DIGEST_STRING_LENGTH]; + + SHA256Data(origpath, strlen(origpath), buf); + snprintf(sempath, len, "/tmp/%s.sem", buf); +} + +sem_t * +sem_open(const char *name, int oflag, ...) +{ + char sempath[SEM_PATH_SIZE]; + struct stat sb; + sem_t sem, *semp; + unsigned int value = 0; + int created = 0, fd; + + if (!_threads_ready) + _rthread_init(); + + if (oflag & ~(O_CREAT | O_EXCL)) { + errno = EINVAL; + return (SEM_FAILED); + } + + if (oflag & O_CREAT) { + va_list ap; + va_start(ap, oflag); + /* 3rd parameter mode is not used */ + va_arg(ap, mode_t); + value = va_arg(ap, unsigned); + va_end(ap); + + if (value > SEM_VALUE_MAX) { + errno = EINVAL; + return (SEM_FAILED); + } + } + + makesempath(name, sempath, sizeof(sempath)); + fd = open(sempath, O_RDWR | O_NOFOLLOW | oflag, 0600); + if (fd == -1) + return (SEM_FAILED); + if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode)) { + close(fd); + errno = EINVAL; + return (SEM_FAILED); + } + if (sb.st_uid != geteuid()) { + close(fd); + errno = EPERM; + return (SEM_FAILED); + } + if (sb.st_size != (off_t)SEM_MMAP_SIZE) { + if (!(oflag & O_CREAT)) { + close(fd); + errno = EINVAL; + return (SEM_FAILED); + } + if (sb.st_size != 0) { + close(fd); + errno = EINVAL; + return (SEM_FAILED); + } + if (ftruncate(fd, SEM_MMAP_SIZE) == -1) { + close(fd); + errno = EINVAL; + return (SEM_FAILED); + } + + created = 1; + } + sem = mmap(NULL, SEM_MMAP_SIZE, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + close(fd); + if (sem == MAP_FAILED) { + errno = EINVAL; + return (SEM_FAILED); + } + semp = malloc(sizeof(*semp)); + if (!semp) { + munmap(sem, SEM_MMAP_SIZE); + errno = ENOSPC; + return (SEM_FAILED); + } + if (created) { + sem->lock = _SPINLOCK_UNLOCKED; + sem->value = value; + sem->shared = 1; + } + *semp = sem; + + return (semp); +} + +int +sem_close(sem_t *semp) +{ + sem_t sem; + + if (!semp || !(sem = *semp) || !sem->shared) { + errno = EINVAL; + return (-1); + } + + *semp = NULL; + munmap(sem, SEM_MMAP_SIZE); + free(semp); + + return (0); +} + +int +sem_unlink(const char *name) +{ + char sempath[SEM_PATH_SIZE]; + + makesempath(name, sempath, sizeof(sempath)); + return (unlink(sempath)); +} diff --git a/lib/librthread/synch.h b/lib/librthread/synch.h new file mode 100644 index 00000000000..2cec188e265 --- /dev/null +++ b/lib/librthread/synch.h @@ -0,0 +1,61 @@ +/* $OpenBSD: synch.h,v 1.4 2018/06/08 13:53:01 pirofti Exp $ */ +/* + * Copyright (c) 2017 Martin Pieuchot + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/atomic.h> +#include <sys/time.h> +#include <sys/futex.h> + +static inline int +_wake(volatile uint32_t *p, int n) +{ + return futex(p, FUTEX_WAKE, n, NULL, NULL); +} + +static inline void +_wait(volatile uint32_t *p, int val) +{ + while (*p != (uint32_t)val) + futex(p, FUTEX_WAIT, val, NULL, NULL); +} + +static inline int +_twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec *abs) +{ + struct timespec rel; + + if (abs == NULL) + return futex(p, FUTEX_WAIT, val, NULL, NULL); + + if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel)) + return (EINVAL); + + rel.tv_sec = abs->tv_sec - rel.tv_sec; + if ((rel.tv_nsec = abs->tv_nsec - rel.tv_nsec) < 0) { + rel.tv_sec--; + rel.tv_nsec += 1000000000; + } + if (rel.tv_sec < 0) + return (ETIMEDOUT); + + return futex(p, FUTEX_WAIT, val, &rel, NULL); +} + +static inline int +_requeue(volatile uint32_t *p, int n, int m, volatile uint32_t *q) +{ + return futex(p, FUTEX_REQUEUE, n, (void *)(long)m, q); +} |