From 510705aa85a5914ad9570dc6e7c5145a4b6c399f Mon Sep 17 00:00:00 2001
From: Philip Guenther
Date: Fri, 1 Dec 2017 23:30:06 +0000
Subject: Redo the calculation of the alignment and placement of static TLS
 data to correctly take into account the segment p_align.

Previously, anything with a size below the natural alignment or with an
alignment larger than the natural one would either not be initialized
correctly, be misaligned, or result in the TIB being misaligned.

Problems reported by Charles Collicutt (charles (at) collicutt.co.uk)

ok kettenis@
---
 libexec/ld.so/malloc.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++
 libexec/ld.so/tib.c    | 110 ++++++++++++++++++++++++++++++++++++---------
 libexec/ld.so/util.h   |   3 +-
 3 files changed, 215 insertions(+), 17 deletions(-)

diff --git a/libexec/ld.so/malloc.c b/libexec/ld.so/malloc.c
index d9032dc3268..fb93e37f06c 100644
--- a/libexec/ld.so/malloc.c
+++ b/libexec/ld.so/malloc.c
@@ -1105,3 +1105,122 @@ ret:
 	return r;
 }
 
+static void *
+mapalign(struct dir_info *d, size_t alignment, size_t sz, int zero_fill)
+{
+	char *p, *q;
+
+	if (alignment < MALLOC_PAGESIZE || ((alignment - 1) & alignment) != 0)
+		wrterror("mapalign bad alignment");
+	if (sz != PAGEROUND(sz))
+		wrterror("mapalign round");
+
+	/* Allocate sz + alignment bytes of memory, which must include a
+	 * subrange of size bytes that is properly aligned.  Unmap the
+	 * other bytes, and then return that subrange.
+	 */
+
+	/* We need sz + alignment to fit into a size_t. */
+	if (alignment > SIZE_MAX - sz)
+		return MAP_FAILED;
+
+	p = map(d, sz + alignment, zero_fill);
+	if (p == MAP_FAILED)
+		return MAP_FAILED;
+	q = (char *)(((uintptr_t)p + alignment - 1) & ~(alignment - 1));
+	if (q != p) {
+		if (_dl_munmap(p, q - p))
+			wrterror("munmap");
+	}
+	if (_dl_munmap(q + sz, alignment - (q - p)))
+		wrterror("munmap");
+
+	return q;
+}
+
+static void *
+omemalign(size_t alignment, size_t sz, int zero_fill)
+{
+	size_t psz;
+	void *p;
+
+	/* If between half a page and a page, avoid MALLOC_MOVE. */
+	if (sz > MALLOC_MAXCHUNK && sz < MALLOC_PAGESIZE)
+		sz = MALLOC_PAGESIZE;
+	if (alignment <= MALLOC_PAGESIZE) {
+		/*
+		 * max(size, alignment) is enough to assure the requested
+		 * alignment, since the allocator always allocates
+		 * power-of-two blocks.
+		 */
+		if (sz < alignment)
+			sz = alignment;
+		return omalloc(sz, zero_fill);
+	}
+
+	if (sz >= SIZE_MAX - mopts.malloc_guard - MALLOC_PAGESIZE) {
+		return NULL;
+	}
+
+	sz += mopts.malloc_guard;
+	psz = PAGEROUND(sz);
+
+	p = mapalign(g_pool, alignment, psz, zero_fill);
+	if (p == MAP_FAILED) {
+		return NULL;
+	}
+
+	if (insert(g_pool, p, sz)) {
+		unmap(g_pool, p, psz);
+		return NULL;
+	}
+
+	if (mopts.malloc_guard) {
+		if (_dl_mprotect((char *)p + psz - mopts.malloc_guard,
+		    mopts.malloc_guard, PROT_NONE))
+			wrterror("mprotect");
+	}
+
+	if (mopts.malloc_junk == 2) {
+		if (zero_fill)
+			_dl_memset((char *)p + sz - mopts.malloc_guard,
+			    SOME_JUNK, psz - sz);
+		else
+			_dl_memset(p, SOME_JUNK, psz - mopts.malloc_guard);
+	}
+	else if (mopts.chunk_canaries) {
+		size_t csz = psz - sz;
+
+		if (csz > CHUNK_CHECK_LENGTH)
+			csz = CHUNK_CHECK_LENGTH;
+		_dl_memset((char *)p + sz - mopts.malloc_guard,
+		    SOME_JUNK, csz);
+	}
+
+	return p;
+}
+
+void *
+_dl_aligned_alloc(size_t alignment, size_t size)
+{
+	void *r = NULL;
+	lock_cb *cb;
+
+	/* Make sure that alignment is a large enough power of 2. */
+	if (((alignment - 1) & alignment) != 0 || alignment < sizeof(void *))
+		return NULL;
+
+	cb = _dl_thread_kern_stop();
+	if (g_pool == NULL)
+		omalloc_init(&g_pool);
+	g_pool->func = "aligned_alloc():";
+	if (g_pool->active++) {
+		malloc_recurse();
+		goto ret;
+	}
+	r = omemalign(alignment, size, 0);
+	g_pool->active--;
+ret:
+	_dl_thread_kern_go(cb);
+	return r;
+}
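
The over-map-and-trim technique in mapalign() above generalizes beyond ld.so. Below is a minimal standalone sketch of the same idea using plain mmap(2)/munmap(2) in place of ld.so's map() and _dl_munmap() wrappers; the function name mapalign_sketch and the reduced error handling are illustrative, not part of the patch.

/*
 * Sketch of mapalign()'s over-map-and-trim idea.  Caller guarantees:
 * alignment is a power of two no smaller than the page size, and sz
 * is already page-rounded.
 */
#include <sys/mman.h>
#include <stddef.h>
#include <stdint.h>

static void *
mapalign_sketch(size_t alignment, size_t sz)
{
	char *p, *q;

	/* sz + alignment must not overflow */
	if (alignment > SIZE_MAX - sz)
		return MAP_FAILED;

	p = mmap(NULL, sz + alignment, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (p == MAP_FAILED)
		return MAP_FAILED;

	/* round up to the first aligned address inside the mapping */
	q = (char *)(((uintptr_t)p + alignment - 1) & ~(alignment - 1));

	if (q != p && munmap(p, q - p) == -1)		/* trim the head */
		return MAP_FAILED;
	if (munmap(q + sz, alignment - (q - p)) == -1)	/* trim the tail */
		return MAP_FAILED;
	return q;
}

Because mmap(2) only promises page alignment, mapping sz + alignment bytes guarantees the window contains an aligned sz-byte subrange; the head and tail are unmapped rather than left to waste address space.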
diff --git a/libexec/ld.so/tib.c b/libexec/ld.so/tib.c
index 93f6a6ce66b..41aafce732d 100644
--- a/libexec/ld.so/tib.c
+++ b/libexec/ld.so/tib.c
@@ -36,7 +36,21 @@
 
 __dso_hidden void *allocate_tib(size_t);
 
+#define MAX(a,b)	(((a)>(b))?(a):(b))
+
+#ifdef TIB_EXTRA_ALIGN
+# define TIB_ALIGN	MAX(__alignof__(struct tib), TIB_EXTRA_ALIGN)
+#else
+# define TIB_ALIGN	__alignof__(struct tib)
+#endif
+
+
+/* size of static TLS allocation */
 static int static_tls_size;
+/* alignment of static TLS allocation */
+static int static_tls_align;
+/* base-offset alignment of (first) static TLS allocation */
+static int static_tls_align_offset;
 
 int _dl_tib_static_done;
 
@@ -55,22 +69,26 @@ allocate_tib(size_t extra)
 	struct elf_object *obj;
 
 #if TLS_VARIANT == 1
-	/* round up the extra size to align the tib after it */
-	extra = ELF_ROUND(extra, sizeof(void *));
-	base = _dl_malloc(extra + sizeof *tib + static_tls_size);
+	/* round up the extra size to align the TIB and TLS data after it */
+	size_t unpad_extra = (extra <= static_tls_align_offset) ? 0 :
+	    ELF_ROUND(extra - static_tls_align_offset, static_tls_align);
+	base = _dl_aligned_alloc(static_tls_align, unpad_extra +
+	    static_tls_align_offset + sizeof *tib + static_tls_size);
 	if (base == NULL)
 		return NULL;
-	tib = (struct tib *)(base + extra);
+	tib = (struct tib *)(base + unpad_extra + static_tls_align_offset);
 	if (extra)
 		thread = base;
 #define TLS_ADDR(tibp, offset) ((char *)(tibp) + sizeof(struct tib) + (offset))
 #elif TLS_VARIANT == 2
-	/* round up the tib size to align the extra area after it */
-	base = _dl_malloc(ELF_ROUND(sizeof *tib, TIB_EXTRA_ALIGN) +
-	    extra + static_tls_size);
+	/* round up the TIB size to align the extra area after it */
+	base = _dl_aligned_alloc(static_tls_align, static_tls_size +
+	    static_tls_align_offset + ELF_ROUND(sizeof *tib, TIB_EXTRA_ALIGN) +
+	    extra);
 	if (base == NULL)
 		return NULL;
+	base += static_tls_align_offset;
 	tib = (struct tib *)(base + static_tls_size);
 	if (extra)
 		thread = (char *)tib + ELF_ROUND(sizeof *tib, TIB_EXTRA_ALIGN);
@@ -107,10 +125,12 @@ _dl_free_tib(void *tib, size_t extra)
 	size_t tib_offset;
 
 #if TLS_VARIANT == 1
-	tib_offset = ELF_ROUND(extra, sizeof(void *));
+	tib_offset = (extra <= static_tls_align_offset) ? 0 :
+	    ELF_ROUND(extra - static_tls_align_offset, static_tls_align);
 #elif TLS_VARIANT == 2
 	tib_offset = static_tls_size;
 #endif
+	tib_offset += static_tls_align_offset;
 
 	DL_DEB(("free tib=%p\n", (void *)tib));
 	_dl_free((char *)tib - tib_offset);
@@ -136,19 +156,68 @@ _dl_set_tls(elf_object_t *object, Elf_Phdr *ptls, Elf_Addr libaddr,
 }
 
 static inline Elf_Addr
-allocate_tls_offset(Elf_Addr msize, Elf_Addr align)
+allocate_tls_offset(Elf_Addr msize, Elf_Addr align, int for_exe)
 {
 	Elf_Addr offset;
 
+	if (for_exe && static_tls_size != 0)
+		_dl_die("TLS allocation before executable!");
+
 #if TLS_VARIANT == 1
-	/* round up to the required alignment, then allocate the space */
-	offset = ELF_ROUND(static_tls_size, align);
-	static_tls_size += msize;
+	if (for_exe) {
+		/*
+		 * Variant 1 places the data after the TIB.  If the
+		 * TLS alignment is larger than the TIB alignment
+		 * then we may need to pad in front of the TIB to
+		 * place the TLS data on the proper alignment.
+		 * Example: p_align=16 sizeof(TIB)=52 align(TIB)=4
+		 *	- need to offset the TIB 12 bytes from the start
+		 *	  to place the TLS data at offset 64
+		 */
+		static_tls_align = MAX(align, TIB_ALIGN);
+		static_tls_align_offset =
+		    ELF_ROUND(sizeof(struct tib), static_tls_align) -
+		    sizeof(struct tib);
+		offset = 0;
+		static_tls_size = msize;
+	} else {
+		/*
+		 * If a later object increases the alignment, realign the
+		 * existing sections.  We push as much padding as possible
+		 * to the start, where it can overlap the thread structure.
+		 */
+		if (static_tls_align < align) {
+			static_tls_align_offset += align - static_tls_align;
+			static_tls_align = align;
+		}
+
+		/*
+		 * Round up to the required alignment, taking into account
+		 * the leading padding and TIB, then allocate the space.
+		 */
+		offset = static_tls_align_offset + sizeof(struct tib) +
+		    static_tls_size;
+		offset = ELF_ROUND(offset, align) - static_tls_align_offset -
+		    sizeof(struct tib);
+		static_tls_size = offset + msize;
+	}
 #elif TLS_VARIANT == 2
+	/* Realignment is automatic for variant 2 */
+	if (static_tls_align < align)
+		static_tls_align = align;
+
 	/*
-	 * allocate the space, then round up to the alignment
-	 * (these are negative offsets, so rounding up really rounds the
-	 * address down)
+	 * Variant 2 places the data before the TIB, so we need to round up
+	 * the size to both the TLS data alignment and the TIB's alignment.
+	 * Example A: p_memsz=24 p_align=16 align(TIB)=8
+	 *	- need to allocate 32 bytes for TLS, as the compiler
+	 *	  will give the first TLS symbol an offset of -32
+	 * Example B: p_memsz=4 p_align=4 align(TIB)=8
+	 *	- need to allocate 8 bytes so that the TIB is
+	 *	  properly aligned
+	 * So: allocate the space, then round up to the alignment
+	 * (these are negative offsets, so rounding up really
+	 * rounds the address down)
	 */
 	static_tls_size = ELF_ROUND(static_tls_size + msize, align);
 	offset = static_tls_size;
@@ -166,15 +235,24 @@ _dl_allocate_tls_offsets(void)
 {
 	struct elf_object *obj;
 
+	static_tls_align = TIB_ALIGN;
 	for (obj = _dl_objects; obj != NULL; obj = obj->next) {
 		if (obj->tls_msize != 0) {
 			obj->tls_offset = allocate_tls_offset(obj->tls_msize,
-			    obj->tls_align);
+			    obj->tls_align, obj->obj_type == OBJTYPE_EXE);
 		}
 	}
 
+#if TLS_VARIANT == 2
+	static_tls_align_offset = ELF_ROUND(static_tls_size, static_tls_align)
+	    - static_tls_size;
+#endif
+
 	/* no more static TLS allocations after this */
 	_dl_tib_static_done = 1;
+
+	DL_DEB(("static tls size=%x align=%x offset=%x\n",
+	    static_tls_size, static_tls_align, static_tls_align_offset));
 }
 
 /*
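
The worked examples in the tib.c comments above can be checked mechanically. The following small standalone program does so, assuming local stand-ins ROUND and MAX for ld.so's ELF_ROUND and MAX macros; all variable names are invented for illustration.

#include <assert.h>
#include <stddef.h>

#define ROUND(x, a)	(((x) + (a) - 1) & ~(size_t)((a) - 1))
#define MAX(a, b)	((a) > (b) ? (a) : (b))

int
main(void)
{
	/* Variant 1 example: p_align=16, sizeof(TIB)=52, align(TIB)=4 */
	size_t tls_align = MAX(16, 4);			/* 16 */
	size_t pad = ROUND(52, tls_align) - 52;
	assert(pad == 12);	/* TIB offset 12 bytes from the start */
	assert(pad + 52 == 64);	/* TLS data lands at offset 64 */

	/* Variant 2, Example A: p_memsz=24, p_align=16, align(TIB)=8 */
	assert(ROUND(24, 16) == 32);	/* first TLS symbol at offset -32 */

	/* Variant 2, Example B: p_memsz=4, p_align=4, align(TIB)=8 */
	size_t tls_size = ROUND(0 + 4, 4);			/* 4 */
	size_t off = ROUND(tls_size, MAX(4, 8)) - tls_size;	/* 4 */
	assert(tls_size + off == 8);	/* 8 bytes keep the TIB 8-aligned */

	return 0;
}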
diff --git a/libexec/ld.so/util.h b/libexec/ld.so/util.h
index 9820fb38622..260817d422a 100644
--- a/libexec/ld.so/util.h
+++ b/libexec/ld.so/util.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: util.h,v 1.31 2017/08/29 15:25:51 deraadt Exp $	*/
+/*	$OpenBSD: util.h,v 1.32 2017/12/01 23:30:05 guenther Exp $	*/
 
 /*
  * Copyright (c) 1998 Todd C. Miller
@@ -41,6 +41,7 @@ void *_dl_calloc(size_t nmemb, const size_t size);
 void *_dl_realloc(void *, size_t size);
 void *_dl_reallocarray(void *, size_t nmemb, size_t size);
 void _dl_free(void *);
+void *_dl_aligned_alloc(size_t _alignment, size_t _size);
 char *_dl_strdup(const char *);
 size_t _dl_strlen(const char *);
 size_t _dl_strlcat(char *dst, const char *src, size_t siz);
--
cgit v1.2.3
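
One last detail worth spelling out is the guard at the top of _dl_aligned_alloc(): ((alignment - 1) & alignment) != 0 rejects any alignment that is not a power of two, because a power of two has exactly one bit set and ANDing it with its predecessor clears that bit. A standalone sketch of the same check follows; the helper name valid_alignment() is invented.

#include <assert.h>
#include <stddef.h>

/* Mirrors _dl_aligned_alloc()'s precondition: alignment must be a
 * power of two and at least sizeof(void *). */
static int
valid_alignment(size_t a)
{
	return ((a - 1) & a) == 0 && a >= sizeof(void *);
}

int
main(void)
{
	assert(valid_alignment(sizeof(void *)));
	assert(valid_alignment(16) && valid_alignment(4096));
	assert(!valid_alignment(0));	/* passes the bit test, below minimum */
	assert(!valid_alignment(24));	/* 24 = 11000b has two bits set */
	return 0;
}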