| author | Patrick Wildt <patrick@cvs.openbsd.org> | 2019-01-29 22:35:44 +0000 |
|---|---|---|
| committer | Patrick Wildt <patrick@cvs.openbsd.org> | 2019-01-29 22:35:44 +0000 |
| commit | 4307d4c1c866216fa87cb709ad9d857100a52463 (patch) | |
| tree | 15665c95aa3815c07cf2a18a830e03a90e4bc0de /lib/libcompiler_rt | |
| parent | 7ab9e8bbf68af83fb44229b13939e33d6eb49f04 (diff) | |
Update compiler-rt to 7.0.1. This is mostly a cosmetic update, since
most changes are guarded behind WIN32 or GCC ifdefs. The functional
changes are in emutls and in the AMD model checking.
ok kettenis@
Diffstat (limited to 'lib/libcompiler_rt')
-rw-r--r-- | lib/libcompiler_rt/CMakeLists.txt | 61
-rw-r--r-- | lib/libcompiler_rt/LICENSE.TXT | 2
-rw-r--r-- | lib/libcompiler_rt/arm/chkstk.S | 34
-rw-r--r-- | lib/libcompiler_rt/clear_cache.c | 4
-rw-r--r-- | lib/libcompiler_rt/clzdi2.c | 6
-rw-r--r-- | lib/libcompiler_rt/cpu_model.c | 4
-rw-r--r-- | lib/libcompiler_rt/ctzdi2.c | 6
-rw-r--r-- | lib/libcompiler_rt/emutls.c | 65
-rw-r--r-- | lib/libcompiler_rt/int_types.h | 2
-rw-r--r-- | lib/libcompiler_rt/riscv/mulsi3.S | 28
10 files changed, 187 insertions, 25 deletions
diff --git a/lib/libcompiler_rt/CMakeLists.txt b/lib/libcompiler_rt/CMakeLists.txt
index 0b50b5bb8d2..82332967b10 100644
--- a/lib/libcompiler_rt/CMakeLists.txt
+++ b/lib/libcompiler_rt/CMakeLists.txt
@@ -173,8 +173,8 @@ set(GENERIC_TF_SOURCES
   trunctfsf2.c)
 
 option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
-  "Skip the atomic builtin (this may be needed if system headers are unavailable)"
-  Off)
+  "Skip the atomic builtin (these should normally be provided by a shared library)"
+  On)
 
 if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
   set(GENERIC_SOURCES
@@ -406,6 +406,7 @@ if(MINGW)
     arm/aeabi_ldivmod.S
     arm/aeabi_uidivmod.S
     arm/aeabi_uldivmod.S
+    arm/chkstk.S
     divmoddi4.c
     divmodsi4.c
     divdi3.c
@@ -459,6 +460,41 @@ set(armv6m_SOURCES ${thumb1_SOURCES})
 set(armv7m_SOURCES ${arm_SOURCES})
 set(armv7em_SOURCES ${arm_SOURCES})
 
+# hexagon arch
+set(hexagon_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES})
+set(hexagon_SOURCES
+  hexagon/common_entry_exit_abi1.S
+  hexagon/common_entry_exit_abi2.S
+  hexagon/common_entry_exit_legacy.S
+  hexagon/dfaddsub.S
+  hexagon/dfdiv.S
+  hexagon/dffma.S
+  hexagon/dfminmax.S
+  hexagon/dfmul.S
+  hexagon/dfsqrt.S
+  hexagon/divdi3.S
+  hexagon/divsi3.S
+  hexagon/fabs_opt.S
+  hexagon/fastmath2_dlib_asm.S
+  hexagon/fastmath2_ldlib_asm.S
+  hexagon/fastmath_dlib_asm.S
+  hexagon/fma_opt.S
+  hexagon/fmax_opt.S
+  hexagon/fmin_opt.S
+  hexagon/memcpy_forward_vp4cp4n2.S
+  hexagon/memcpy_likely_aligned.S
+  hexagon/moddi3.S
+  hexagon/modsi3.S
+  hexagon/sfdiv_opt.S
+  hexagon/sfsqrt_opt.S
+  hexagon/udivdi3.S
+  hexagon/udivmoddi4.S
+  hexagon/udivmodsi4.S
+  hexagon/udivsi3.S
+  hexagon/umoddi3.S
+  hexagon/umodsi3.S)
+
+
 set(mips_SOURCES ${GENERIC_SOURCES})
 set(mipsel_SOURCES ${mips_SOURCES})
 set(mips64_SOURCES ${GENERIC_TF_SOURCES}
@@ -480,6 +516,12 @@
 set(powerpc64_SOURCES ${GENERIC_SOURCES})
 set(powerpc64le_SOURCES ${powerpc64_SOURCES})
 
+set(riscv_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES})
+set(riscv32_SOURCES
+  riscv/mulsi3.S
+  ${riscv_SOURCES})
+set(riscv64_SOURCES ${riscv_SOURCES})
+
 set(wasm32_SOURCES
   ${GENERIC_TF_SOURCES}
   ${GENERIC_SOURCES})
@@ -525,6 +567,15 @@ else ()
       set(_arch "arm")
     endif()
 
+    # For ARM archs, exclude any VFP builtins if VFP is not supported
+    if (${arch} MATCHES "^(arm|armhf|armv7|armv7s|armv7k|armv7m|armv7em)$")
+      string(REPLACE ";" " " _TARGET_${arch}_CFLAGS "${TARGET_${arch}_CFLAGS}")
+      check_compile_definition(__VFP_FP__ "${CMAKE_C_FLAGS} ${_TARGET_${arch}_CFLAGS}" COMPILER_RT_HAS_${arch}_VFP)
+      if(NOT COMPILER_RT_HAS_${arch}_VFP)
+        list(REMOVE_ITEM ${arch}_SOURCES ${arm_Thumb1_VFPv2_SOURCES} ${arm_Thumb1_SjLj_EH_SOURCES})
+      endif()
+    endif()
+
     # Filter out generic versions of routines that are re-implemented in
     # architecture specific manner. This prevents multiple definitions of the
     # same symbols, making the symbol selection non-deterministic.
@@ -542,6 +593,12 @@ else ()
       list(APPEND BUILTIN_CFLAGS -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
     endif()
 
+    # For RISCV32, we must force enable int128 for compiling long
+    # double routines.
+    if("${arch}" STREQUAL "riscv32")
+      list(APPEND BUILTIN_CFLAGS -fforce-enable-int128)
+    endif()
+
     add_compiler_rt_runtime(clang_rt.builtins
                             STATIC
                             ARCHS ${arch}
diff --git a/lib/libcompiler_rt/LICENSE.TXT b/lib/libcompiler_rt/LICENSE.TXT
index a17dc12b272..0134694e4e5 100644
--- a/lib/libcompiler_rt/LICENSE.TXT
+++ b/lib/libcompiler_rt/LICENSE.TXT
@@ -14,7 +14,7 @@ Full text of the relevant licenses is included below.
 
 ==============================================================================
 
 University of Illinois/NCSA Open Source License
 
-Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT
+Copyright (c) 2009-2018 by the contributors listed in CREDITS.TXT
 All rights reserved.
diff --git a/lib/libcompiler_rt/arm/chkstk.S b/lib/libcompiler_rt/arm/chkstk.S
new file mode 100644
index 00000000000..e3002105897
--- /dev/null
+++ b/lib/libcompiler_rt/arm/chkstk.S
@@ -0,0 +1,34 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// __chkstk routine
+// This routine is windows specific.
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+// This clobbers the register r12, and the condition codes, and uses r5 and r6
+// as temporaries by backing them up and restoring them afterwards.
+// Does not modify any memory or the stack pointer.
+
+//	movw r4, #256 // Number of bytes of stack, in units of 4 byte
+//	bl __chkstk
+//	sub.w sp, sp, r4
+
+#define PAGE_SIZE 4096
+
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__chkstk)
+	lsl	r4, r4, #2
+	mov	r12, sp
+	push	{r5, r6}
+	mov	r5, r4
+1:
+	sub	r12, r12, #PAGE_SIZE
+	subs	r5, r5, #PAGE_SIZE
+	ldr	r6, [r12]
+	bgt	1b
+
+	pop	{r5, r6}
+	bx	lr
+END_COMPILERRT_FUNCTION(__chkstk)
diff --git a/lib/libcompiler_rt/clear_cache.c b/lib/libcompiler_rt/clear_cache.c
index 451f1c0b124..9dcab344ad1 100644
--- a/lib/libcompiler_rt/clear_cache.c
+++ b/lib/libcompiler_rt/clear_cache.c
@@ -101,6 +101,8 @@ void __clear_cache(void *start, void *end) {
  * Intel processors have a unified instruction and data cache
  * so there is nothing to do
  */
+#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
+  FlushInstructionCache(GetCurrentProcess(), start, end - start);
 #elif defined(__arm__) && !defined(__APPLE__)
   #if defined(__FreeBSD__) || defined(__NetBSD__)
     struct arm_sync_icache_args arg;
@@ -128,8 +130,6 @@ void __clear_cache(void *start, void *end) {
                      : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags));
     assert(start_reg == 0 && "Cache flush syscall failed.");
-  #elif defined(_WIN32)
-    FlushInstructionCache(GetCurrentProcess(), start, end - start);
   #else
     compilerrt_abort();
   #endif
diff --git a/lib/libcompiler_rt/clzdi2.c b/lib/libcompiler_rt/clzdi2.c
index b9e64da492b..b56d98f5c01 100644
--- a/lib/libcompiler_rt/clzdi2.c
+++ b/lib/libcompiler_rt/clzdi2.c
@@ -16,6 +16,12 @@
 
 /* Returns: the number of leading 0-bits */
 
+#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
+/* gcc resolves __builtin_clz -> __clzdi2 leading to infinite recursion */
+#define __builtin_clz(a) __clzsi2(a)
+extern si_int __clzsi2(si_int);
+#endif
+
 /* Precondition: a != 0 */
 
 COMPILER_RT_ABI si_int
diff --git a/lib/libcompiler_rt/cpu_model.c b/lib/libcompiler_rt/cpu_model.c
index 4c96e9cd85d..43b913390fc 100644
--- a/lib/libcompiler_rt/cpu_model.c
+++ b/lib/libcompiler_rt/cpu_model.c
@@ -416,9 +416,9 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
       *Subtype = AMDFAM15H_BDVER3;
       break; // "bdver3"; 30h-3Fh: Steamroller
     }
-    if (Model >= 0x10 && Model <= 0x1f) {
+    if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
       *Subtype = AMDFAM15H_BDVER2;
-      break; // "bdver2"; 10h-1Fh: Piledriver
+      break; // "bdver2"; 02h, 10h-1Fh: Piledriver
     }
     if (Model <= 0x0f) {
       *Subtype = AMDFAM15H_BDVER1;
diff --git a/lib/libcompiler_rt/ctzdi2.c b/lib/libcompiler_rt/ctzdi2.c
index db3c6fdc08f..eecde29718d 100644
--- a/lib/libcompiler_rt/ctzdi2.c
+++ b/lib/libcompiler_rt/ctzdi2.c
@@ -16,6 +16,12 @@
 
 /* Returns: the number of trailing 0-bits */
 
+#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
+/* gcc resolves __builtin_ctz -> __ctzdi2 leading to infinite recursion */
+#define __builtin_ctz(a) __ctzsi2(a)
+extern si_int __ctzsi2(si_int);
+#endif
+
 /* Precondition: a != 0 */
 
 COMPILER_RT_ABI si_int
diff --git a/lib/libcompiler_rt/emutls.c b/lib/libcompiler_rt/emutls.c
index 5dd8dd15477..07d436e267d 100644
--- a/lib/libcompiler_rt/emutls.c
+++ b/lib/libcompiler_rt/emutls.c
@@ -14,7 +14,22 @@
 #include "int_lib.h"
 #include "int_util.h"
 
+#ifdef __BIONIC__
+/* There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
+   to round 2. We need to delay deallocation because:
+    - Android versions older than M lack __cxa_thread_atexit_impl, so apps
+      use a pthread key destructor to call C++ destructors.
+    - Apps might use __thread/thread_local variables in pthread destructors.
+   We can't wait until the final two rounds, because jemalloc needs two rounds
+   after the final malloc/free call to free its thread-specific data (see
+   https://reviews.llvm.org/D46978#1107507). */
+#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
+#else
+#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
+#endif
+
 typedef struct emutls_address_array {
+    uintptr_t skip_destructor_rounds;
     uintptr_t size; /* number of elements in the 'data' array */
     void* data[];
 } emutls_address_array;
@@ -65,9 +80,30 @@ static __inline void emutls_memalign_free(void *base) {
 #endif
 }
 
+static __inline void emutls_setspecific(emutls_address_array *value) {
+    pthread_setspecific(emutls_pthread_key, (void*) value);
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+    return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
+}
+
 static void emutls_key_destructor(void* ptr) {
-    emutls_shutdown((emutls_address_array*)ptr);
-    free(ptr);
+    emutls_address_array *array = (emutls_address_array*)ptr;
+    if (array->skip_destructor_rounds > 0) {
+        /* emutls is deallocated using a pthread key destructor. These
+         * destructors are called in several rounds to accommodate destructor
+         * functions that (re)initialize key values with pthread_setspecific.
+         * Delay the emutls deallocation to accommodate other end-of-thread
+         * cleanup tasks like calling thread_local destructors (e.g. the
+         * __cxa_thread_atexit fallback in libc++abi).
+         */
+        array->skip_destructor_rounds--;
+        emutls_setspecific(array);
+    } else {
+        emutls_shutdown(array);
+        free(ptr);
+    }
 }
 
 static __inline void emutls_init(void) {
@@ -88,15 +124,7 @@ static __inline void emutls_unlock() {
     pthread_mutex_unlock(&emutls_mutex);
 }
 
-static __inline void emutls_setspecific(emutls_address_array *value) {
-    pthread_setspecific(emutls_pthread_key, (void*) value);
-}
-
-static __inline emutls_address_array* emutls_getspecific() {
-    return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
-}
-
-#else
+#else /* _WIN32 */
 
 #include <windows.h>
 #include <malloc.h>
@@ -222,11 +250,11 @@ static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
     InterlockedExchangePointer((void *volatile *)ptr, (void *)val);
 }
 
-#endif
+#endif /* __ATOMIC_RELEASE */
 
 #pragma warning (pop)
 
-#endif
+#endif /* _WIN32 */
 
 static size_t emutls_num_object = 0; /* number of allocated TLS objects */
@@ -314,11 +342,12 @@ static __inline void emutls_check_array_set_size(emutls_address_array *array,
  * which must be no smaller than the given index.
  */
 static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
-    /* Need to allocate emutls_address_array with one extra slot
-     * to store the data array size.
+    /* Need to allocate emutls_address_array with extra slots
+     * to store the header.
      * Round up the emutls_address_array size to multiple of 16.
      */
-    return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
+    uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *);
+    return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
 }
 
 /* Returns the size in bytes required for an emutls_address_array with
@@ -337,8 +366,10 @@ emutls_get_address_array(uintptr_t index) {
     if (array == NULL) {
         uintptr_t new_size = emutls_new_data_array_size(index);
         array = (emutls_address_array*) malloc(emutls_asize(new_size));
-        if (array)
+        if (array) {
             memset(array->data, 0, new_size * sizeof(void*));
+            array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
+        }
         emutls_check_array_set_size(array, new_size);
     } else if (index > array->size) {
         uintptr_t orig_size = array->size;
diff --git a/lib/libcompiler_rt/int_types.h b/lib/libcompiler_rt/int_types.h
index a92238c5b73..f53f343d35d 100644
--- a/lib/libcompiler_rt/int_types.h
+++ b/lib/libcompiler_rt/int_types.h
@@ -60,7 +60,7 @@ typedef union
     }s;
 } udwords;
 
-#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))
+#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64)) || defined(__riscv)
 #define CRT_HAS_128BIT
 #endif
diff --git a/lib/libcompiler_rt/riscv/mulsi3.S b/lib/libcompiler_rt/riscv/mulsi3.S
new file mode 100644
index 00000000000..a58d237040b
--- /dev/null
+++ b/lib/libcompiler_rt/riscv/mulsi3.S
@@ -0,0 +1,28 @@
+//===--- mulsi3.S - Integer multiplication routines routines ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(__riscv_mul) && __riscv_xlen == 32
+	.text
+	.align 2
+
+	.globl __mulsi3
+	.type  __mulsi3, @function
+__mulsi3:
+	mv     a2, a0
+	mv     a0, zero
+.L1:
+	andi   a3, a1, 1
+	beqz   a3, .L2
+	add    a0, a0, a2
+.L2:
+	srli   a1, a1, 1
+	slli   a2, a2, 1
+	bnez   a1, .L1
+	ret
+#endif
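The probe loop in the new arm/chkstk.S reads more easily next to a C rendering. Below is a minimal sketch, assuming Windows' 4 KiB guard-page scheme; chkstk_sketch and its parameter names are hypothetical, and the real routine is the assembly in the diff above.

```c
/* Illustration only: touch one word per 4 KiB page, walking downward from
 * the stack pointer, so that Windows commits stack guard pages in order.
 * r4 arrives as a count of 4-byte units, matching the asm's calling note. */
void chkstk_sketch(volatile const char *sp, unsigned long r4_units) {
    unsigned long bytes = r4_units * 4;  /* lsl  r4, r4, #2 */
    volatile const char *probe = sp;     /* mov  r12, sp */
    long remaining = (long)bytes;        /* mov  r5, r4 */
    do {
        probe -= 4096;                   /* sub  r12, r12, #PAGE_SIZE */
        remaining -= 4096;               /* subs r5, r5, #PAGE_SIZE */
        (void)*probe;                    /* ldr  r6, [r12] (probe the page) */
    } while (remaining > 0);             /* bgt  1b */
}
```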
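The guards added to clzdi2.c and ctzdi2.c are worth unpacking. The sketch below shows how the recursion would arise, assuming the generic compiler-rt pattern of composing the 64-bit count from a 32-bit builtin; clzdi2_sketch and its local typedefs are illustrative stand-ins, not the verbatim file.

```c
typedef int si_int;
typedef long long di_int;

/* On the affected gcc targets, __builtin_clz() is itself lowered to a
 * libcall to __clzdi2, so without the #define remap to __clzsi2 a function
 * shaped like this would call itself forever. */
si_int clzdi2_sketch(di_int a) {
    unsigned long long x = (unsigned long long)a;
    unsigned int high = (unsigned int)(x >> 32);
    unsigned int low = (unsigned int)x;
    /* precondition: a != 0, so at least one half is nonzero */
    return high ? __builtin_clz(high) : 32 + __builtin_clz(low);
}
```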
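The reworked emutls_key_destructor relies on a POSIX detail: when a key destructor stores a fresh non-NULL value with pthread_setspecific, the implementation runs another cleanup round, up to PTHREAD_DESTRUCTOR_ITERATIONS (at least 4). The demo below is not emutls code and uses hypothetical names; it only shows that re-arming behavior.

```c
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_key_t key;

static void destructor(void *ptr) {
    uintptr_t value = (uintptr_t)ptr;
    printf("destructor round, value=%ju\n", (uintmax_t)value);
    if (value > 1)
        /* Re-arming the key with a non-NULL value makes the implementation
         * call us again on the next round; storing NULL (or nothing) stops. */
        pthread_setspecific(key, (void *)(value - 1));
}

static void *thread_main(void *arg) {
    (void)arg;
    pthread_setspecific(key, (void *)(uintptr_t)3); /* request three rounds */
    return NULL;
}

int main(void) {
    pthread_t t;
    pthread_key_create(&key, destructor);
    pthread_create(&t, NULL, thread_main, NULL);
    pthread_join(t, NULL); /* prints rounds 3, 2, 1 at thread exit */
    return 0;
}
```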
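The new emutls_new_data_array_size arithmetic is easiest to verify with concrete numbers. A small sketch, assuming an LP64 target where the two-word header (skip_destructor_rounds plus size) occupies two pointer-sized slots; new_data_array_size here is a stand-in for the static helper in the diff.

```c
#include <stdint.h>
#include <stdio.h>

/* Total words (header + data) round up to a multiple of 16; the function
 * returns how many data slots that leaves. */
static uintptr_t new_data_array_size(uintptr_t index, uintptr_t header_words) {
    return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
}

int main(void) {
    /* The old one-word header gave index 1 a 15-slot data array; the new
     * two-word header gives 14, keeping the whole allocation at 16 words. */
    printf("old: %ju\n", (uintmax_t)new_data_array_size(1, 1)); /* prints 15 */
    printf("new: %ju\n", (uintmax_t)new_data_array_size(1, 2)); /* prints 14 */
    return 0;
}
```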
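Finally, the new __mulsi3 serves RV32 cores that lack the M extension, using plain shift-and-add multiplication. Below is a hypothetical C transcription of the loop, mirroring the register roles in the assembly: a2 holds the progressively shifted multiplicand, a0 accumulates the product, and a1 is consumed one bit per iteration.

```c
unsigned int mulsi3_sketch(unsigned int a, unsigned int b) {
    unsigned int product = 0;   /* mv a0, zero */
    unsigned int shifted = a;   /* mv a2, a0 */
    while (b != 0) {
        if (b & 1)              /* andi a3, a1, 1 / beqz a3, .L2 */
            product += shifted; /* add  a0, a0, a2 */
        b >>= 1;                /* srli a1, a1, 1 */
        shifted <<= 1;          /* slli a2, a2, 1 */
    }                           /* bnez a1, .L1 */
    return product;             /* ret */
}
```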