author     Patrick Wildt <patrick@cvs.openbsd.org>  2019-01-29 22:35:44 +0000
committer  Patrick Wildt <patrick@cvs.openbsd.org>  2019-01-29 22:35:44 +0000
commit     4307d4c1c866216fa87cb709ad9d857100a52463 (patch)
tree       15665c95aa3815c07cf2a18a830e03a90e4bc0de /lib/libcompiler_rt
parent     7ab9e8bbf68af83fb44229b13939e33d6eb49f04 (diff)
Update compiler-rt to 7.0.1. This is mostly a cosmetic update, since most
changes are guarded behind WIN32 or GCC ifdefs. The functional changes are
in emutls and AMD model checking.

ok kettenis@
Diffstat (limited to 'lib/libcompiler_rt')
-rw-r--r--  lib/libcompiler_rt/CMakeLists.txt   61
-rw-r--r--  lib/libcompiler_rt/LICENSE.TXT       2
-rw-r--r--  lib/libcompiler_rt/arm/chkstk.S     34
-rw-r--r--  lib/libcompiler_rt/clear_cache.c     4
-rw-r--r--  lib/libcompiler_rt/clzdi2.c          6
-rw-r--r--  lib/libcompiler_rt/cpu_model.c       4
-rw-r--r--  lib/libcompiler_rt/ctzdi2.c          6
-rw-r--r--  lib/libcompiler_rt/emutls.c         65
-rw-r--r--  lib/libcompiler_rt/int_types.h       2
-rw-r--r--  lib/libcompiler_rt/riscv/mulsi3.S   28
10 files changed, 187 insertions, 25 deletions
diff --git a/lib/libcompiler_rt/CMakeLists.txt b/lib/libcompiler_rt/CMakeLists.txt
index 0b50b5bb8d2..82332967b10 100644
--- a/lib/libcompiler_rt/CMakeLists.txt
+++ b/lib/libcompiler_rt/CMakeLists.txt
@@ -173,8 +173,8 @@ set(GENERIC_TF_SOURCES
trunctfsf2.c)
option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
- "Skip the atomic builtin (this may be needed if system headers are unavailable)"
- Off)
+ "Skip the atomic builtin (these should normally be provided by a shared library)"
+ On)
if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
set(GENERIC_SOURCES
@@ -406,6 +406,7 @@ if(MINGW)
arm/aeabi_ldivmod.S
arm/aeabi_uidivmod.S
arm/aeabi_uldivmod.S
+ arm/chkstk.S
divmoddi4.c
divmodsi4.c
divdi3.c
@@ -459,6 +460,41 @@ set(armv6m_SOURCES ${thumb1_SOURCES})
set(armv7m_SOURCES ${arm_SOURCES})
set(armv7em_SOURCES ${arm_SOURCES})
+# hexagon arch
+set(hexagon_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES})
+set(hexagon_SOURCES
+ hexagon/common_entry_exit_abi1.S
+ hexagon/common_entry_exit_abi2.S
+ hexagon/common_entry_exit_legacy.S
+ hexagon/dfaddsub.S
+ hexagon/dfdiv.S
+ hexagon/dffma.S
+ hexagon/dfminmax.S
+ hexagon/dfmul.S
+ hexagon/dfsqrt.S
+ hexagon/divdi3.S
+ hexagon/divsi3.S
+ hexagon/fabs_opt.S
+ hexagon/fastmath2_dlib_asm.S
+ hexagon/fastmath2_ldlib_asm.S
+ hexagon/fastmath_dlib_asm.S
+ hexagon/fma_opt.S
+ hexagon/fmax_opt.S
+ hexagon/fmin_opt.S
+ hexagon/memcpy_forward_vp4cp4n2.S
+ hexagon/memcpy_likely_aligned.S
+ hexagon/moddi3.S
+ hexagon/modsi3.S
+ hexagon/sfdiv_opt.S
+ hexagon/sfsqrt_opt.S
+ hexagon/udivdi3.S
+ hexagon/udivmoddi4.S
+ hexagon/udivmodsi4.S
+ hexagon/udivsi3.S
+ hexagon/umoddi3.S
+ hexagon/umodsi3.S)
+
+
set(mips_SOURCES ${GENERIC_SOURCES})
set(mipsel_SOURCES ${mips_SOURCES})
set(mips64_SOURCES ${GENERIC_TF_SOURCES}
@@ -480,6 +516,12 @@ set(powerpc64_SOURCES
${GENERIC_SOURCES})
set(powerpc64le_SOURCES ${powerpc64_SOURCES})
+set(riscv_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES})
+set(riscv32_SOURCES
+ riscv/mulsi3.S
+ ${riscv_SOURCES})
+set(riscv64_SOURCES ${riscv_SOURCES})
+
set(wasm32_SOURCES
${GENERIC_TF_SOURCES}
${GENERIC_SOURCES})
@@ -525,6 +567,15 @@ else ()
set(_arch "arm")
endif()
+ # For ARM archs, exclude any VFP builtins if VFP is not supported
+ if (${arch} MATCHES "^(arm|armhf|armv7|armv7s|armv7k|armv7m|armv7em)$")
+ string(REPLACE ";" " " _TARGET_${arch}_CFLAGS "${TARGET_${arch}_CFLAGS}")
+ check_compile_definition(__VFP_FP__ "${CMAKE_C_FLAGS} ${_TARGET_${arch}_CFLAGS}" COMPILER_RT_HAS_${arch}_VFP)
+ if(NOT COMPILER_RT_HAS_${arch}_VFP)
+ list(REMOVE_ITEM ${arch}_SOURCES ${arm_Thumb1_VFPv2_SOURCES} ${arm_Thumb1_SjLj_EH_SOURCES})
+ endif()
+ endif()
+
# Filter out generic versions of routines that are re-implemented in
# architecture specific manner. This prevents multiple definitions of the
# same symbols, making the symbol selection non-deterministic.
@@ -542,6 +593,12 @@ else ()
list(APPEND BUILTIN_CFLAGS -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
endif()
+ # For RISCV32, we must force enable int128 for compiling long
+ # double routines.
+ if("${arch}" STREQUAL "riscv32")
+ list(APPEND BUILTIN_CFLAGS -fforce-enable-int128)
+ endif()
+
add_compiler_rt_runtime(clang_rt.builtins
STATIC
ARCHS ${arch}
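The COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN flip above reflects that the
lock-based __atomic_* helpers keep a process-wide lock table, so every
object in a process has to reach the same copy; baking them into each
static builtins archive risks mismatched locks. A minimal sketch of code
that lowers to those helpers, assuming any C11 compiler (the struct and
names here are illustrative, not from this diff):

    /* A 32-byte struct is wider than any native lock-free atomic, so
     * the compiler lowers these operations to __atomic_load and
     * __atomic_store library calls, which should come from one shared
     * runtime. */
    #include <stdatomic.h>
    #include <stdio.h>

    typedef struct { long v[4]; } wide_t;

    _Atomic wide_t shared;

    int main(void) {
        wide_t w = { { 1, 2, 3, 4 } };
        atomic_store(&shared, w);        /* -> __atomic_store(32, ...) */
        wide_t r = atomic_load(&shared); /* -> __atomic_load(32, ...)  */
        printf("lock-free: %d, r.v[0] = %ld\n",
               (int)atomic_is_lock_free(&shared), r.v[0]);
        return 0;
    }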
diff --git a/lib/libcompiler_rt/LICENSE.TXT b/lib/libcompiler_rt/LICENSE.TXT
index a17dc12b272..0134694e4e5 100644
--- a/lib/libcompiler_rt/LICENSE.TXT
+++ b/lib/libcompiler_rt/LICENSE.TXT
@@ -14,7 +14,7 @@ Full text of the relevant licenses is included below.
University of Illinois/NCSA
Open Source License
-Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT
+Copyright (c) 2009-2018 by the contributors listed in CREDITS.TXT
All rights reserved.
diff --git a/lib/libcompiler_rt/arm/chkstk.S b/lib/libcompiler_rt/arm/chkstk.S
new file mode 100644
index 00000000000..e3002105897
--- /dev/null
+++ b/lib/libcompiler_rt/arm/chkstk.S
@@ -0,0 +1,34 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// __chkstk routine
+// This routine is windows specific.
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+// This clobbers the register r12, and the condition codes, and uses r5 and r6
+// as temporaries by backing them up and restoring them afterwards.
+// Does not modify any memory or the stack pointer.
+
+// movw r4, #256 // Number of bytes of stack, in units of 4 bytes
+// bl __chkstk
+// sub.w sp, sp, r4
+
+#define PAGE_SIZE 4096
+
+ .p2align 2
+DEFINE_COMPILERRT_FUNCTION(__chkstk)
+ lsl r4, r4, #2
+ mov r12, sp
+ push {r5, r6}
+ mov r5, r4
+1:
+ sub r12, r12, #PAGE_SIZE
+ subs r5, r5, #PAGE_SIZE
+ ldr r6, [r12]
+ bgt 1b
+
+ pop {r5, r6}
+ bx lr
+END_COMPILERRT_FUNCTION(__chkstk)
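As the usage comment notes, __chkstk walks the new stack region one page
at a time so that Windows can commit its guard page in order; r4 arrives
in 4-byte units and is scaled to bytes in place for the caller's sub.
A hedged sketch of C code that would make a compiler targeting
Windows-on-ARM emit this probe (function name is illustrative):

    /* Any frame larger than one 4096-byte page needs probing: the
     * compiler calls __chkstk before moving sp past the guard page. */
    void big_frame(void) {
        volatile char buf[16384];    /* 4 pages: triggers __chkstk */
        buf[0] = 1;
        buf[sizeof buf - 1] = 1;
    }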
diff --git a/lib/libcompiler_rt/clear_cache.c b/lib/libcompiler_rt/clear_cache.c
index 451f1c0b124..9dcab344ad1 100644
--- a/lib/libcompiler_rt/clear_cache.c
+++ b/lib/libcompiler_rt/clear_cache.c
@@ -101,6 +101,8 @@ void __clear_cache(void *start, void *end) {
* Intel processors have a unified instruction and data cache
* so there is nothing to do
*/
+#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
+ FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif defined(__arm__) && !defined(__APPLE__)
#if defined(__FreeBSD__) || defined(__NetBSD__)
struct arm_sync_icache_args arg;
@@ -128,8 +130,6 @@ void __clear_cache(void *start, void *end) {
: "r"(syscall_nr), "r"(start_reg), "r"(end_reg),
"r"(flags));
assert(start_reg == 0 && "Cache flush syscall failed.");
- #elif defined(_WIN32)
- FlushInstructionCache(GetCurrentProcess(), start, end - start);
#else
compilerrt_abort();
#endif
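For context, __clear_cache is what JIT-style code calls between writing
instructions and jumping to them; the hunk moves the Windows case ahead
of the __arm__ ladder so Windows-on-ARM uses FlushInstructionCache
instead of falling through to the Linux syscall path. A rough usage
sketch through the compiler builtin (assumes POSIX mmap and a platform
that still permits writable+executable mappings; illustrative only):

    #include <stddef.h>
    #include <string.h>
    #include <sys/mman.h>

    typedef int (*fn_t)(void);

    /* Copy machine code into an executable page and sync the I-cache
     * before handing back a callable pointer. */
    static fn_t emit(const unsigned char *code, size_t len) {
        void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE | PROT_EXEC,
                         MAP_ANON | MAP_PRIVATE, -1, 0);
        if (buf == MAP_FAILED)
            return NULL;
        memcpy(buf, code, len);
        __builtin___clear_cache((char *)buf, (char *)buf + len);
        return (fn_t)buf;
    }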
diff --git a/lib/libcompiler_rt/clzdi2.c b/lib/libcompiler_rt/clzdi2.c
index b9e64da492b..b56d98f5c01 100644
--- a/lib/libcompiler_rt/clzdi2.c
+++ b/lib/libcompiler_rt/clzdi2.c
@@ -16,6 +16,12 @@
/* Returns: the number of leading 0-bits */
+#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
+/* gcc resolves __builtin_clz -> __clzdi2 leading to infinite recursion */
+#define __builtin_clz(a) __clzsi2(a)
+extern si_int __clzsi2(si_int);
+#endif
+
/* Precondition: a != 0 */
COMPILER_RT_ABI si_int
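The new guard matters because gcc on those 64-bit targets can lower
__builtin_clz into a call to __clzdi2 itself. A hedged paraphrase of the
portable body in plain C types (upstream uses si_int/di_int), marking
where the recursion would bite:

    /* Leading zeros of a 64-bit value: count the high word, or 32 plus
     * the count of the low word.  Precondition: a != 0, so the chosen
     * word is nonzero and __builtin_clz(0) is never reached. */
    int clzdi2_sketch(unsigned long long a) {
        unsigned hi = (unsigned)(a >> 32);
        unsigned lo = (unsigned)a;
        return hi ? __builtin_clz(hi)       /* gcc may emit __clzdi2 here */
                  : 32 + __builtin_clz(lo); /* -> infinite recursion      */
    }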
diff --git a/lib/libcompiler_rt/cpu_model.c b/lib/libcompiler_rt/cpu_model.c
index 4c96e9cd85d..43b913390fc 100644
--- a/lib/libcompiler_rt/cpu_model.c
+++ b/lib/libcompiler_rt/cpu_model.c
@@ -416,9 +416,9 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = AMDFAM15H_BDVER3;
break; // "bdver3"; 30h-3Fh: Steamroller
}
- if (Model >= 0x10 && Model <= 0x1f) {
+ if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
*Subtype = AMDFAM15H_BDVER2;
- break; // "bdver2"; 10h-1Fh: Piledriver
+ break; // "bdver2"; 02h, 10h-1Fh: Piledriver
}
if (Model <= 0x0f) {
*Subtype = AMDFAM15H_BDVER1;
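For reference, Family and Model reach this switch from CPUID leaf 1. A
hedged sketch of the usual signature decoding (helper name is
illustrative); under it, some Piledriver parts report Family 15h with
Model 02h, outside the 10h-1Fh range the old test accepted:

    /* Decode DisplayFamily/DisplayModel from CPUID.1:EAX the way AMD
     * documents it: both extended fields kick in once the base family
     * saturates at 0xF, as it does for all family 15h parts. */
    static void decode_signature(unsigned eax, unsigned *family,
                                 unsigned *model) {
        *family = (eax >> 8) & 0xf;
        *model  = (eax >> 4) & 0xf;
        if (*family == 0xf) {
            *family += (eax >> 20) & 0xff;       /* extended family */
            *model  |= ((eax >> 16) & 0xf) << 4; /* extended model  */
        }
    }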
diff --git a/lib/libcompiler_rt/ctzdi2.c b/lib/libcompiler_rt/ctzdi2.c
index db3c6fdc08f..eecde29718d 100644
--- a/lib/libcompiler_rt/ctzdi2.c
+++ b/lib/libcompiler_rt/ctzdi2.c
@@ -16,6 +16,12 @@
/* Returns: the number of trailing 0-bits */
+#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
+/* gcc resolves __builtin_ctz -> __ctzdi2 leading to infinite recursion */
+#define __builtin_ctz(a) __ctzsi2(a)
+extern si_int __ctzsi2(si_int);
+#endif
+
/* Precondition: a != 0 */
COMPILER_RT_ABI si_int
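The same hazard mirrored for trailing zeros, again sketched in plain C:

    /* Trailing zeros of a 64-bit value: count the low word, or 32 plus
     * the count of the high word.  Precondition: a != 0. */
    int ctzdi2_sketch(unsigned long long a) {
        unsigned lo = (unsigned)a;
        return lo ? __builtin_ctz(lo)
                  : 32 + __builtin_ctz((unsigned)(a >> 32));
    }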
diff --git a/lib/libcompiler_rt/emutls.c b/lib/libcompiler_rt/emutls.c
index 5dd8dd15477..07d436e267d 100644
--- a/lib/libcompiler_rt/emutls.c
+++ b/lib/libcompiler_rt/emutls.c
@@ -14,7 +14,22 @@
#include "int_lib.h"
#include "int_util.h"
+#ifdef __BIONIC__
+/* There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
+ to round 2. We need to delay deallocation because:
+ - Android versions older than M lack __cxa_thread_atexit_impl, so apps
+ use a pthread key destructor to call C++ destructors.
+ - Apps might use __thread/thread_local variables in pthread destructors.
+ We can't wait until the final two rounds, because jemalloc needs two rounds
+ after the final malloc/free call to free its thread-specific data (see
+ https://reviews.llvm.org/D46978#1107507). */
+#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
+#else
+#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
+#endif
+
typedef struct emutls_address_array {
+ uintptr_t skip_destructor_rounds;
uintptr_t size; /* number of elements in the 'data' array */
void* data[];
} emutls_address_array;
@@ -65,9 +80,30 @@ static __inline void emutls_memalign_free(void *base) {
#endif
}
+static __inline void emutls_setspecific(emutls_address_array *value) {
+ pthread_setspecific(emutls_pthread_key, (void*) value);
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+ return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
+}
+
static void emutls_key_destructor(void* ptr) {
- emutls_shutdown((emutls_address_array*)ptr);
- free(ptr);
+ emutls_address_array *array = (emutls_address_array*)ptr;
+ if (array->skip_destructor_rounds > 0) {
+ /* emutls is deallocated using a pthread key destructor. These
+ * destructors are called in several rounds to accommodate destructor
+ * functions that (re)initialize key values with pthread_setspecific.
+ * Delay the emutls deallocation to accommodate other end-of-thread
+ * cleanup tasks like calling thread_local destructors (e.g. the
+ * __cxa_thread_atexit fallback in libc++abi).
+ */
+ array->skip_destructor_rounds--;
+ emutls_setspecific(array);
+ } else {
+ emutls_shutdown(array);
+ free(ptr);
+ }
}
static __inline void emutls_init(void) {
@@ -88,15 +124,7 @@ static __inline void emutls_unlock() {
pthread_mutex_unlock(&emutls_mutex);
}
-static __inline void emutls_setspecific(emutls_address_array *value) {
- pthread_setspecific(emutls_pthread_key, (void*) value);
-}
-
-static __inline emutls_address_array* emutls_getspecific() {
- return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
-}
-
-#else
+#else /* _WIN32 */
#include <windows.h>
#include <malloc.h>
@@ -222,11 +250,11 @@ static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
InterlockedExchangePointer((void *volatile *)ptr, (void *)val);
}
-#endif
+#endif /* __ATOMIC_RELEASE */
#pragma warning (pop)
-#endif
+#endif /* _WIN32 */
static size_t emutls_num_object = 0; /* number of allocated TLS objects */
@@ -314,11 +342,12 @@ static __inline void emutls_check_array_set_size(emutls_address_array *array,
* which must be no smaller than the given index.
*/
static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
- /* Need to allocate emutls_address_array with one extra slot
- * to store the data array size.
+ /* Need to allocate emutls_address_array with extra slots
+ * to store the header.
* Round up the emutls_address_array size to multiple of 16.
*/
- return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
+ uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *);
+ return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
}
/* Returns the size in bytes required for an emutls_address_array with
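A quick numeric check of the new formula: the header
(skip_destructor_rounds plus size) now occupies two pointer-sized words
on any arch, and the helper below just restates the expression with
header_words as a parameter:

    #include <stdint.h>
    #include <stdio.h>

    /* Round header + data up to a multiple of 16 words, then return
     * how many data slots that leaves. */
    static uintptr_t data_array_size(uintptr_t index, uintptr_t header_words) {
        return ((index + header_words + 15) & ~(uintptr_t)15) - header_words;
    }

    int main(void) {
        printf("%ju\n", (uintmax_t)data_array_size(1, 1));  /* old: 15 */
        printf("%ju\n", (uintmax_t)data_array_size(1, 2));  /* new: 14 */
        printf("%ju\n", (uintmax_t)data_array_size(15, 2)); /* new: 30 */
        return 0;
    }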
@@ -337,8 +366,10 @@ emutls_get_address_array(uintptr_t index) {
if (array == NULL) {
uintptr_t new_size = emutls_new_data_array_size(index);
array = (emutls_address_array*) malloc(emutls_asize(new_size));
- if (array)
+ if (array) {
memset(array->data, 0, new_size * sizeof(void*));
+ array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
+ }
emutls_check_array_set_size(array, new_size);
} else if (index > array->size) {
uintptr_t orig_size = array->size;
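The deferral added to emutls_key_destructor leans on POSIX semantics:
key destructors run in rounds, and a destructor that re-stores a
non-NULL value is invoked again on a later round, up to (at least)
PTHREAD_DESTRUCTOR_ITERATIONS. A standalone demonstration of just that
mechanism (not emutls itself):

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    static pthread_key_t key;

    /* Re-arm the key until the countdown reaches NULL; each re-store
     * buys one more destructor round, exactly how emutls defers its
     * teardown. */
    static void dtor(void *p) {
        uintptr_t left = (uintptr_t)p;
        printf("destructor round, counter = %ju\n", (uintmax_t)left);
        pthread_setspecific(key, (void *)(left - 1)); /* NULL stops it */
    }

    static void *thread(void *arg) {
        (void)arg;
        pthread_setspecific(key, (void *)(uintptr_t)3);
        return NULL;
    }

    int main(void) {
        pthread_t t;
        pthread_key_create(&key, dtor);
        pthread_create(&t, NULL, thread, NULL);
        pthread_join(t, NULL);   /* dtor fires three times: 3, 2, 1 */
        return 0;
    }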
diff --git a/lib/libcompiler_rt/int_types.h b/lib/libcompiler_rt/int_types.h
index a92238c5b73..f53f343d35d 100644
--- a/lib/libcompiler_rt/int_types.h
+++ b/lib/libcompiler_rt/int_types.h
@@ -60,7 +60,7 @@ typedef union
}s;
} udwords;
-#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))
+#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64)) || defined(__riscv)
#define CRT_HAS_128BIT
#endif
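What CRT_HAS_128BIT gates, a little further down in int_types.h, are the
128-bit types the tf (long double) routines are built from; on riscv32
they only exist because the CMake hunk above passes
-fforce-enable-int128. Roughly (sketched from the surrounding header,
not part of this diff):

    #ifdef CRT_HAS_128BIT
    typedef int      ti_int __attribute__((mode(TI)));  /* 128-bit int      */
    typedef unsigned tu_int __attribute__((mode(TI)));  /* 128-bit unsigned */
    #endif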
diff --git a/lib/libcompiler_rt/riscv/mulsi3.S b/lib/libcompiler_rt/riscv/mulsi3.S
new file mode 100644
index 00000000000..a58d237040b
--- /dev/null
+++ b/lib/libcompiler_rt/riscv/mulsi3.S
@@ -0,0 +1,28 @@
+//===--- mulsi3.S - Integer multiplication routines ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(__riscv_mul) && __riscv_xlen == 32
+ .text
+ .align 2
+
+ .globl __mulsi3
+ .type __mulsi3, @function
+__mulsi3:
+ mv a2, a0
+ mv a0, zero
+.L1:
+ andi a3, a1, 1
+ beqz a3, .L2
+ add a0, a0, a2
+.L2:
+ srli a1, a1, 1
+ slli a2, a2, 1
+ bnez a1, .L1
+ ret
+#endif
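The loop is textbook shift-and-add multiplication, compiled in only when
the M extension is absent (__riscv_mul undefined) on a 32-bit target. An
equivalent hedged rendering in C, with the matching instructions noted:

    /* For each set bit in b, add the correspondingly shifted a. */
    unsigned mulsi3_sketch(unsigned a, unsigned b) {
        unsigned r = 0;            /* mv a0, zero    */
        while (b) {                /* bnez a1, .L1   */
            if (b & 1)             /* andi a3, a1, 1 */
                r += a;            /* add a0, a0, a2 */
            b >>= 1;               /* srli a1, a1, 1 */
            a <<= 1;               /* slli a2, a2, 1 */
        }
        return r;                  /* ret            */
    }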