| author | Patrick Wildt <patrick@cvs.openbsd.org> | 2019-01-29 22:35:44 +0000 |
|---|---|---|
| committer | Patrick Wildt <patrick@cvs.openbsd.org> | 2019-01-29 22:35:44 +0000 |
| commit | 4307d4c1c866216fa87cb709ad9d857100a52463 (patch) | |
| tree | 15665c95aa3815c07cf2a18a830e03a90e4bc0de /lib/libcompiler_rt | |
| parent | 7ab9e8bbf68af83fb44229b13939e33d6eb49f04 (diff) | |
Update compiler-rt to 7.0.1. This is mostly a cosmetic update, since
most changes are guarded behind WIN32 or GCC ifdefs. The functional
changes are in emutls and in the AMD model checking.
ok kettenis@
Diffstat (limited to 'lib/libcompiler_rt')
-rw-r--r-- | lib/libcompiler_rt/CMakeLists.txt | 61
-rw-r--r-- | lib/libcompiler_rt/LICENSE.TXT | 2
-rw-r--r-- | lib/libcompiler_rt/arm/chkstk.S | 34
-rw-r--r-- | lib/libcompiler_rt/clear_cache.c | 4
-rw-r--r-- | lib/libcompiler_rt/clzdi2.c | 6
-rw-r--r-- | lib/libcompiler_rt/cpu_model.c | 4
-rw-r--r-- | lib/libcompiler_rt/ctzdi2.c | 6
-rw-r--r-- | lib/libcompiler_rt/emutls.c | 65
-rw-r--r-- | lib/libcompiler_rt/int_types.h | 2
-rw-r--r-- | lib/libcompiler_rt/riscv/mulsi3.S | 28
10 files changed, 187 insertions, 25 deletions
diff --git a/lib/libcompiler_rt/CMakeLists.txt b/lib/libcompiler_rt/CMakeLists.txt
index 0b50b5bb8d2..82332967b10 100644
--- a/lib/libcompiler_rt/CMakeLists.txt
+++ b/lib/libcompiler_rt/CMakeLists.txt
@@ -173,8 +173,8 @@ set(GENERIC_TF_SOURCES
   trunctfsf2.c)
 
 option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
-  "Skip the atomic builtin (this may be needed if system headers are unavailable)"
-  Off)
+  "Skip the atomic builtin (these should normally be provided by a shared library)"
+  On)
 
 if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
   set(GENERIC_SOURCES
@@ -406,6 +406,7 @@ if(MINGW)
     arm/aeabi_ldivmod.S
     arm/aeabi_uidivmod.S
     arm/aeabi_uldivmod.S
+    arm/chkstk.S
     divmoddi4.c
     divmodsi4.c
     divdi3.c
@@ -459,6 +460,41 @@ set(armv6m_SOURCES ${thumb1_SOURCES})
 set(armv7m_SOURCES ${arm_SOURCES})
 set(armv7em_SOURCES ${arm_SOURCES})
 
+# hexagon arch
+set(hexagon_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES})
+set(hexagon_SOURCES
+  hexagon/common_entry_exit_abi1.S
+  hexagon/common_entry_exit_abi2.S
+  hexagon/common_entry_exit_legacy.S
+  hexagon/dfaddsub.S
+  hexagon/dfdiv.S
+  hexagon/dffma.S
+  hexagon/dfminmax.S
+  hexagon/dfmul.S
+  hexagon/dfsqrt.S
+  hexagon/divdi3.S
+  hexagon/divsi3.S
+  hexagon/fabs_opt.S
+  hexagon/fastmath2_dlib_asm.S
+  hexagon/fastmath2_ldlib_asm.S
+  hexagon/fastmath_dlib_asm.S
+  hexagon/fma_opt.S
+  hexagon/fmax_opt.S
+  hexagon/fmin_opt.S
+  hexagon/memcpy_forward_vp4cp4n2.S
+  hexagon/memcpy_likely_aligned.S
+  hexagon/moddi3.S
+  hexagon/modsi3.S
+  hexagon/sfdiv_opt.S
+  hexagon/sfsqrt_opt.S
+  hexagon/udivdi3.S
+  hexagon/udivmoddi4.S
+  hexagon/udivmodsi4.S
+  hexagon/udivsi3.S
+  hexagon/umoddi3.S
+  hexagon/umodsi3.S)
+
+
 set(mips_SOURCES ${GENERIC_SOURCES})
 set(mipsel_SOURCES ${mips_SOURCES})
 set(mips64_SOURCES ${GENERIC_TF_SOURCES}
@@ -480,6 +516,12 @@
 set(powerpc64_SOURCES ${GENERIC_SOURCES})
 set(powerpc64le_SOURCES ${powerpc64_SOURCES})
 
+set(riscv_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES})
+set(riscv32_SOURCES
+  riscv/mulsi3.S
+  ${riscv_SOURCES})
+set(riscv64_SOURCES ${riscv_SOURCES})
+
 set(wasm32_SOURCES
   ${GENERIC_TF_SOURCES}
   ${GENERIC_SOURCES})
@@ -525,6 +567,15 @@ else ()
       set(_arch "arm")
     endif()
 
+    # For ARM archs, exclude any VFP builtins if VFP is not supported
+    if (${arch} MATCHES "^(arm|armhf|armv7|armv7s|armv7k|armv7m|armv7em)$")
+      string(REPLACE ";" " " _TARGET_${arch}_CFLAGS "${TARGET_${arch}_CFLAGS}")
+      check_compile_definition(__VFP_FP__ "${CMAKE_C_FLAGS} ${_TARGET_${arch}_CFLAGS}" COMPILER_RT_HAS_${arch}_VFP)
+      if(NOT COMPILER_RT_HAS_${arch}_VFP)
+        list(REMOVE_ITEM ${arch}_SOURCES ${arm_Thumb1_VFPv2_SOURCES} ${arm_Thumb1_SjLj_EH_SOURCES})
+      endif()
+    endif()
+
     # Filter out generic versions of routines that are re-implemented in
     # architecture specific manner. This prevents multiple definitions of the
     # same symbols, making the symbol selection non-deterministic.
@@ -542,6 +593,12 @@ else ()
       list(APPEND BUILTIN_CFLAGS -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
     endif()
 
+    # For RISCV32, we must force enable int128 for compiling long
+    # double routines.
+    if("${arch}" STREQUAL "riscv32")
+      list(APPEND BUILTIN_CFLAGS -fforce-enable-int128)
+    endif()
+
     add_compiler_rt_runtime(clang_rt.builtins
                             STATIC
                             ARCHS ${arch}
diff --git a/lib/libcompiler_rt/LICENSE.TXT b/lib/libcompiler_rt/LICENSE.TXT
index a17dc12b272..0134694e4e5 100644
--- a/lib/libcompiler_rt/LICENSE.TXT
+++ b/lib/libcompiler_rt/LICENSE.TXT
@@ -14,7 +14,7 @@ Full text of the relevant licenses is included below.
 
 ==============================================================================
 
 University of Illinois/NCSA Open Source License
 
-Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT
+Copyright (c) 2009-2018 by the contributors listed in CREDITS.TXT
 All rights reserved.
diff --git a/lib/libcompiler_rt/arm/chkstk.S b/lib/libcompiler_rt/arm/chkstk.S
new file mode 100644
index 00000000000..e3002105897
--- /dev/null
+++ b/lib/libcompiler_rt/arm/chkstk.S
@@ -0,0 +1,34 @@
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+
+#include "../assembly.h"
+
+// __chkstk routine
+// This routine is windows specific.
+// http://msdn.microsoft.com/en-us/library/ms648426.aspx
+
+// This clobbers the register r12, and the condition codes, and uses r5 and r6
+// as temporaries by backing them up and restoring them afterwards.
+// Does not modify any memory or the stack pointer.
+
+//	movw r4, #256 // Number of bytes of stack, in units of 4 byte
+//	bl __chkstk
+//	sub.w sp, sp, r4
+
+#define PAGE_SIZE 4096
+
+	.p2align 2
+DEFINE_COMPILERRT_FUNCTION(__chkstk)
+	lsl	r4, r4, #2
+	mov	r12, sp
+	push	{r5, r6}
+	mov	r5, r4
+1:
+	sub	r12, r12, #PAGE_SIZE
+	subs	r5, r5, #PAGE_SIZE
+	ldr	r6, [r12]
+	bgt	1b
+
+	pop	{r5, r6}
+	bx	lr
+END_COMPILERRT_FUNCTION(__chkstk)
diff --git a/lib/libcompiler_rt/clear_cache.c b/lib/libcompiler_rt/clear_cache.c
index 451f1c0b124..9dcab344ad1 100644
--- a/lib/libcompiler_rt/clear_cache.c
+++ b/lib/libcompiler_rt/clear_cache.c
@@ -101,6 +101,8 @@ void __clear_cache(void *start, void *end) {
  * Intel processors have a unified instruction and data cache
  * so there is nothing to do
  */
+#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
+  FlushInstructionCache(GetCurrentProcess(), start, end - start);
 #elif defined(__arm__) && !defined(__APPLE__)
   #if defined(__FreeBSD__) || defined(__NetBSD__)
     struct arm_sync_icache_args arg;
@@ -128,8 +130,6 @@ void __clear_cache(void *start, void *end) {
                      : "r"(syscall_nr), "r"(start_reg), "r"(end_reg), "r"(flags));
     assert(start_reg == 0 && "Cache flush syscall failed.");
-  #elif defined(_WIN32)
-    FlushInstructionCache(GetCurrentProcess(), start, end - start);
   #else
     compilerrt_abort();
   #endif
diff --git a/lib/libcompiler_rt/clzdi2.c b/lib/libcompiler_rt/clzdi2.c
index b9e64da492b..b56d98f5c01 100644
--- a/lib/libcompiler_rt/clzdi2.c
+++ b/lib/libcompiler_rt/clzdi2.c
@@ -16,6 +16,12 @@
 
 /* Returns: the number of leading 0-bits */
 
+#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
+/* gcc resolves __builtin_clz -> __clzdi2 leading to infinite recursion */
+#define __builtin_clz(a) __clzsi2(a)
+extern si_int __clzsi2(si_int);
+#endif
+
 /* Precondition: a != 0 */
 
 COMPILER_RT_ABI si_int
diff --git a/lib/libcompiler_rt/cpu_model.c b/lib/libcompiler_rt/cpu_model.c
index 4c96e9cd85d..43b913390fc 100644
--- a/lib/libcompiler_rt/cpu_model.c
+++ b/lib/libcompiler_rt/cpu_model.c
@@ -416,9 +416,9 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
       *Subtype = AMDFAM15H_BDVER3;
       break; // "bdver3"; 30h-3Fh: Steamroller
     }
-    if (Model >= 0x10 && Model <= 0x1f) {
+    if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
       *Subtype = AMDFAM15H_BDVER2;
-      break; // "bdver2"; 10h-1Fh: Piledriver
+      break; // "bdver2"; 02h, 10h-1Fh: Piledriver
     }
     if (Model <= 0x0f) {
       *Subtype = AMDFAM15H_BDVER1;
diff --git a/lib/libcompiler_rt/ctzdi2.c b/lib/libcompiler_rt/ctzdi2.c
index db3c6fdc08f..eecde29718d 100644
--- a/lib/libcompiler_rt/ctzdi2.c
+++ b/lib/libcompiler_rt/ctzdi2.c
@@ -16,6 +16,12 @@
 
 /* Returns: the number of trailing 0-bits */
 
+#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
+/* gcc resolves __builtin_ctz -> __ctzdi2 leading to infinite recursion */
+#define __builtin_ctz(a) __ctzsi2(a)
+extern si_int __ctzsi2(si_int);
+#endif
+
 /* Precondition: a != 0 */
 
 COMPILER_RT_ABI si_int
diff --git a/lib/libcompiler_rt/emutls.c b/lib/libcompiler_rt/emutls.c
index 5dd8dd15477..07d436e267d 100644
--- a/lib/libcompiler_rt/emutls.c
+++ b/lib/libcompiler_rt/emutls.c
@@ -14,7 +14,22 @@
 #include "int_lib.h"
 #include "int_util.h"
 
+#ifdef __BIONIC__
+/* There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
+   to round 2. We need to delay deallocation because:
+    - Android versions older than M lack __cxa_thread_atexit_impl, so apps
+      use a pthread key destructor to call C++ destructors.
+    - Apps might use __thread/thread_local variables in pthread destructors.
+   We can't wait until the final two rounds, because jemalloc needs two rounds
+   after the final malloc/free call to free its thread-specific data (see
+   https://reviews.llvm.org/D46978#1107507). */
+#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
+#else
+#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
+#endif
+
 typedef struct emutls_address_array {
+    uintptr_t skip_destructor_rounds;
     uintptr_t size; /* number of elements in the 'data' array */
     void* data[];
 } emutls_address_array;
@@ -65,9 +80,30 @@ static __inline void emutls_memalign_free(void *base) {
 #endif
 }
 
+static __inline void emutls_setspecific(emutls_address_array *value) {
+    pthread_setspecific(emutls_pthread_key, (void*) value);
+}
+
+static __inline emutls_address_array* emutls_getspecific() {
+    return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
+}
+
 static void emutls_key_destructor(void* ptr) {
-    emutls_shutdown((emutls_address_array*)ptr);
-    free(ptr);
+    emutls_address_array *array = (emutls_address_array*)ptr;
+    if (array->skip_destructor_rounds > 0) {
+        /* emutls is deallocated using a pthread key destructor. These
+         * destructors are called in several rounds to accommodate destructor
+         * functions that (re)initialize key values with pthread_setspecific.
+         * Delay the emutls deallocation to accommodate other end-of-thread
+         * cleanup tasks like calling thread_local destructors (e.g. the
+         * __cxa_thread_atexit fallback in libc++abi).
+         */
+        array->skip_destructor_rounds--;
+        emutls_setspecific(array);
+    } else {
+        emutls_shutdown(array);
+        free(ptr);
+    }
 }
 
 static __inline void emutls_init(void) {
@@ -88,15 +124,7 @@ static __inline void emutls_unlock() {
     pthread_mutex_unlock(&emutls_mutex);
 }
 
-static __inline void emutls_setspecific(emutls_address_array *value) {
-    pthread_setspecific(emutls_pthread_key, (void*) value);
-}
-
-static __inline emutls_address_array* emutls_getspecific() {
-    return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
-}
-
-#else
+#else /* _WIN32 */
 
 #include <windows.h>
 #include <malloc.h>
@@ -222,11 +250,11 @@ static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
     InterlockedExchangePointer((void *volatile *)ptr, (void *)val);
 }
 
-#endif
+#endif /* __ATOMIC_RELEASE */
 
 #pragma warning (pop)
 
-#endif
+#endif /* _WIN32 */
 
 static size_t emutls_num_object = 0; /* number of allocated TLS objects */
@@ -314,11 +342,12 @@ static __inline void emutls_check_array_set_size(emutls_address_array *array,
  * which must be no smaller than the given index.
  */
 static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
-    /* Need to allocate emutls_address_array with one extra slot
-     * to store the data array size.
+    /* Need to allocate emutls_address_array with extra slots
+     * to store the header.
      * Round up the emutls_address_array size to multiple of 16.
      */
-    return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
+    uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *);
+    return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
 }
 
 /* Returns the size in bytes required for an emutls_address_array with
@@ -337,8 +366,10 @@ emutls_get_address_array(uintptr_t index) {
     if (array == NULL) {
         uintptr_t new_size = emutls_new_data_array_size(index);
         array = (emutls_address_array*) malloc(emutls_asize(new_size));
-        if (array)
+        if (array) {
             memset(array->data, 0, new_size * sizeof(void*));
+            array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
+        }
         emutls_check_array_set_size(array, new_size);
     } else if (index > array->size) {
         uintptr_t orig_size = array->size;
diff --git a/lib/libcompiler_rt/int_types.h b/lib/libcompiler_rt/int_types.h
index a92238c5b73..f53f343d35d 100644
--- a/lib/libcompiler_rt/int_types.h
+++ b/lib/libcompiler_rt/int_types.h
@@ -60,7 +60,7 @@ typedef union
     }s;
 } udwords;
 
-#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))
+#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64)) || defined(__riscv)
 #define CRT_HAS_128BIT
 #endif
diff --git a/lib/libcompiler_rt/riscv/mulsi3.S b/lib/libcompiler_rt/riscv/mulsi3.S
new file mode 100644
index 00000000000..a58d237040b
--- /dev/null
+++ b/lib/libcompiler_rt/riscv/mulsi3.S
@@ -0,0 +1,28 @@
+//===--- mulsi3.S - Integer multiplication routines routines ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#if !defined(__riscv_mul) && __riscv_xlen == 32
+	.text
+	.align 2
+
+	.globl __mulsi3
+	.type  __mulsi3, @function
+__mulsi3:
+	mv     a2, a0
+	mv     a0, zero
+.L1:
+	andi   a3, a1, 1
+	beqz   a3, .L2
+	add    a0, a0, a2
+.L2:
+	srli   a1, a1, 1
+	slli   a2, a2, 1
+	bnez   a1, .L1
+	ret
+#endif
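The probe loop in the new arm/chkstk.S reads more easily next to a C rendering. Below is a minimal sketch, assuming Windows' 4 KiB guard-page scheme; chkstk_sketch and its parameter names are hypothetical, and the real routine is the assembly in the diff above.

```c
/* Illustration only: touch one word per 4 KiB page, walking downward from
 * the stack pointer, so that Windows commits stack guard pages in order.
 * r4 arrives as a count of 4-byte units, matching the asm's calling note. */
void chkstk_sketch(volatile const char *sp, unsigned long r4_units) {
    unsigned long bytes = r4_units * 4;  /* lsl  r4, r4, #2 */
    volatile const char *probe = sp;     /* mov  r12, sp */
    long remaining = (long)bytes;        /* mov  r5, r4 */
    do {
        probe -= 4096;                   /* sub  r12, r12, #PAGE_SIZE */
        remaining -= 4096;               /* subs r5, r5, #PAGE_SIZE */
        (void)*probe;                    /* ldr  r6, [r12] (probe the page) */
    } while (remaining > 0);             /* bgt  1b */
}
```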
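The guards added to clzdi2.c and ctzdi2.c are worth unpacking. The sketch below shows how the recursion would arise, assuming the generic compiler-rt pattern of composing the 64-bit count from a 32-bit builtin; clzdi2_sketch and its local typedefs are illustrative stand-ins, not the verbatim file.

```c
typedef int si_int;
typedef long long di_int;

/* On the affected gcc targets, __builtin_clz() is itself lowered to a
 * libcall to __clzdi2, so without the #define remap to __clzsi2 a function
 * shaped like this would call itself forever. */
si_int clzdi2_sketch(di_int a) {
    unsigned long long x = (unsigned long long)a;
    unsigned int high = (unsigned int)(x >> 32);
    unsigned int low = (unsigned int)x;
    /* precondition: a != 0, so at least one half is nonzero */
    return high ? __builtin_clz(high) : 32 + __builtin_clz(low);
}
```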
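The reworked emutls_key_destructor relies on a POSIX detail: when a key destructor stores a fresh non-NULL value with pthread_setspecific, the implementation runs another cleanup round, up to PTHREAD_DESTRUCTOR_ITERATIONS (at least 4). The demo below is not emutls code and uses hypothetical names; it only shows that re-arming behavior.

```c
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_key_t key;

static void destructor(void *ptr) {
    uintptr_t value = (uintptr_t)ptr;
    printf("destructor round, value=%ju\n", (uintmax_t)value);
    if (value > 1)
        /* Re-arming the key with a non-NULL value makes the implementation
         * call us again on the next round; storing NULL (or nothing) stops. */
        pthread_setspecific(key, (void *)(value - 1));
}

static void *thread_main(void *arg) {
    (void)arg;
    pthread_setspecific(key, (void *)(uintptr_t)3); /* request three rounds */
    return NULL;
}

int main(void) {
    pthread_t t;
    pthread_key_create(&key, destructor);
    pthread_create(&t, NULL, thread_main, NULL);
    pthread_join(t, NULL); /* prints rounds 3, 2, 1 at thread exit */
    return 0;
}
```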
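The new emutls_new_data_array_size arithmetic is easiest to verify with concrete numbers. A small sketch, assuming an LP64 target where the two-word header (skip_destructor_rounds plus size) occupies two pointer-sized slots; new_data_array_size here is a stand-in for the static helper in the diff.

```c
#include <stdint.h>
#include <stdio.h>

/* Total words (header + data) round up to a multiple of 16; the function
 * returns how many data slots that leaves. */
static uintptr_t new_data_array_size(uintptr_t index, uintptr_t header_words) {
    return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
}

int main(void) {
    /* The old one-word header gave index 1 a 15-slot data array; the new
     * two-word header gives 14, keeping the whole allocation at 16 words. */
    printf("old: %ju\n", (uintmax_t)new_data_array_size(1, 1)); /* prints 15 */
    printf("new: %ju\n", (uintmax_t)new_data_array_size(1, 2)); /* prints 14 */
    return 0;
}
```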
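Finally, the new __mulsi3 serves RV32 cores that lack the M extension, using plain shift-and-add multiplication. Below is a hypothetical C transcription of the loop, mirroring the register roles in the assembly: a2 holds the progressively shifted multiplicand, a0 accumulates the product, and a1 is consumed one bit per iteration.

```c
unsigned int mulsi3_sketch(unsigned int a, unsigned int b) {
    unsigned int product = 0;   /* mv a0, zero */
    unsigned int shifted = a;   /* mv a2, a0 */
    while (b != 0) {
        if (b & 1)              /* andi a3, a1, 1 / beqz a3, .L2 */
            product += shifted; /* add  a0, a0, a2 */
        b >>= 1;                /* srli a1, a1, 1 */
        shifted <<= 1;          /* slli a2, a2, 1 */
    }                           /* bnez a1, .L1 */
    return product;             /* ret */
}
```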