diff options
25 files changed, 417 insertions, 253 deletions
diff --git a/lib/libcompiler_rt/CMakeLists.txt b/lib/libcompiler_rt/CMakeLists.txt index 82332967b10..77947417bfe 100644 --- a/lib/libcompiler_rt/CMakeLists.txt +++ b/lib/libcompiler_rt/CMakeLists.txt @@ -505,7 +505,9 @@ set(mips64el_SOURCES ${GENERIC_TF_SOURCES} set(powerpc64_SOURCES ppc/divtc3.c ppc/fixtfdi.c + ppc/fixunstfti.c ppc/fixunstfdi.c + ppc/floattitf.c ppc/floatditf.c ppc/floatunditf.c ppc/gcc_qadd.c diff --git a/lib/libcompiler_rt/Darwin-excludes/10.4.txt b/lib/libcompiler_rt/Darwin-excludes/10.4.txt deleted file mode 100644 index 603c0b3bf39..00000000000 --- a/lib/libcompiler_rt/Darwin-excludes/10.4.txt +++ /dev/null @@ -1,137 +0,0 @@ -absvdi2 -absvsi2 -absvti2 -adddf3 -addsf3 -addtf3 -addvdi3 -addvsi3 -addvti3 -apple_versioning -ashldi3 -ashlti3 -ashrdi3 -ashrti3 -atomic_flag_clear -atomic_flag_clear_explicit -atomic_flag_test_and_set -atomic_flag_test_and_set_explicit -atomic_signal_fence -atomic_thread_fence -clear_cache -clzdi2 -clzsi2 -clzti2 -cmpdi2 -cmpti2 -comparedf2 -comparesf2 -ctzdi2 -ctzsi2 -ctzti2 -divdc3 -divdf3 -divdi3 -divmoddi4 -divmodsi4 -divsc3 -divsf3 -divsi3 -divtf3 -divti3 -divxc3 -enable_execute_stack -extendhfsf2 -extendsfdf2 -ffsdi2 -ffsti2 -fixdfdi -fixdfsi -fixdfti -fixsfdi -fixsfsi -fixsfti -fixunsdfdi -fixunsdfsi -fixunsdfti -fixunssfdi -fixunssfsi -fixunssfti -fixunsxfdi -fixunsxfsi -fixunsxfti -fixxfdi -fixxfti -floatdidf -floatdisf -floatdixf -floatsidf -floatsisf -floattidf -floattisf -floattixf -floatunsidf -floatunsisf -floatuntidf -floatuntisf -floatuntixf -gcc_personality_v0 -gnu_f2h_ieee -gnu_h2f_ieee -lshrdi3 -lshrti3 -moddi3 -modsi3 -modti3 -muldc3 -muldf3 -muldi3 -mulodi4 -mulosi4 -muloti4 -mulsc3 -mulsf3 -multf3 -multi3 -mulvdi3 -mulvsi3 -mulvti3 -mulxc3 -negdf2 -negdi2 -negsf2 -negti2 -negvdi2 -negvsi2 -negvti2 -paritydi2 -paritysi2 -parityti2 -popcountdi2 -popcountsi2 -popcountti2 -powidf2 -powisf2 -powitf2 -powixf2 -subdf3 -subsf3 -subtf3 -subvdi3 -subvsi3 -subvti3 -trampoline_setup -truncdfhf2 -truncdfsf2 -truncsfhf2 -ucmpdi2 -ucmpti2 -udivdi3 -udivmoddi4 -udivmodsi4 -udivmodti4 -udivsi3 -udivti3 -umoddi3 -umodsi3 -umodti3 diff --git a/lib/libcompiler_rt/LICENSE.TXT b/lib/libcompiler_rt/LICENSE.TXT index 0134694e4e5..1c94ad5d891 100644 --- a/lib/libcompiler_rt/LICENSE.TXT +++ b/lib/libcompiler_rt/LICENSE.TXT @@ -14,7 +14,7 @@ Full text of the relevant licenses is included below. University of Illinois/NCSA Open Source License -Copyright (c) 2009-2018 by the contributors listed in CREDITS.TXT +Copyright (c) 2009-2019 by the contributors listed in CREDITS.TXT All rights reserved. diff --git a/lib/libcompiler_rt/arm/addsf3.S b/lib/libcompiler_rt/arm/addsf3.S index 362b5c147ea..74723cbeff7 100644 --- a/lib/libcompiler_rt/arm/addsf3.S +++ b/lib/libcompiler_rt/arm/addsf3.S @@ -178,7 +178,7 @@ LOCAL_LABEL(do_substraction): push {r0, r1, r2, r3} movs r0, r4 - bl __clzsi2 + bl SYMBOL_NAME(__clzsi2) movs r5, r0 pop {r0, r1, r2, r3} // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3); diff --git a/lib/libcompiler_rt/arm/aeabi_cdcmp.S b/lib/libcompiler_rt/arm/aeabi_cdcmp.S index 87dd03dce94..adc2d55d90f 100644 --- a/lib/libcompiler_rt/arm/aeabi_cdcmp.S +++ b/lib/libcompiler_rt/arm/aeabi_cdcmp.S @@ -55,7 +55,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) mov ip, #APSR_C msr APSR_nzcvq, ip #else - msr CPSR_f, #APSR_C + msr APSR_nzcvq, #APSR_C #endif JMP(lr) #endif @@ -115,11 +115,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple) movne ip, #(APSR_C) 1: -#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) msr APSR_nzcvq, ip -#else - msr CPSR_f, ip -#endif pop {r0-r3} POP_PC() #endif diff --git a/lib/libcompiler_rt/arm/aeabi_cfcmp.S b/lib/libcompiler_rt/arm/aeabi_cfcmp.S index c5fee6b6a08..4b1de997687 100644 --- a/lib/libcompiler_rt/arm/aeabi_cfcmp.S +++ b/lib/libcompiler_rt/arm/aeabi_cfcmp.S @@ -55,7 +55,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) mov ip, #APSR_C msr APSR_nzcvq, ip #else - msr CPSR_f, #APSR_C + msr APSR_nzcvq, #APSR_C #endif JMP(lr) #endif @@ -115,11 +115,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple) movne ip, #(APSR_C) 1: -#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) msr APSR_nzcvq, ip -#else - msr CPSR_f, ip -#endif pop {r0-r3} POP_PC() #endif diff --git a/lib/libcompiler_rt/clzdi2.c b/lib/libcompiler_rt/clzdi2.c index b56d98f5c01..1819e6be436 100644 --- a/lib/libcompiler_rt/clzdi2.c +++ b/lib/libcompiler_rt/clzdi2.c @@ -16,8 +16,13 @@ /* Returns: the number of leading 0-bits */ -#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__)) -/* gcc resolves __builtin_clz -> __clzdi2 leading to infinite recursion */ +#if !defined(__clang__) && \ + ((defined(__sparc__) && defined(__arch64__)) || \ + defined(__mips64) || \ + (defined(__riscv) && __SIZEOF_POINTER__ >= 8)) +/* On 64-bit architectures with neither a native clz instruction nor a native + * ctz instruction, gcc resolves __builtin_clz to __clzdi2 rather than + * __clzsi2, leading to infinite recursion. */ #define __builtin_clz(a) __clzsi2(a) extern si_int __clzsi2(si_int); #endif diff --git a/lib/libcompiler_rt/cpu_model.c b/lib/libcompiler_rt/cpu_model.c index 43b913390fc..fb2b899fc70 100644 --- a/lib/libcompiler_rt/cpu_model.c +++ b/lib/libcompiler_rt/cpu_model.c @@ -55,6 +55,9 @@ enum ProcessorTypes { AMD_BTVER2, AMDFAM17H, INTEL_KNM, + INTEL_GOLDMONT, + INTEL_GOLDMONT_PLUS, + INTEL_TREMONT, CPU_TYPE_MAX }; @@ -76,6 +79,8 @@ enum ProcessorSubtypes { INTEL_COREI7_SKYLAKE, INTEL_COREI7_SKYLAKE_AVX512, INTEL_COREI7_CANNONLAKE, + INTEL_COREI7_ICELAKE_CLIENT, + INTEL_COREI7_ICELAKE_SERVER, CPU_SUBTYPE_MAX }; @@ -110,7 +115,12 @@ enum ProcessorFeatures { FEATURE_AVX512IFMA, FEATURE_AVX5124VNNIW, FEATURE_AVX5124FMAPS, - FEATURE_AVX512VPOPCNTDQ + FEATURE_AVX512VPOPCNTDQ, + FEATURE_AVX512VBMI2, + FEATURE_GFNI, + FEATURE_VPCLMULQDQ, + FEATURE_AVX512VNNI, + FEATURE_AVX512BITALG }; // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). @@ -364,6 +374,14 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x4c: // really airmont *Type = INTEL_SILVERMONT; break; // "silvermont" + // Goldmont: + case 0x5c: // Apollo Lake + case 0x5f: // Denverton + *Type = INTEL_GOLDMONT; + break; // "goldmont" + case 0x7a: + *Type = INTEL_GOLDMONT_PLUS; + break; case 0x57: *Type = INTEL_KNL; // knl @@ -438,35 +456,45 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, } static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, - unsigned *FeaturesOut) { + unsigned *FeaturesOut, + unsigned *Features2Out) { unsigned Features = 0; + unsigned Features2 = 0; unsigned EAX, EBX; +#define setFeature(F) \ + do { \ + if (F < 32) \ + Features |= 1U << (F & 0x1f); \ + else if (F < 64) \ + Features2 |= 1U << ((F - 32) & 0x1f); \ + } while (0) + if ((EDX >> 15) & 1) - Features |= 1 << FEATURE_CMOV; + setFeature(FEATURE_CMOV); if ((EDX >> 23) & 1) - Features |= 1 << FEATURE_MMX; + setFeature(FEATURE_MMX); if ((EDX >> 25) & 1) - Features |= 1 << FEATURE_SSE; + setFeature(FEATURE_SSE); if ((EDX >> 26) & 1) - Features |= 1 << FEATURE_SSE2; + setFeature(FEATURE_SSE2); if ((ECX >> 0) & 1) - Features |= 1 << FEATURE_SSE3; + setFeature(FEATURE_SSE3); if ((ECX >> 1) & 1) - Features |= 1 << FEATURE_PCLMUL; + setFeature(FEATURE_PCLMUL); if ((ECX >> 9) & 1) - Features |= 1 << FEATURE_SSSE3; + setFeature(FEATURE_SSSE3); if ((ECX >> 12) & 1) - Features |= 1 << FEATURE_FMA; + setFeature(FEATURE_FMA); if ((ECX >> 19) & 1) - Features |= 1 << FEATURE_SSE4_1; + setFeature(FEATURE_SSE4_1); if ((ECX >> 20) & 1) - Features |= 1 << FEATURE_SSE4_2; + setFeature(FEATURE_SSE4_2); if ((ECX >> 23) & 1) - Features |= 1 << FEATURE_POPCNT; + setFeature(FEATURE_POPCNT); if ((ECX >> 25) & 1) - Features |= 1 << FEATURE_AES; + setFeature(FEATURE_AES); // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context @@ -477,43 +505,53 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); if (HasAVX) - Features |= 1 << FEATURE_AVX; + setFeature(FEATURE_AVX); bool HasLeaf7 = MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); if (HasLeaf7 && ((EBX >> 3) & 1)) - Features |= 1 << FEATURE_BMI; + setFeature(FEATURE_BMI); if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) - Features |= 1 << FEATURE_AVX2; + setFeature(FEATURE_AVX2); if (HasLeaf7 && ((EBX >> 9) & 1)) - Features |= 1 << FEATURE_BMI2; + setFeature(FEATURE_BMI2); if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX512F; + setFeature(FEATURE_AVX512F); if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX512DQ; + setFeature(FEATURE_AVX512DQ); if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX512IFMA; + setFeature(FEATURE_AVX512IFMA); if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX512PF; + setFeature(FEATURE_AVX512PF); if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX512ER; + setFeature(FEATURE_AVX512ER); if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX512CD; + setFeature(FEATURE_AVX512CD); if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX512BW; + setFeature(FEATURE_AVX512BW); if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX512VL; + setFeature(FEATURE_AVX512VL); if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX512VBMI; + setFeature(FEATURE_AVX512VBMI); + if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VBMI2); + if (HasLeaf7 && ((ECX >> 8) & 1)) + setFeature(FEATURE_GFNI); + if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) + setFeature(FEATURE_VPCLMULQDQ); + if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512VNNI); + if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512BITALG); if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX512VPOPCNTDQ; + setFeature(FEATURE_AVX512VPOPCNTDQ); if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX5124VNNIW; + setFeature(FEATURE_AVX5124VNNIW); if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) - Features |= 1 << FEATURE_AVX5124FMAPS; + setFeature(FEATURE_AVX5124FMAPS); unsigned MaxExtLevel; getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); @@ -521,13 +559,15 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); if (HasExtLeaf1 && ((ECX >> 6) & 1)) - Features |= 1 << FEATURE_SSE4_A; + setFeature(FEATURE_SSE4_A); if (HasExtLeaf1 && ((ECX >> 11) & 1)) - Features |= 1 << FEATURE_XOP; + setFeature(FEATURE_XOP); if (HasExtLeaf1 && ((ECX >> 16) & 1)) - Features |= 1 << FEATURE_FMA4; + setFeature(FEATURE_FMA4); *FeaturesOut = Features; + *Features2Out = Features2; +#undef setFeature } #if defined(HAVE_INIT_PRIORITY) @@ -548,8 +588,9 @@ struct __processor_model { unsigned int __cpu_subtype; unsigned int __cpu_features[1]; } __cpu_model = {0, 0, 0, {0}}; +unsigned int __cpu_features2; -/* A constructor function that is sets __cpu_model and __cpu_features with +/* A constructor function that is sets __cpu_model and __cpu_features2 with the right values. This needs to run only once. This constructor is given the highest priority and it should run before constructors without the priority set. However, it still runs after ifunc initializers and @@ -562,6 +603,7 @@ __cpu_indicator_init(void) { unsigned Vendor; unsigned Model, Family, Brand_id; unsigned Features = 0; + unsigned Features2 = 0; /* This function needs to run just once. */ if (__cpu_model.__cpu_vendor) @@ -580,8 +622,9 @@ __cpu_indicator_init(void) { Brand_id = EBX & 0xff; /* Find available features. */ - getAvailableFeatures(ECX, EDX, MaxLeaf, &Features); + getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); __cpu_model.__cpu_features[0] = Features; + __cpu_features2 = Features2; if (Vendor == SIG_INTEL) { /* Get CPU type. */ diff --git a/lib/libcompiler_rt/ctzdi2.c b/lib/libcompiler_rt/ctzdi2.c index eecde29718d..ef6d7fea136 100644 --- a/lib/libcompiler_rt/ctzdi2.c +++ b/lib/libcompiler_rt/ctzdi2.c @@ -16,8 +16,13 @@ /* Returns: the number of trailing 0-bits */ -#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__)) -/* gcc resolves __builtin_ctz -> __ctzdi2 leading to infinite recursion */ +#if !defined(__clang__) && \ + ((defined(__sparc__) && defined(__arch64__)) || \ + defined(__mips64) || \ + (defined(__riscv) && __SIZEOF_POINTER__ >= 8)) +/* On 64-bit architectures with neither a native clz instruction nor a native + * ctz instruction, gcc resolves __builtin_ctz to __ctzdi2 rather than + * __ctzsi2, leading to infinite recursion. */ #define __builtin_ctz(a) __ctzsi2(a) extern si_int __ctzsi2(si_int); #endif diff --git a/lib/libcompiler_rt/divdc3.c b/lib/libcompiler_rt/divdc3.c index 3c88390b5e7..392d6ecacde 100644 --- a/lib/libcompiler_rt/divdc3.c +++ b/lib/libcompiler_rt/divdc3.c @@ -12,6 +12,8 @@ * ===----------------------------------------------------------------------=== */ +#define DOUBLE_PRECISION +#include "fp_lib.h" #include "int_lib.h" #include "int_math.h" @@ -21,7 +23,7 @@ COMPILER_RT_ABI Dcomplex __divdc3(double __a, double __b, double __c, double __d) { int __ilogbw = 0; - double __logbw = crt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d))); + double __logbw = __compiler_rt_logb(crt_fmax(crt_fabs(__c), crt_fabs(__d))); if (crt_isfinite(__logbw)) { __ilogbw = (int)__logbw; diff --git a/lib/libcompiler_rt/divdf3.c b/lib/libcompiler_rt/divdf3.c index 04a4dc5571c..411c82ebb87 100644 --- a/lib/libcompiler_rt/divdf3.c +++ b/lib/libcompiler_rt/divdf3.c @@ -21,36 +21,36 @@ COMPILER_RT_ABI fp_t __divdf3(fp_t a, fp_t b) { - + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; - + rep_t aSignificand = toRep(a) & significandMask; rep_t bSignificand = toRep(b) & significandMask; int scale = 0; - + // Detect if a or b is zero, denormal, infinity, or NaN. if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { - + const rep_t aAbs = toRep(a) & absMask; const rep_t bAbs = toRep(b) & absMask; - + // NaN / anything = qNaN if (aAbs > infRep) return fromRep(toRep(a) | quietBit); // anything / NaN = qNaN if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - + if (aAbs == infRep) { // infinity / infinity = NaN if (bAbs == infRep) return fromRep(qnanRep); // infinity / anything else = +/- infinity else return fromRep(aAbs | quotientSign); } - + // anything else / infinity = +/- 0 if (bAbs == infRep) return fromRep(quotientSign); - + if (!aAbs) { // zero / zero = NaN if (!bAbs) return fromRep(qnanRep); @@ -59,28 +59,28 @@ __divdf3(fp_t a, fp_t b) { } // anything else / zero = +/- infinity if (!bAbs) return fromRep(infRep | quotientSign); - + // one or both of a or b is denormal, the other (if applicable) is a // normal number. Renormalize one or both of a and b, and set scale to // include the necessary exponent adjustment. if (aAbs < implicitBit) scale += normalize(&aSignificand); if (bAbs < implicitBit) scale -= normalize(&bSignificand); } - + // Or in the implicit significand bit. (If we fell through from the // denormal path it was already set by normalize( ), but setting it twice // won't hurt anything.) aSignificand |= implicitBit; bSignificand |= implicitBit; int quotientExponent = aExponent - bExponent + scale; - + // Align the significand of b as a Q31 fixed-point number in the range // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This // is accurate to about 3.5 binary digits. const uint32_t q31b = bSignificand >> 21; uint32_t recip32 = UINT32_C(0x7504f333) - q31b; - + // Now refine the reciprocal estimate using a Newton-Raphson iteration: // // x1 = x0 * (2 - x0 * b) @@ -95,13 +95,13 @@ __divdf3(fp_t a, fp_t b) { recip32 = (uint64_t)recip32 * correction32 >> 31; correction32 = -((uint64_t)recip32 * q31b >> 32); recip32 = (uint64_t)recip32 * correction32 >> 31; - + // recip32 might have overflowed to exactly zero in the preceding // computation if the high word of b is exactly 1.0. This would sabotage // the full-width final stage of the computation that follows, so we adjust // recip32 downward by one bit. recip32--; - + // We need to perform one more iteration to get us to 56 binary digits; // The last iteration needs to happen with extra precision. const uint32_t q63blo = bSignificand << 11; @@ -110,14 +110,14 @@ __divdf3(fp_t a, fp_t b) { uint32_t cHi = correction >> 32; uint32_t cLo = correction; reciprocal = (uint64_t)recip32*cHi + ((uint64_t)recip32*cLo >> 32); - + // We already adjusted the 32-bit estimate, now we need to adjust the final // 64-bit reciprocal estimate downward to ensure that it is strictly smaller // than the infinitely precise exact reciprocal. Because the computation // of the Newton-Raphson step is truncating at every step, this adjustment // is small; most of the work is already done. reciprocal -= 2; - + // The numerical reciprocal is accurate to within 2^-56, lies in the // interval [0.5, 1.0), and is strictly smaller than the true reciprocal // of b. Multiplying a by this reciprocal thus gives a numerical q = a/b @@ -127,12 +127,12 @@ __divdf3(fp_t a, fp_t b) { // 2. q is in the interval [0.5, 2.0) // 3. the error in q is bounded away from 2^-53 (actually, we have a // couple of bits to spare, but this is all we need). - + // We need a 64 x 64 multiply high to compute q, which isn't a basic // operation in C, so we need to be a little bit fussy. rep_t quotient, quotientLo; wideMultiply(aSignificand << 2, reciprocal, "ient, "ientLo); - + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). // In either case, we are going to compute a residual of the form // @@ -141,7 +141,7 @@ __divdf3(fp_t a, fp_t b) { // We know from the construction of q that r satisfies: // // 0 <= r < ulp(q)*b - // + // // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we // already have the correct result. The exact halfway case cannot occur. // We also take this time to right shift quotient if it falls in the [1,2) @@ -154,20 +154,20 @@ __divdf3(fp_t a, fp_t b) { quotient >>= 1; residual = (aSignificand << 52) - quotient * bSignificand; } - + const int writtenExponent = quotientExponent + exponentBias; - + if (writtenExponent >= maxExponent) { // If we have overflowed the exponent, return infinity. return fromRep(infRep | quotientSign); } - + else if (writtenExponent < 1) { // Flush denormals to zero. In the future, it would be nice to add // code to round them correctly. return fromRep(quotientSign); } - + else { const bool round = (residual << 1) > bSignificand; // Clear the implicit bit diff --git a/lib/libcompiler_rt/divsc3.c b/lib/libcompiler_rt/divsc3.c index 42a48315e66..0d18a256c3d 100644 --- a/lib/libcompiler_rt/divsc3.c +++ b/lib/libcompiler_rt/divsc3.c @@ -12,6 +12,8 @@ *===----------------------------------------------------------------------=== */ +#define SINGLE_PRECISION +#include "fp_lib.h" #include "int_lib.h" #include "int_math.h" @@ -21,7 +23,8 @@ COMPILER_RT_ABI Fcomplex __divsc3(float __a, float __b, float __c, float __d) { int __ilogbw = 0; - float __logbw = crt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d))); + float __logbw = + __compiler_rt_logbf(crt_fmaxf(crt_fabsf(__c), crt_fabsf(__d))); if (crt_isfinite(__logbw)) { __ilogbw = (int)__logbw; diff --git a/lib/libcompiler_rt/divsf3.c b/lib/libcompiler_rt/divsf3.c index 65294d70fc6..a74917fd1de 100644 --- a/lib/libcompiler_rt/divsf3.c +++ b/lib/libcompiler_rt/divsf3.c @@ -21,36 +21,36 @@ COMPILER_RT_ABI fp_t __divsf3(fp_t a, fp_t b) { - + const unsigned int aExponent = toRep(a) >> significandBits & maxExponent; const unsigned int bExponent = toRep(b) >> significandBits & maxExponent; const rep_t quotientSign = (toRep(a) ^ toRep(b)) & signBit; - + rep_t aSignificand = toRep(a) & significandMask; rep_t bSignificand = toRep(b) & significandMask; int scale = 0; - + // Detect if a or b is zero, denormal, infinity, or NaN. if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) { - + const rep_t aAbs = toRep(a) & absMask; const rep_t bAbs = toRep(b) & absMask; - + // NaN / anything = qNaN if (aAbs > infRep) return fromRep(toRep(a) | quietBit); // anything / NaN = qNaN if (bAbs > infRep) return fromRep(toRep(b) | quietBit); - + if (aAbs == infRep) { // infinity / infinity = NaN if (bAbs == infRep) return fromRep(qnanRep); // infinity / anything else = +/- infinity else return fromRep(aAbs | quotientSign); } - + // anything else / infinity = +/- 0 if (bAbs == infRep) return fromRep(quotientSign); - + if (!aAbs) { // zero / zero = NaN if (!bAbs) return fromRep(qnanRep); @@ -59,28 +59,28 @@ __divsf3(fp_t a, fp_t b) { } // anything else / zero = +/- infinity if (!bAbs) return fromRep(infRep | quotientSign); - + // one or both of a or b is denormal, the other (if applicable) is a // normal number. Renormalize one or both of a and b, and set scale to // include the necessary exponent adjustment. if (aAbs < implicitBit) scale += normalize(&aSignificand); if (bAbs < implicitBit) scale -= normalize(&bSignificand); } - + // Or in the implicit significand bit. (If we fell through from the // denormal path it was already set by normalize( ), but setting it twice // won't hurt anything.) aSignificand |= implicitBit; bSignificand |= implicitBit; int quotientExponent = aExponent - bExponent + scale; - + // Align the significand of b as a Q31 fixed-point number in the range // [1, 2.0) and get a Q32 approximate reciprocal using a small minimax // polynomial approximation: reciprocal = 3/4 + 1/sqrt(2) - b/2. This // is accurate to about 3.5 binary digits. uint32_t q31b = bSignificand << 8; uint32_t reciprocal = UINT32_C(0x7504f333) - q31b; - + // Now refine the reciprocal estimate using a Newton-Raphson iteration: // // x1 = x0 * (2 - x0 * b) @@ -95,7 +95,7 @@ __divsf3(fp_t a, fp_t b) { reciprocal = (uint64_t)reciprocal * correction >> 31; correction = -((uint64_t)reciprocal * q31b >> 32); reciprocal = (uint64_t)reciprocal * correction >> 31; - + // Exhaustive testing shows that the error in reciprocal after three steps // is in the interval [-0x1.f58108p-31, 0x1.d0e48cp-29], in line with our // expectations. We bump the reciprocal by a tiny value to force the error @@ -103,7 +103,7 @@ __divsf3(fp_t a, fp_t b) { // be specific). This also causes 1/1 to give a sensible approximation // instead of zero (due to overflow). reciprocal -= 2; - + // The numerical reciprocal is accurate to within 2^-28, lies in the // interval [0x1.000000eep-1, 0x1.fffffffcp-1], and is strictly smaller // than the true reciprocal of b. Multiplying a by this reciprocal thus @@ -115,9 +115,9 @@ __divsf3(fp_t a, fp_t b) { // from the fact that we truncate the product, and the 2^27 term // is the error in the reciprocal of b scaled by the maximum // possible value of a. As a consequence of this error bound, - // either q or nextafter(q) is the correctly rounded + // either q or nextafter(q) is the correctly rounded rep_t quotient = (uint64_t)reciprocal*(aSignificand << 1) >> 32; - + // Two cases: quotient is in [0.5, 1.0) or quotient is in [1.0, 2.0). // In either case, we are going to compute a residual of the form // @@ -126,7 +126,7 @@ __divsf3(fp_t a, fp_t b) { // We know from the construction of q that r satisfies: // // 0 <= r < ulp(q)*b - // + // // if r is greater than 1/2 ulp(q)*b, then q rounds up. Otherwise, we // already have the correct result. The exact halfway case cannot occur. // We also take this time to right shift quotient if it falls in the [1,2) @@ -141,18 +141,18 @@ __divsf3(fp_t a, fp_t b) { } const int writtenExponent = quotientExponent + exponentBias; - + if (writtenExponent >= maxExponent) { // If we have overflowed the exponent, return infinity. return fromRep(infRep | quotientSign); } - + else if (writtenExponent < 1) { // Flush denormals to zero. In the future, it would be nice to add // code to round them correctly. return fromRep(quotientSign); } - + else { const bool round = (residual << 1) > bSignificand; // Clear the implicit bit diff --git a/lib/libcompiler_rt/divtc3.c b/lib/libcompiler_rt/divtc3.c index 16e538ba4a3..e5ea00d841e 100644 --- a/lib/libcompiler_rt/divtc3.c +++ b/lib/libcompiler_rt/divtc3.c @@ -12,6 +12,8 @@ *===----------------------------------------------------------------------=== */ +#define QUAD_PRECISION +#include "fp_lib.h" #include "int_lib.h" #include "int_math.h" @@ -21,7 +23,8 @@ COMPILER_RT_ABI Lcomplex __divtc3(long double __a, long double __b, long double __c, long double __d) { int __ilogbw = 0; - long double __logbw = crt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); + long double __logbw = + __compiler_rt_logbl(crt_fmaxl(crt_fabsl(__c), crt_fabsl(__d))); if (crt_isfinite(__logbw)) { __ilogbw = (int)__logbw; diff --git a/lib/libcompiler_rt/emutls.c b/lib/libcompiler_rt/emutls.c index 07d436e267d..ef95a1c260c 100644 --- a/lib/libcompiler_rt/emutls.c +++ b/lib/libcompiler_rt/emutls.c @@ -42,6 +42,7 @@ static void emutls_shutdown(emutls_address_array *array); static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_key_t emutls_pthread_key; +static bool emutls_key_created = false; typedef unsigned int gcc_word __attribute__((mode(word))); typedef unsigned int gcc_pointer __attribute__((mode(pointer))); @@ -109,6 +110,7 @@ static void emutls_key_destructor(void* ptr) { static __inline void emutls_init(void) { if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) abort(); + emutls_key_created = true; } static __inline void emutls_init_once(void) { @@ -390,3 +392,14 @@ void* __emutls_get_address(__emutls_control* control) { array->data[index] = emutls_allocate_object(control); return array->data[index]; } + +#ifdef __BIONIC__ +/* Called by Bionic on dlclose to delete the emutls pthread key. */ +__attribute__((visibility("hidden"))) +void __emutls_unregister_key(void) { + if (emutls_key_created) { + pthread_key_delete(emutls_pthread_key); + emutls_key_created = false; + } +} +#endif diff --git a/lib/libcompiler_rt/fp_lib.h b/lib/libcompiler_rt/fp_lib.h index 223fb980aae..a0e19ab6a8f 100644 --- a/lib/libcompiler_rt/fp_lib.h +++ b/lib/libcompiler_rt/fp_lib.h @@ -25,6 +25,7 @@ #include <stdbool.h> #include <limits.h> #include "int_lib.h" +#include "int_math.h" // x86_64 FreeBSD prior v9.3 define fixed-width types incorrectly in // 32-bit mode. @@ -265,6 +266,62 @@ static __inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, unsigned int *hi = 0; } } + +// Implements logb methods (logb, logbf, logbl) for IEEE-754. This avoids +// pulling in a libm dependency from compiler-rt, but is not meant to replace +// it (i.e. code calling logb() should get the one from libm, not this), hence +// the __compiler_rt prefix. +static __inline fp_t __compiler_rt_logbX(fp_t x) { + rep_t rep = toRep(x); + int exp = (rep & exponentMask) >> significandBits; + + // Abnormal cases: + // 1) +/- inf returns +inf; NaN returns NaN + // 2) 0.0 returns -inf + if (exp == maxExponent) { + if (((rep & signBit) == 0) || (x != x)) { + return x; // NaN or +inf: return x + } else { + return -x; // -inf: return -x + } + } else if (x == 0.0) { + // 0.0: return -inf + return fromRep(infRep | signBit); + } + + if (exp != 0) { + // Normal number + return exp - exponentBias; // Unbias exponent + } else { + // Subnormal number; normalize and repeat + rep &= absMask; + const int shift = 1 - normalize(&rep); + exp = (rep & exponentMask) >> significandBits; + return exp - exponentBias - shift; // Unbias exponent + } +} +#endif + +#if defined(SINGLE_PRECISION) +static __inline fp_t __compiler_rt_logbf(fp_t x) { + return __compiler_rt_logbX(x); +} +#elif defined(DOUBLE_PRECISION) +static __inline fp_t __compiler_rt_logb(fp_t x) { + return __compiler_rt_logbX(x); +} +#elif defined(QUAD_PRECISION) + #if defined(CRT_LDBL_128BIT) +static __inline fp_t __compiler_rt_logbl(fp_t x) { + return __compiler_rt_logbX(x); +} + #else +// The generic implementation only works for ieee754 floating point. For other +// floating point types, continue to rely on the libm implementation for now. +static __inline long double __compiler_rt_logbl(long double x) { + return crt_logbl(x); +} + #endif #endif #endif // FP_LIB_HEADER diff --git a/lib/libcompiler_rt/gcc_personality_v0.c b/lib/libcompiler_rt/gcc_personality_v0.c index 0bc76562456..68581ef1643 100644 --- a/lib/libcompiler_rt/gcc_personality_v0.c +++ b/lib/libcompiler_rt/gcc_personality_v0.c @@ -206,8 +206,8 @@ __gcc_personality_v0(int version, _Unwind_Action actions, if ( lsda == (uint8_t*) 0 ) return continueUnwind(exceptionObject, context); - uintptr_t pc = _Unwind_GetIP(context)-1; - uintptr_t funcStart = _Unwind_GetRegionStart(context); + uintptr_t pc = (uintptr_t)_Unwind_GetIP(context)-1; + uintptr_t funcStart = (uintptr_t)_Unwind_GetRegionStart(context); uintptr_t pcOffset = pc - funcStart; /* Parse LSDA header. */ @@ -249,4 +249,3 @@ __gcc_personality_v0(int version, _Unwind_Action actions, /* No landing pad found, continue unwinding. */ return continueUnwind(exceptionObject, context); } - diff --git a/lib/libcompiler_rt/int_lib.h b/lib/libcompiler_rt/int_lib.h index 9d09e2dc915..fe8a3bdedc0 100644 --- a/lib/libcompiler_rt/int_lib.h +++ b/lib/libcompiler_rt/int_lib.h @@ -43,7 +43,7 @@ #define AEABI_RTABI __attribute__((__pcs__("aapcs"))) -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) #define ALWAYS_INLINE __forceinline #define NOINLINE __declspec(noinline) #define NORETURN __declspec(noreturn) diff --git a/lib/libcompiler_rt/int_math.h b/lib/libcompiler_rt/int_math.h index fc81fb7f022..aa3d0721a8a 100644 --- a/lib/libcompiler_rt/int_math.h +++ b/lib/libcompiler_rt/int_math.h @@ -92,12 +92,8 @@ #endif #if defined(_MSC_VER) && !defined(__clang__) -#define crt_logb(x) logb((x)) -#define crt_logbf(x) logbf((x)) #define crt_logbl(x) logbl((x)) #else -#define crt_logb(x) __builtin_logb((x)) -#define crt_logbf(x) __builtin_logbf((x)) #define crt_logbl(x) __builtin_logbl((x)) #endif diff --git a/lib/libcompiler_rt/int_types.h b/lib/libcompiler_rt/int_types.h index f53f343d35d..9f8da56cb77 100644 --- a/lib/libcompiler_rt/int_types.h +++ b/lib/libcompiler_rt/int_types.h @@ -60,10 +60,19 @@ typedef union }s; } udwords; -#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64)) || defined(__riscv) +#if defined(__LP64__) || defined(__wasm__) || defined(__mips64) || \ + defined(__riscv) || defined(_WIN64) #define CRT_HAS_128BIT #endif +/* MSVC doesn't have a working 128bit integer type. Users should really compile + * compiler-rt with clang, but if they happen to be doing a standalone build for + * asan or something else, disable the 128 bit parts so things sort of work. + */ +#if defined(_MSC_VER) && !defined(__clang__) +#undef CRT_HAS_128BIT +#endif + #ifdef CRT_HAS_128BIT typedef int ti_int __attribute__ ((mode (TI))); typedef unsigned tu_int __attribute__ ((mode (TI))); @@ -137,6 +146,18 @@ typedef struct #endif /* _YUGA_LITTLE_ENDIAN */ } uqwords; +/* Check if the target supports 80 bit extended precision long doubles. + * Notably, on x86 Windows, MSVC only provides a 64-bit long double, but GCC + * still makes it 80 bits. Clang will match whatever compiler it is trying to + * be compatible with. + */ +#if ((defined(__i386__) || defined(__x86_64__)) && !defined(_MSC_VER)) || \ + defined(__m68k__) || defined(__ia64__) +#define HAS_80_BIT_LONG_DOUBLE 1 +#else +#define HAS_80_BIT_LONG_DOUBLE 0 +#endif + typedef union { uqwords u; diff --git a/lib/libcompiler_rt/int_util.c b/lib/libcompiler_rt/int_util.c index de87410dbca..752f2015580 100644 --- a/lib/libcompiler_rt/int_util.c +++ b/lib/libcompiler_rt/int_util.c @@ -27,7 +27,7 @@ NORETURN extern void panic(const char *, ...); #ifndef _WIN32 __attribute__((visibility("hidden"))) #endif -void compilerrt_abort_impl(const char *file, int line, const char *function) { +void __compilerrt_abort_impl(const char *file, int line, const char *function) { panic("%s:%d: abort in %s", file, line, function); } @@ -41,7 +41,7 @@ NORETURN extern void __assert_rtn(const char *func, const char *file, int line, __attribute__((weak)) __attribute__((visibility("hidden"))) #endif -void compilerrt_abort_impl(const char *file, int line, const char *function) { +void __compilerrt_abort_impl(const char *file, int line, const char *function) { __assert_rtn(function, file, line, "libcompiler_rt abort"); } @@ -51,7 +51,7 @@ void compilerrt_abort_impl(const char *file, int line, const char *function) { __attribute__((weak)) __attribute__((visibility("hidden"))) #endif -void compilerrt_abort_impl(const char *file, int line, const char *function) { +void __compilerrt_abort_impl(const char *file, int line, const char *function) { __builtin_trap(); } @@ -64,7 +64,7 @@ void compilerrt_abort_impl(const char *file, int line, const char *function) { __attribute__((weak)) __attribute__((visibility("hidden"))) #endif -void compilerrt_abort_impl(const char *file, int line, const char *function) { +void __compilerrt_abort_impl(const char *file, int line, const char *function) { abort(); } diff --git a/lib/libcompiler_rt/int_util.h b/lib/libcompiler_rt/int_util.h index a7b20ed6624..c3c87381ad8 100644 --- a/lib/libcompiler_rt/int_util.h +++ b/lib/libcompiler_rt/int_util.h @@ -20,10 +20,10 @@ #define INT_UTIL_H /** \brief Trigger a program abort (or panic for kernel code). */ -#define compilerrt_abort() compilerrt_abort_impl(__FILE__, __LINE__, __func__) +#define compilerrt_abort() __compilerrt_abort_impl(__FILE__, __LINE__, __func__) -NORETURN void compilerrt_abort_impl(const char *file, int line, - const char *function); +NORETURN void __compilerrt_abort_impl(const char *file, int line, + const char *function); #define COMPILE_TIME_ASSERT(expr) COMPILE_TIME_ASSERT1(expr, __COUNTER__) #define COMPILE_TIME_ASSERT1(expr, cnt) COMPILE_TIME_ASSERT2(expr, cnt) diff --git a/lib/libcompiler_rt/ppc/divtc3.c b/lib/libcompiler_rt/ppc/divtc3.c index 8ec41c528ab..ef532b84114 100644 --- a/lib/libcompiler_rt/ppc/divtc3.c +++ b/lib/libcompiler_rt/ppc/divtc3.c @@ -4,6 +4,11 @@ #include "DD.h" #include "../int_math.h" +// Use DOUBLE_PRECISION because the soft-fp method we use is logb (on the upper +// half of the long doubles), even though this file defines complex division for +// 128-bit floats. +#define DOUBLE_PRECISION +#include "../fp_lib.h" #if !defined(CRT_INFINITY) && defined(HUGE_VAL) #define CRT_INFINITY HUGE_VAL @@ -21,9 +26,10 @@ __divtc3(long double a, long double b, long double c, long double d) DD dDD = { .ld = d }; int ilogbw = 0; - const double logbw = crt_logb(crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi) )); - - if (crt_isfinite(logbw)) + const double logbw = __compiler_rt_logb( + crt_fmax(crt_fabs(cDD.s.hi), crt_fabs(dDD.s.hi))); + + if (crt_isfinite(logbw)) { ilogbw = (int)logbw; diff --git a/lib/libcompiler_rt/ppc/fixunstfti.c b/lib/libcompiler_rt/ppc/fixunstfti.c new file mode 100644 index 00000000000..fa21084cb57 --- /dev/null +++ b/lib/libcompiler_rt/ppc/fixunstfti.c @@ -0,0 +1,106 @@ +//===-- lib/builtins/ppc/fixunstfti.c - Convert long double->int128 *-C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements converting the 128bit IBM/PowerPC long double (double- +// double) data type to an unsigned 128 bit integer. +// +//===----------------------------------------------------------------------===// + +#include "../int_math.h" +#define BIAS 1023 + +/* Convert long double into an unsigned 128-bit integer. */ +__uint128_t __fixunstfti(long double input) { + + /* If we are trying to convert a NaN, return the NaN bit pattern. */ + if (crt_isnan(input)) { + return ((__uint128_t)0x7FF8000000000000ll) << 64 | + (__uint128_t)0x0000000000000000ll; + } + + __uint128_t result, hiResult, loResult; + int hiExponent, loExponent, shift; + /* The long double representation, with the high and low portions of + * the long double, and the corresponding bit patterns of each double. */ + union { + long double ld; + double d[2]; /* [0] is the high double, [1] is the low double. */ + unsigned long long ull[2]; /* High and low doubles as 64-bit integers. */ + } ldUnion; + + /* If the long double is less than 1.0 or negative, + * return 0.0. */ + if (input < 1.0) + return 0.0; + + /* Retrieve the 64-bit patterns of high and low doubles. + * Compute the unbiased exponent of both high and low doubles by + * removing the signs, isolating the exponent, and subtracting + * the bias from it. */ + ldUnion.ld = input; + hiExponent = ((ldUnion.ull[0] & 0x7FFFFFFFFFFFFFFFll) >> 52) - BIAS; + loExponent = ((ldUnion.ull[1] & 0x7FFFFFFFFFFFFFFFll) >> 52) - BIAS; + + /* Convert each double into int64; they will be added to the int128 result. + * CASE 1: High or low double fits in int64 + * - Convert the each double normally into int64. + * + * CASE 2: High or low double does not fit in int64 + * - Scale the double to fit within a 64-bit integer + * - Calculate the shift (amount to scale the double by in the int128) + * - Clear all the bits of the exponent (with 0x800FFFFFFFFFFFFF) + * - Add BIAS+53 (0x4350000000000000) to exponent to correct the value + * - Scale (move) the double to the correct place in the int128 + * (Move it by 2^53 places) + * + * Note: If the high double is assumed to be positive, an unsigned conversion + * from long double to 64-bit integer is needed. The low double can be either + * positive or negative, so a signed conversion is needed to retain the result + * of the low double and to ensure it does not simply get converted to 0. */ + + /* CASE 1 - High double fits in int64. */ + if (hiExponent < 63) { + hiResult = (unsigned long long)ldUnion.d[0]; + } else if (hiExponent < 128) { + /* CASE 2 - High double does not fit in int64, scale and convert it. */ + shift = hiExponent - 54; + ldUnion.ull[0] &= 0x800FFFFFFFFFFFFFll; + ldUnion.ull[0] |= 0x4350000000000000ll; + hiResult = (unsigned long long)ldUnion.d[0]; + hiResult <<= shift; + } else { + /* Detect cases for overflow. When the exponent of the high + * double is greater than 128 bits and when the long double + * input is positive, return the max 128-bit integer. + * For negative inputs with exponents > 128, return 1, like gcc. */ + if (ldUnion.d[0] > 0) { + return ((__uint128_t)0xFFFFFFFFFFFFFFFFll) << 64 | + (__uint128_t)0xFFFFFFFFFFFFFFFFll; + } else { + return ((__uint128_t)0x0000000000000000ll) << 64 | + (__uint128_t)0x0000000000000001ll; + } + } + + /* CASE 1 - Low double fits in int64. */ + if (loExponent < 63) { + loResult = (long long)ldUnion.d[1]; + } else { + /* CASE 2 - Low double does not fit in int64, scale and convert it. */ + shift = loExponent - 54; + ldUnion.ull[1] &= 0x800FFFFFFFFFFFFFll; + ldUnion.ull[1] |= 0x4350000000000000ll; + loResult = (long long)ldUnion.d[1]; + loResult <<= shift; + } + + /* Add the high and low doublewords together to form a 128 bit integer. */ + result = loResult + hiResult; + return result; +} diff --git a/lib/libcompiler_rt/ppc/floattitf.c b/lib/libcompiler_rt/ppc/floattitf.c new file mode 100644 index 00000000000..b8e297b6b8d --- /dev/null +++ b/lib/libcompiler_rt/ppc/floattitf.c @@ -0,0 +1,48 @@ +//===-- lib/builtins/ppc/floattitf.c - Convert int128->long double -*-C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements converting a signed 128 bit integer to a 128bit IBM / +// PowerPC long double (double-double) value. +// +//===----------------------------------------------------------------------===// + +#include <stdint.h> + +/* Conversions from signed and unsigned 64-bit int to long double. */ +long double __floatditf(int64_t); +long double __floatunditf(uint64_t); + +/* Convert a signed 128-bit integer to long double. + * This uses the following property: Let hi and lo be 64-bits each, + * and let signed_val_k() and unsigned_val_k() be the value of the + * argument interpreted as a signed or unsigned k-bit integer. Then, + * + * signed_val_128(hi,lo) = signed_val_64(hi) * 2^64 + unsigned_val_64(lo) + * = (long double)hi * 2^64 + (long double)lo, + * + * where (long double)hi and (long double)lo are signed and + * unsigned 64-bit integer to long double conversions, respectively. + */ +long double __floattitf(__int128_t arg) { + /* Split the int128 argument into 64-bit high and low int64 parts. */ + int64_t ArgHiPart = (int64_t)(arg >> 64); + uint64_t ArgLoPart = (uint64_t)arg; + + /* Convert each 64-bit part into long double. The high part + * must be a signed conversion and the low part an unsigned conversion + * to ensure the correct result. */ + long double ConvertedHiPart = __floatditf(ArgHiPart); + long double ConvertedLoPart = __floatunditf(ArgLoPart); + + /* The low bit of ArgHiPart corresponds to the 2^64 bit in arg. + * Multiply the high part by 2^64 to undo the right shift by 64-bits + * done in the splitting. Then, add to the low part to obtain the + * final result. */ + return ((ConvertedHiPart * 0x1.0p64) + ConvertedLoPart); +} |