diff options
74 files changed, 11427 insertions, 0 deletions
diff --git a/lib/libc/arch/arm/Makefile.inc b/lib/libc/arch/arm/Makefile.inc new file mode 100644 index 00000000000..a39bde3da72 --- /dev/null +++ b/lib/libc/arch/arm/Makefile.inc @@ -0,0 +1,14 @@ +# $NetBSD: Makefile.inc,v 1.5 2002/07/10 04:29:06 thorpej Exp $ + +.include <bsd.own.mk> + +KMINCLUDES= +KMSRCS=arch/arm/gen/divsi3.S arch/arm/string/memmove.S \ + arch/arm/string/memcpy.S + +#SRCS+= __sigaction14_sigtramp.c __sigtramp1.S + +CPPFLAGS += -DSOFTFLOAT + +SOFTFLOAT_BITS=32 +.include <arch/arm/softfloat/Makefile.inc> diff --git a/lib/libc/arch/arm/SYS.h b/lib/libc/arch/arm/SYS.h new file mode 100644 index 00000000000..d28f51e426d --- /dev/null +++ b/lib/libc/arch/arm/SYS.h @@ -0,0 +1,112 @@ +/* $NetBSD: SYS.h,v 1.8 2003/08/07 16:42:02 agc Exp $ */ + +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)SYS.h 5.5 (Berkeley) 5/7/91 + */ + +#include <machine/asm.h> +#include <sys/syscall.h> +#include <arm/swi.h> + +#ifdef __STDC__ +#define _CONCAT(x,y) x##y +#define SYSTRAP(x) swi SWI_OS_NETBSD | SYS_ ## x +#else +#define _CONCAT(x,y) x/**/y +#define SYSTRAP(x) swi SWI_OS_NETBSD | SYS_/**/x +#endif + +#ifdef __ELF__ +#define CERROR _C_LABEL(__cerror) +#define CURBRK _C_LABEL(__curbrk) +#else +#define CERROR _ASM_LABEL(cerror) +#define CURBRK _ASM_LABEL(curbrk) +#endif + +#define _SYSCALL_NOERROR(x,y) \ + ENTRY(x); \ + SYSTRAP(y) + +#define ALIAS(x,y) .weak y; .set y,_CONCAT(x,y); + + +#ifdef __STDC__ +#define SYSENTRY(x) \ + .weak _C_LABEL(x); \ + _C_LABEL(x) = _C_LABEL(_thread_sys_ ## x); \ + ENTRY(_thread_sys_ ## x) +#else /* ! __STDC__ */ +#define SYSENTRY(x) \ + .weak _C_LABEL(x); \ + _C_LABEL(x) = _C_LABEL(_thread_sys_/**/x); \ + ENTRY(_thread_sys_/**/x) +#endif /* ! 
__STDC__ */ + + +#define _SYSCALL(x, y) \ + _SYSCALL_NOERROR(x,y); \ + bcs PIC_SYM(CERROR, PLT) + +#define SYSCALL_NOERROR(x) \ + _SYSCALL_NOERROR(x,x) + +#define SYSCALL(x) \ + _SYSCALL(x,x) + + +#define PSEUDO_NOERROR(x,y) \ + _SYSCALL_NOERROR(x,y); \ + mov r15, r14 + +#define PSEUDO(x,y) \ + _SYSCALL(x,y); \ + mov r15, r14 + + +#define RSYSCALL_NOERROR(x) \ + PSEUDO_NOERROR(x,x) + +#define RSYSCALL(x) \ + PSEUDO(x,x) + +#ifdef WEAK_ALIAS +#define WSYSCALL(weak,strong) \ + WEAK_ALIAS(weak,strong); \ + PSEUDO(strong,weak) +#else +#define WSYSCALL(weak,strong) \ + PSEUDO(weak,weak) +#endif + + .globl CERROR diff --git a/lib/libc/arch/arm/gen/Makefile.inc b/lib/libc/arch/arm/gen/Makefile.inc new file mode 100644 index 00000000000..0da88bb29ff --- /dev/null +++ b/lib/libc/arch/arm/gen/Makefile.inc @@ -0,0 +1,31 @@ +# $NetBSD: Makefile.inc,v 1.6 2003/08/01 17:03:47 lukem Exp $ + +SRCS+= alloca.S byte_swap_2.S byte_swap_4.S divsi3.S \ + fabs.c flt_rounds.c \ + infinity.c + +# Common ieee754 constants and functions +#SRCS+= nanf_ieee754.c # infinity is ``different'' on arm, use local version +#SRCS+= frexp_ieee754.c isinf_ieee754.c isinfl_ieee754.c isnan_ieee754.c +#SRCS+= isnanl_ieee754.c ldexp_ieee754.c modf_ieee754.c + +SRCS+= setjmp.S +#SRCS+= __setjmp14.S +SRCS+= _setjmp.S +SRCS+= sigsetjmp.S +#SRCS+= __sigsetjmp14.S + +#SRCS+= makecontext.c resumecontext.c swapcontext.S + +#SRCS+= _lwp.c + +SRCS.arm.gen= Lint_bswap16.c Lint_bswap32.c Lint_swapcontext.c +LSRCS+= ${SRCS.arm.gen} +DPSRCS+= ${SRCS.arm.gen} +CLEANFILES+= ${SRCS.arm.gen} + +SRCS+= isinf.c isnan.c +SRCS+= fpgetround.S + +SRCS+= modf_ieee754.c +SRCS+= ldexp.c diff --git a/lib/libc/arch/arm/gen/_setjmp.S b/lib/libc/arch/arm/gen/_setjmp.S new file mode 100644 index 00000000000..d8e8e492f69 --- /dev/null +++ b/lib/libc/arch/arm/gen/_setjmp.S @@ -0,0 +1,105 @@ +/* $NetBSD: _setjmp.S,v 1.5 2003/04/05 23:08:51 bjh21 Exp $ */ + +/* + * Copyright (c) 1997 Mark Brinicombe + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Mark Brinicombe + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <machine/asm.h> +#include <machine/setjmp.h> + +/* + * C library -- _setjmp, _longjmp + * + * _longjmp(a,v) + * will generate a "return(v)" from the last call to + * _setjmp(a) + * by restoring registers from the stack. + * The previous signal state is NOT restored. 
+ * + * Note: r0 is the return value + * r1-r3 are scratch registers in functions + */ + +ENTRY(_setjmp) + ldr r1, .L_setjmp_magic + str r1, [r0], #4 +#ifdef SOFTFLOAT + add r0, r0, #52 +#else + /* Store fp registers */ + sfm f4, 4, [r0], #48 + /* Store fpsr */ + rfs r1 + str r1, [r0], #0x0004 +#endif /* SOFTFLOAT */ + /* Store integer registers */ + stmia r0, {r4-r14} + + mov r0, #0x00000000 + mov r15, r14 + +.L_setjmp_magic: + .word _JB_MAGIC__SETJMP + +ENTRY(_longjmp) + ldr r2, .L_setjmp_magic + ldr r3, [r0], #4 + teq r2, r3 + bne botch + +#ifdef SOFTFLOAT + add r0, r0, #52 +#else + /* Restore fp registers */ + lfm f4, 4, [r0], #48 + /* Restore fpsr */ + ldr r4, [r0], #0x0004 + wfs r4 +#endif /* SOFTFLOAT */ + /* Restore integer registers */ + ldmia r0, {r4-r14} + + /* Validate sp and r14 */ + teq sp, #0 + teqne r14, #0 + beq botch + + /* Set return value */ + mov r0, r1 + teq r0, #0x00000000 + moveq r0, #0x00000001 + mov r15, r14 + + /* validation failed, die die die. */ +botch: + bl PIC_SYM(_C_LABEL(longjmperror), PLT) + bl PIC_SYM(_C_LABEL(abort), PLT) + b . - 8 /* Cannot get here */ diff --git a/lib/libc/arch/arm/gen/alloca.S b/lib/libc/arch/arm/gen/alloca.S new file mode 100644 index 00000000000..affe1027b85 --- /dev/null +++ b/lib/libc/arch/arm/gen/alloca.S @@ -0,0 +1,44 @@ +/* $NetBSD: alloca.S,v 1.3 2003/04/05 23:08:51 bjh21 Exp $ */ + +/* + * Copyright (c) 1995 Mark Brinicombe + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Mark Brinicombe + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* like alloc, but automatic automatic free in return */ + +#include <machine/asm.h> + +ENTRY(alloca) + add r0, r0, #0x00000007 /* round up to next 8 byte alignment */ + bic r0, r0, #0x00000007 + sub sp, sp, r0 /* Adjust the stack pointer */ + mov r0, sp /* r0 = base of new space */ + mov r15, r14 /* return */ diff --git a/lib/libc/arch/arm/gen/byte_swap_2.S b/lib/libc/arch/arm/gen/byte_swap_2.S new file mode 100644 index 00000000000..258cfdb884c --- /dev/null +++ b/lib/libc/arch/arm/gen/byte_swap_2.S @@ -0,0 +1,48 @@ +/* $NetBSD: byte_swap_2.S,v 1.3 2003/04/05 23:08:51 bjh21 Exp $ */ + +/*- + * Copyright (c) 1999 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Charles M. Hannum. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <machine/asm.h> + +_ENTRY(_C_LABEL(__bswap16)) +_ENTRY(_C_LABEL(ntohs)) +_ENTRY(_C_LABEL(htons)) +_PROF_PROLOGUE + and r1, r0, #0xff + mov r0, r0, lsr #8 + orr r0, r0, r1, lsl #8 + mov pc, lr diff --git a/lib/libc/arch/arm/gen/byte_swap_4.S b/lib/libc/arch/arm/gen/byte_swap_4.S new file mode 100644 index 00000000000..7784b2829b4 --- /dev/null +++ b/lib/libc/arch/arm/gen/byte_swap_4.S @@ -0,0 +1,49 @@ +/* $NetBSD: byte_swap_4.S,v 1.2 2003/04/05 23:08:51 bjh21 Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <machine/asm.h> + +_ENTRY(_C_LABEL(__bswap32)) +_ENTRY(_C_LABEL(ntohl)) +_ENTRY(_C_LABEL(htonl)) +_PROF_PROLOGUE + eor r1, r0, r0, ror #16 + bic r1, r1, #0x00FF0000 + mov r0, r0, ror #8 + eor r0, r0, r1, lsr #8 + mov pc, lr diff --git a/lib/libc/arch/arm/gen/divsi3.S b/lib/libc/arch/arm/gen/divsi3.S new file mode 100644 index 00000000000..33ef4c18327 --- /dev/null +++ b/lib/libc/arch/arm/gen/divsi3.S @@ -0,0 +1,386 @@ +/* $NetBSD: divsi3.S,v 1.5 2003/04/05 23:08:51 bjh21 Exp $ */ + +/* + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+#include <machine/asm.h>
+
+/*
+ * stack is aligned as there's a possibility of branching to .L_overflow
+ * which makes a C call
+ */
+
+/*
+ * __umodsi3: unsigned remainder.
+ * .L_udivide leaves the quotient in r0 and the remainder in r1; the
+ * remainder is moved into r0 for the caller.
+ */
+ENTRY(__umodsi3)
+        stmfd   sp!, {lr}
+        sub     sp, sp, #4      /* align stack */
+        bl      .L_udivide
+        add     sp, sp, #4      /* unalign stack */
+        mov     r0, r1
+        ldmfd   sp!, {pc}
+
+/*
+ * __modsi3: signed remainder.
+ * Same wrapper as __umodsi3, around the signed divide .L_divide.
+ */
+ENTRY(__modsi3)
+        stmfd   sp!, {lr}
+        sub     sp, sp, #4      /* align stack */
+        bl      .L_divide
+        add     sp, sp, #4      /* unalign stack */
+        mov     r0, r1
+        ldmfd   sp!, {pc}
+
+/*
+ * Division by zero.  Branched to (not called) from .L_udivide/.L_divide
+ * when the divisor is 0, so lr still holds the return address of whoever
+ * entered the divide routine.
+ *
+ * "bl" overwrites lr, so the return address has to be saved around the
+ * call to raise(); without that, "mov pc, lr" after the call can never
+ * get back to the caller.  Two registers are pushed so that sp moves by
+ * 8 bytes and keeps the alignment the entry points set up; ip is only
+ * padding.  r1 is a scratch register across the C call, so it is cleared
+ * as well: if raise() returns, both quotient (r0) and remainder (r1) are
+ * reported as 0.
+ */
+.L_overflow:
+#if !defined(_KERNEL) && !defined(_STANDALONE)
+        stmfd   sp!, {ip, lr}           /* save return address, keep sp aligned */
+        mov     r0, #8                  /* SIGFPE */
+        bl      PIC_SYM(_C_LABEL(raise), PLT)   /* raise it */
+        mov     r0, #0
+        mov     r1, #0
+        ldmfd   sp!, {ip, pc}
+#else
+        /* XXX should cause a fatal error */
+        mvn     r0, #0
+        mov     pc, lr
+#endif
+
+/*
+ * __udivsi3: unsigned divide.
+ * The three eor instructions swap r0 and r1 so that the common code works
+ * on r1 = dividend, r0 = divisor.  A divisor of 0 goes to .L_overflow and
+ * a divisor of 1 to .L_divide_l0.
+ */
+ENTRY(__udivsi3)
+.L_udivide:                             /* r0 = r0 / r1; r1 = r0 % r1 */
+        eor     r0, r1, r0
+        eor     r1, r0, r1
+        eor     r0, r1, r0
+                                        /* r0 = r1 / r0; r1 = r1 % r0 */
+        cmp     r0, #1
+        bcc     .L_overflow
+        beq     .L_divide_l0
+        mov     ip, #0
+        movs    r1, r1
+        bpl     .L_divide_l1
+        orr     ip, ip, #0x20000000     /* ip bit 0x20000000 = -ve r1 */
+        movs    r1, r1, lsr #1
+        orrcs   ip, ip, #0x10000000     /* ip bit 0x10000000 = bit 0 of r1 */
+        b       .L_divide_l1
+
+.L_divide_l0:                           /* r0 == 1 */
+        mov     r0, r1
+        mov     r1, #0
+        mov     pc, lr
+
+/*
+ * __divsi3: signed divide.
+ * Operands are swapped as in __udivsi3, made positive, and the signs of
+ * the quotient and remainder are recorded in ip for the fix-up at the end.
+ */
+ENTRY(__divsi3)
+.L_divide:                              /* r0 = r0 / r1; r1 = r0 % r1 */
+        eor     r0, r1, r0
+        eor     r1, r0, r1
+        eor     r0, r1, r0
+                                        /* r0 = r1 / r0; r1 = r1 % r0 */
+        cmp     r0, #1
+        bcc     .L_overflow
+        beq     .L_divide_l0
+        ands    ip, r0, #0x80000000
+        rsbmi   r0, r0, #0
+        ands    r2, r1, #0x80000000
+        eor     ip, ip, r2
+        rsbmi   r1, r1, #0
+        orr     ip, r2, ip, lsr #1      /* ip bit 0x40000000 = -ve division */
+                                        /* ip bit 0x80000000 = -ve remainder */
+
+.L_divide_l1:
+        mov     r2, #1
+        mov     r3, #0
+
+        /*
+         * If the highest bit of the dividend is set, we have to be
+         * careful when shifting the divisor. Test this.
+         */
+        movs    r1,r1
+        bpl     .L_old_code
+
+        /*
+         * At this point, the highest bit of r1 is known to be set.
+         * We abuse this below in the tst instructions.
+ */ + tst r1, r0 /*, lsl #0 */ + bmi .L_divide_b1 + tst r1, r0, lsl #1 + bmi .L_divide_b2 + tst r1, r0, lsl #2 + bmi .L_divide_b3 + tst r1, r0, lsl #3 + bmi .L_divide_b4 + tst r1, r0, lsl #4 + bmi .L_divide_b5 + tst r1, r0, lsl #5 + bmi .L_divide_b6 + tst r1, r0, lsl #6 + bmi .L_divide_b7 + tst r1, r0, lsl #7 + bmi .L_divide_b8 + tst r1, r0, lsl #8 + bmi .L_divide_b9 + tst r1, r0, lsl #9 + bmi .L_divide_b10 + tst r1, r0, lsl #10 + bmi .L_divide_b11 + tst r1, r0, lsl #11 + bmi .L_divide_b12 + tst r1, r0, lsl #12 + bmi .L_divide_b13 + tst r1, r0, lsl #13 + bmi .L_divide_b14 + tst r1, r0, lsl #14 + bmi .L_divide_b15 + tst r1, r0, lsl #15 + bmi .L_divide_b16 + tst r1, r0, lsl #16 + bmi .L_divide_b17 + tst r1, r0, lsl #17 + bmi .L_divide_b18 + tst r1, r0, lsl #18 + bmi .L_divide_b19 + tst r1, r0, lsl #19 + bmi .L_divide_b20 + tst r1, r0, lsl #20 + bmi .L_divide_b21 + tst r1, r0, lsl #21 + bmi .L_divide_b22 + tst r1, r0, lsl #22 + bmi .L_divide_b23 + tst r1, r0, lsl #23 + bmi .L_divide_b24 + tst r1, r0, lsl #24 + bmi .L_divide_b25 + tst r1, r0, lsl #25 + bmi .L_divide_b26 + tst r1, r0, lsl #26 + bmi .L_divide_b27 + tst r1, r0, lsl #27 + bmi .L_divide_b28 + tst r1, r0, lsl #28 + bmi .L_divide_b29 + tst r1, r0, lsl #29 + bmi .L_divide_b30 + tst r1, r0, lsl #30 + bmi .L_divide_b31 +/* + * instead of: + * tst r1, r0, lsl #31 + * bmi .L_divide_b32 + */ + b .L_divide_b32 + +.L_old_code: + cmp r1, r0 + bcc .L_divide_b0 + cmp r1, r0, lsl #1 + bcc .L_divide_b1 + cmp r1, r0, lsl #2 + bcc .L_divide_b2 + cmp r1, r0, lsl #3 + bcc .L_divide_b3 + cmp r1, r0, lsl #4 + bcc .L_divide_b4 + cmp r1, r0, lsl #5 + bcc .L_divide_b5 + cmp r1, r0, lsl #6 + bcc .L_divide_b6 + cmp r1, r0, lsl #7 + bcc .L_divide_b7 + cmp r1, r0, lsl #8 + bcc .L_divide_b8 + cmp r1, r0, lsl #9 + bcc .L_divide_b9 + cmp r1, r0, lsl #10 + bcc .L_divide_b10 + cmp r1, r0, lsl #11 + bcc .L_divide_b11 + cmp r1, r0, lsl #12 + bcc .L_divide_b12 + cmp r1, r0, lsl #13 + bcc .L_divide_b13 + cmp r1, r0, lsl #14 + bcc 
.L_divide_b14 + cmp r1, r0, lsl #15 + bcc .L_divide_b15 + cmp r1, r0, lsl #16 + bcc .L_divide_b16 + cmp r1, r0, lsl #17 + bcc .L_divide_b17 + cmp r1, r0, lsl #18 + bcc .L_divide_b18 + cmp r1, r0, lsl #19 + bcc .L_divide_b19 + cmp r1, r0, lsl #20 + bcc .L_divide_b20 + cmp r1, r0, lsl #21 + bcc .L_divide_b21 + cmp r1, r0, lsl #22 + bcc .L_divide_b22 + cmp r1, r0, lsl #23 + bcc .L_divide_b23 + cmp r1, r0, lsl #24 + bcc .L_divide_b24 + cmp r1, r0, lsl #25 + bcc .L_divide_b25 + cmp r1, r0, lsl #26 + bcc .L_divide_b26 + cmp r1, r0, lsl #27 + bcc .L_divide_b27 + cmp r1, r0, lsl #28 + bcc .L_divide_b28 + cmp r1, r0, lsl #29 + bcc .L_divide_b29 + cmp r1, r0, lsl #30 + bcc .L_divide_b30 +.L_divide_b32: + cmp r1, r0, lsl #31 + subhs r1, r1,r0, lsl #31 + addhs r3, r3,r2, lsl #31 +.L_divide_b31: + cmp r1, r0, lsl #30 + subhs r1, r1,r0, lsl #30 + addhs r3, r3,r2, lsl #30 +.L_divide_b30: + cmp r1, r0, lsl #29 + subhs r1, r1,r0, lsl #29 + addhs r3, r3,r2, lsl #29 +.L_divide_b29: + cmp r1, r0, lsl #28 + subhs r1, r1,r0, lsl #28 + addhs r3, r3,r2, lsl #28 +.L_divide_b28: + cmp r1, r0, lsl #27 + subhs r1, r1,r0, lsl #27 + addhs r3, r3,r2, lsl #27 +.L_divide_b27: + cmp r1, r0, lsl #26 + subhs r1, r1,r0, lsl #26 + addhs r3, r3,r2, lsl #26 +.L_divide_b26: + cmp r1, r0, lsl #25 + subhs r1, r1,r0, lsl #25 + addhs r3, r3,r2, lsl #25 +.L_divide_b25: + cmp r1, r0, lsl #24 + subhs r1, r1,r0, lsl #24 + addhs r3, r3,r2, lsl #24 +.L_divide_b24: + cmp r1, r0, lsl #23 + subhs r1, r1,r0, lsl #23 + addhs r3, r3,r2, lsl #23 +.L_divide_b23: + cmp r1, r0, lsl #22 + subhs r1, r1,r0, lsl #22 + addhs r3, r3,r2, lsl #22 +.L_divide_b22: + cmp r1, r0, lsl #21 + subhs r1, r1,r0, lsl #21 + addhs r3, r3,r2, lsl #21 +.L_divide_b21: + cmp r1, r0, lsl #20 + subhs r1, r1,r0, lsl #20 + addhs r3, r3,r2, lsl #20 +.L_divide_b20: + cmp r1, r0, lsl #19 + subhs r1, r1,r0, lsl #19 + addhs r3, r3,r2, lsl #19 +.L_divide_b19: + cmp r1, r0, lsl #18 + subhs r1, r1,r0, lsl #18 + addhs r3, r3,r2, lsl #18 +.L_divide_b18: + cmp r1, 
r0, lsl #17 + subhs r1, r1,r0, lsl #17 + addhs r3, r3,r2, lsl #17 +.L_divide_b17: + cmp r1, r0, lsl #16 + subhs r1, r1,r0, lsl #16 + addhs r3, r3,r2, lsl #16 +.L_divide_b16: + cmp r1, r0, lsl #15 + subhs r1, r1,r0, lsl #15 + addhs r3, r3,r2, lsl #15 +.L_divide_b15: + cmp r1, r0, lsl #14 + subhs r1, r1,r0, lsl #14 + addhs r3, r3,r2, lsl #14 +.L_divide_b14: + cmp r1, r0, lsl #13 + subhs r1, r1,r0, lsl #13 + addhs r3, r3,r2, lsl #13 +.L_divide_b13: + cmp r1, r0, lsl #12 + subhs r1, r1,r0, lsl #12 + addhs r3, r3,r2, lsl #12 +.L_divide_b12: + cmp r1, r0, lsl #11 + subhs r1, r1,r0, lsl #11 + addhs r3, r3,r2, lsl #11 +.L_divide_b11: + cmp r1, r0, lsl #10 + subhs r1, r1,r0, lsl #10 + addhs r3, r3,r2, lsl #10 +.L_divide_b10: + cmp r1, r0, lsl #9 + subhs r1, r1,r0, lsl #9 + addhs r3, r3,r2, lsl #9 +.L_divide_b9: + cmp r1, r0, lsl #8 + subhs r1, r1,r0, lsl #8 + addhs r3, r3,r2, lsl #8 +.L_divide_b8: + cmp r1, r0, lsl #7 + subhs r1, r1,r0, lsl #7 + addhs r3, r3,r2, lsl #7 +.L_divide_b7: + cmp r1, r0, lsl #6 + subhs r1, r1,r0, lsl #6 + addhs r3, r3,r2, lsl #6 +.L_divide_b6: + cmp r1, r0, lsl #5 + subhs r1, r1,r0, lsl #5 + addhs r3, r3,r2, lsl #5 +.L_divide_b5: + cmp r1, r0, lsl #4 + subhs r1, r1,r0, lsl #4 + addhs r3, r3,r2, lsl #4 +.L_divide_b4: + cmp r1, r0, lsl #3 + subhs r1, r1,r0, lsl #3 + addhs r3, r3,r2, lsl #3 +.L_divide_b3: + cmp r1, r0, lsl #2 + subhs r1, r1,r0, lsl #2 + addhs r3, r3,r2, lsl #2 +.L_divide_b2: + cmp r1, r0, lsl #1 + subhs r1, r1,r0, lsl #1 + addhs r3, r3,r2, lsl #1 +.L_divide_b1: + cmp r1, r0 + subhs r1, r1, r0 + addhs r3, r3, r2 +.L_divide_b0: + + tst ip, #0x20000000 + bne .L_udivide_l1 + mov r0, r3 + cmp ip, #0 + rsbmi r1, r1, #0 + movs ip, ip, lsl #1 + bicmi r0, r0, #0x80000000 /* Fix incase we divided 0x80000000 */ + rsbmi r0, r0, #0 + mov pc, lr + +.L_udivide_l1: + tst ip, #0x10000000 + mov r1, r1, lsl #1 + orrne r1, r1, #1 + mov r3, r3, lsl #1 + cmp r1, r0 + subhs r1, r1, r0 + addhs r3, r3, r2 + mov r0, r3 + mov pc, lr diff --git 
a/lib/libc/arch/arm/gen/fabs.c b/lib/libc/arch/arm/gen/fabs.c new file mode 100644 index 00000000000..6a79b3aea14 --- /dev/null +++ b/lib/libc/arch/arm/gen/fabs.c @@ -0,0 +1,44 @@ +/* $NetBSD: fabs.c,v 1.2 2002/05/26 11:48:01 wiz Exp $ */ + +/* + * Copyright (c) 1996 Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Mark Brinicombe + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/* + * fabs(x) returns the absolute value of x. + */ + +double +fabs(double x) +{ + if (x < 0) + x = -x; + return(x); +} diff --git a/lib/libc/arch/arm/gen/floatlib.c b/lib/libc/arch/arm/gen/floatlib.c new file mode 100644 index 00000000000..3d625739a81 --- /dev/null +++ b/lib/libc/arch/arm/gen/floatlib.c @@ -0,0 +1,966 @@ +/* +** libgcc support for software floating point. +** Copyright (C) 1991 by Pipeline Associates, Inc. All rights reserved. +** Permission is granted to do *anything* you want with this file, +** commercial or otherwise, provided this message remains intact. So there! +** I would appreciate receiving any updates/patches/changes that anyone +** makes, and am willing to be the repository for said changes (am I +** making a big mistake?). + +Warning! Only single-precision is actually implemented. This file +won't really be much use until double-precision is supported. + +However, once that is done, this file might make possible +cross-compilation for an IEEE target machine from a non-IEEE +host such as a VAX. + +If you'd like to work on completing this, please talk to rms@gnu.ai.mit.edu. + +--> Double precision floating support added by James Carlson on 20 April 1998. + +** +** Pat Wood +** Pipeline Associates, Inc. +** pipeline!phw@motown.com or +** sun!pipeline!phw or +** uunet!motown!pipeline!phw +** +** 05/01/91 -- V1.0 -- first release to gcc mailing lists +** 05/04/91 -- V1.1 -- added float and double prototypes and return values +** -- fixed problems with adding and subtracting zero +** -- fixed rounding in truncdfsf2 +** -- fixed SWAP define and tested on 386 +*/ + +/* +** The following are routines that replace the libgcc soft floating point +** routines that are called automatically when -msoft-float is selected. +** The support single and double precision IEEE format, with provisions +** for byte-swapped machines (tested on 386). 
Some of the double-precision +** routines work at full precision, but most of the hard ones simply punt +** and call the single precision routines, producing a loss of accuracy. +** long long support is not assumed or included. +** Overall accuracy is close to IEEE (actually 68882) for single-precision +** arithmetic. I think there may still be a 1 in 1000 chance of a bit +** being rounded the wrong way during a multiply. I'm not fussy enough to +** bother with it, but if anyone is, knock yourself out. +** +** Efficiency has only been addressed where it was obvious that something +** would make a big difference. Anyone who wants to do this right for +** best speed should go in and rewrite in assembler. +** +** I have tested this only on a 68030 workstation and 386/ix integrated +** in with -msoft-float. +*/ + +/* the following deal with IEEE single-precision numbers */ +#define EXCESS 126 +#define SIGNBIT 0x80000000 +#define HIDDEN (1 << 23) +#define SIGN(fp) ((fp) & SIGNBIT) +#define EXP(fp) (((fp) >> 23) & 0xFF) +#define MANT(fp) (((fp) & 0x7FFFFF) | HIDDEN) +#define PACK(s,e,m) ((s) | ((e) << 23) | (m)) + +/* the following deal with IEEE double-precision numbers */ +#define EXCESSD 1022 +#define HIDDEND (1 << 20) +#define EXPD(fp) (((fp.l.upper) >> 20) & 0x7FF) +#define SIGND(fp) ((fp.l.upper) & SIGNBIT) +#define MANTD(fp) (((((fp.l.upper) & 0xFFFFF) | HIDDEND) << 10) | \ + (fp.l.lower >> 22)) +#define HIDDEND_LL ((long long)1 << 52) +#define MANTD_LL(fp) ((fp.ll & (HIDDEND_LL-1)) | HIDDEND_LL) +#define PACKD_LL(s,e,m) (((long long)((s)+((e)<<20))<<32)|(m)) + +/* define SWAP for 386/960 reverse-byte-order brain-damaged CPUs */ +union double_long { + double d; +#ifdef SWAP + struct { + unsigned long lower; + long upper; + } l; +#else + struct { + long upper; + unsigned long lower; + } l; +#endif + long long ll; +}; + +union float_long + { + float f; + long l; + }; + +#if 0 +/* add two floats */ +float +__addsf3 (float a1, float a2) +{ + long mant1, mant2; + union 
float_long fl1, fl2; + int exp1, exp2; + int sign = 0; + + fl1.f = a1; + fl2.f = a2; + + /* check for zero args */ + if (!fl1.l) { + fl1.f = fl2.f; + goto test_done; + } + if (!fl2.l) + goto test_done; + + exp1 = EXP (fl1.l); + exp2 = EXP (fl2.l); + + if (exp1 > exp2 + 25) + goto test_done; + if (exp2 > exp1 + 25) { + fl1.f = fl2.f; + goto test_done; + } + + /* do everything in excess precision so's we can round later */ + mant1 = MANT (fl1.l) << 6; + mant2 = MANT (fl2.l) << 6; + + if (SIGN (fl1.l)) + mant1 = -mant1; + if (SIGN (fl2.l)) + mant2 = -mant2; + + if (exp1 > exp2) + { + mant2 >>= exp1 - exp2; + } + else + { + mant1 >>= exp2 - exp1; + exp1 = exp2; + } + mant1 += mant2; + + if (mant1 < 0) + { + mant1 = -mant1; + sign = SIGNBIT; + } + else if (!mant1) { + fl1.f = 0; + goto test_done; + } + + /* normalize up */ + while (!(mant1 & 0xE0000000)) + { + mant1 <<= 1; + exp1--; + } + + /* normalize down? */ + if (mant1 & (1 << 30)) + { + mant1 >>= 1; + exp1++; + } + + /* round to even */ + mant1 += (mant1 & 0x40) ? 0x20 : 0x1F; + + /* normalize down? 
*/ + if (mant1 & (1 << 30)) + { + mant1 >>= 1; + exp1++; + } + + /* lose extra precision */ + mant1 >>= 6; + + /* turn off hidden bit */ + mant1 &= ~HIDDEN; + + /* pack up and go home */ + fl1.l = PACK (sign, exp1, mant1); +test_done: + return (fl1.f); +} +#endif + +#if 0 +/* subtract two floats */ +float +__subsf3 (float a1, float a2) +{ + union float_long fl1, fl2; + + fl1.f = a1; + fl2.f = a2; + + /* check for zero args */ + if (!fl2.l) + return (fl1.f); + if (!fl1.l) + return (-fl2.f); + + /* twiddle sign bit and add */ + fl2.l ^= SIGNBIT; + return __addsf3 (a1, fl2.f); +} +#endif + +#if 0 +/* compare two floats */ +long +__cmpsf2 (float a1, float a2) +{ + union float_long fl1, fl2; + + fl1.f = a1; + fl2.f = a2; + + if (SIGN (fl1.l) && SIGN (fl2.l)) + { + fl1.l ^= SIGNBIT; + fl2.l ^= SIGNBIT; + } + if (fl1.l < fl2.l) + return (-1); + if (fl1.l > fl2.l) + return (1); + return (0); +} +#endif + +#if 0 +/* multiply two floats */ +float +__mulsf3 (float a1, float a2) +{ + union float_long fl1, fl2; + unsigned long result; + int exp; + int sign; + + fl1.f = a1; + fl2.f = a2; + + if (!fl1.l || !fl2.l) { + fl1.f = 0; + goto test_done; + } + + /* compute sign and exponent */ + sign = SIGN (fl1.l) ^ SIGN (fl2.l); + exp = EXP (fl1.l) - EXCESS; + exp += EXP (fl2.l); + + fl1.l = MANT (fl1.l); + fl2.l = MANT (fl2.l); + + /* the multiply is done as one 16x16 multiply and two 16x8 multiples */ + result = (fl1.l >> 8) * (fl2.l >> 8); + result += ((fl1.l & 0xFF) * (fl2.l >> 8)) >> 8; + result += ((fl2.l & 0xFF) * (fl1.l >> 8)) >> 8; + + result >>= 2; + if (result & 0x20000000) + { + /* round */ + result += 0x20; + result >>= 6; + } + else + { + /* round */ + result += 0x10; + result >>= 5; + exp--; + } + if (result & (HIDDEN<<1)) { + result >>= 1; + exp++; + } + + result &= ~HIDDEN; + + /* pack up and go home */ + fl1.l = PACK (sign, exp, result); +test_done: + return (fl1.f); +} +#endif + +#if 0 +/* divide two floats */ +float +__divsf3 (float a1, float a2) +{ + union 
float_long fl1, fl2; + int result; + int mask; + int exp, sign; + + fl1.f = a1; + fl2.f = a2; + + /* subtract exponents */ + exp = EXP (fl1.l) - EXP (fl2.l) + EXCESS; + + /* compute sign */ + sign = SIGN (fl1.l) ^ SIGN (fl2.l); + + /* divide by zero??? */ + if (!fl2.l) + /* return NaN or -NaN */ + return (sign ? 0xFFFFFFFF : 0x7FFFFFFF); + + /* numerator zero??? */ + if (!fl1.l) + return (0); + + /* now get mantissas */ + fl1.l = MANT (fl1.l); + fl2.l = MANT (fl2.l); + + /* this assures we have 25 bits of precision in the end */ + if (fl1.l < fl2.l) + { + fl1.l <<= 1; + exp--; + } + + /* now we perform repeated subtraction of fl2.l from fl1.l */ + mask = 0x1000000; + result = 0; + while (mask) + { + if (fl1.l >= fl2.l) + { + result |= mask; + fl1.l -= fl2.l; + } + fl1.l <<= 1; + mask >>= 1; + } + + /* round */ + result += 1; + + /* normalize down */ + exp++; + result >>= 1; + + result &= ~HIDDEN; + + /* pack up and go home */ + fl1.l = PACK (sign, exp, result); + return (fl1.f); +} +#endif + +/* convert int to double */ +double +__floatsidf (long a1) +{ + int sign = 0, exp = 31 + EXCESSD; + union double_long dl; + + if (!a1) + { + dl.l.upper = dl.l.lower = 0; + return (dl.d); + } + + if (a1 < 0) + { + sign = SIGNBIT; + a1 = -a1; + } + + while (a1 < 0x1000000) + { + a1 <<= 4; + exp -= 4; + } + + while (a1 < 0x40000000) + { + a1 <<= 1; + exp--; + } + + /* pack up and go home */ + dl.l.upper = sign; + dl.l.upper |= exp << 20; + dl.l.upper |= (a1 >> 10) & ~HIDDEND; + dl.l.lower = a1 << 22; + + return (dl.d); +} + +#if 0 +double +__floatdidf (long long a1) +{ + int exp = 63 + EXCESSD; + union double_long dl; + + dl.l.upper = dl.l.lower = 0; + if (a1 == 0) + return (dl.d); + + if (a1 < 0) { + dl.l.upper = SIGNBIT; + a1 = -a1; + } + + while (a1 < (long long)1<<54) { + a1 <<= 8; + exp -= 8; + } + while (a1 < (long long)1<<62) { + a1 <<= 1; + exp -= 1; + } + + /* pack up and go home */ + dl.ll |= (a1 >> 10) & ~HIDDEND_LL; + dl.l.upper |= exp << 20; + + return (dl.d); +} 
+#endif + +#if 0 +float +__floatsisf (long a1) +{ + (float)__floatsidf(a1); +} +#endif + +#if 0 +float +__floatdisf (long long a1) +{ + (float)__floatdidf(a1); +} +#endif + +#if 0 +/* negate a float */ +float +__negsf2 (float a1) +{ + union float_long fl1; + + fl1.f = a1; + if (!fl1.l) + return (0); + + fl1.l ^= SIGNBIT; + return (fl1.f); +} +#endif + +/* negate a double */ +double +__negdf2 (double a1) +{ + union double_long dl1; + + dl1.d = a1; + + if (!dl1.l.upper && !dl1.l.lower) + return (dl1.d); + + dl1.l.upper ^= SIGNBIT; + return (dl1.d); +} + +/* convert float to double */ +double +__extendsfdf2 (float a1) +{ + union float_long fl1; + union double_long dl; + int exp; + + fl1.f = a1; + + if (!fl1.l) + { + dl.l.upper = dl.l.lower = 0; + return (dl.d); + } + + dl.l.upper = SIGN (fl1.l); + exp = EXP (fl1.l) - EXCESS + EXCESSD; + dl.l.upper |= exp << 20; + dl.l.upper |= (MANT (fl1.l) & ~HIDDEN) >> 3; + dl.l.lower = MANT (fl1.l) << 29; + + return (dl.d); +} + +/* convert double to float */ +float +__truncdfsf2 (double a1) +{ + int exp; + long mant; + union float_long fl; + union double_long dl1; + + dl1.d = a1; + + if (!dl1.l.upper && !dl1.l.lower) + return (float)(0); + + exp = EXPD (dl1) - EXCESSD + EXCESS; + + /* shift double mantissa 6 bits so we can round */ + mant = MANTD (dl1) >> 6; + + /* now round and shift down */ + mant += 1; + mant >>= 1; + + /* did the round overflow? 
*/ + if (mant & 0xFE000000) + { + mant >>= 1; + exp++; + } + + mant &= ~HIDDEN; + + /* pack up and go home */ + fl.l = PACK (SIGND (dl1), exp, mant); + return (fl.f); +} + +/* compare two doubles */ +long +__cmpdf2 (double a1, double a2) +{ + union double_long dl1, dl2; + + dl1.d = a1; + dl2.d = a2; + + if (SIGND (dl1) && SIGND (dl2)) + { + dl1.l.upper ^= SIGNBIT; + dl2.l.upper ^= SIGNBIT; + } + if (dl1.l.upper < dl2.l.upper) + return (-1); + if (dl1.l.upper > dl2.l.upper) + return (1); + if (dl1.l.lower < dl2.l.lower) + return (-1); + if (dl1.l.lower > dl2.l.lower) + return (1); + return (0); +} + +/* convert double to int */ +long +__fixdfsi (double a1) +{ + union double_long dl1; + int exp; + long l; + + dl1.d = a1; + + if (!dl1.l.upper && !dl1.l.lower) + return (0); + + exp = EXPD (dl1) - EXCESSD - 31; + l = MANTD (dl1); + + if (exp > 0) + return SIGND(dl1) ? (1<<31) : ((1ul<<31)-1); + + /* shift down until exp = 0 or l = 0 */ + if (exp < 0 && exp > -32 && l) + l >>= -exp; + else + return (0); + + return (SIGND (dl1) ? -l : l); +} + +#if 0 +/* convert double to int */ +long long +__fixdfdi (double a1) +{ + union double_long dl1; + int exp; + long long l; + + dl1.d = a1; + + if (!dl1.l.upper && !dl1.l.lower) + return (0); + + exp = EXPD (dl1) - EXCESSD - 64; + l = MANTD_LL(dl1); + + if (exp > 0) { + l = (long long)1<<63; + if (!SIGND(dl1)) + l--; + return l; + } + + /* shift down until exp = 0 or l = 0 */ + if (exp < 0 && exp > -64 && l) + l >>= -exp; + else + return (0); + + return (SIGND (dl1) ? 
-l : l); +} + +/* convert double to unsigned int */ +unsigned long +__fixunsdfsi (double a1) +{ + union double_long dl1; + int exp; + unsigned long l; + + dl1.d = a1; + + if (!dl1.l.upper && !dl1.l.lower) + return (0); + + exp = EXPD (dl1) - EXCESSD - 32; + l = (((((dl1.l.upper) & 0xFFFFF) | HIDDEND) << 11) | (dl1.l.lower >> 21)); + + if (exp > 0) + return (0xFFFFFFFFul); /* largest integer */ + + /* shift down until exp = 0 or l = 0 */ + if (exp < 0 && exp > -32 && l) + l >>= -exp; + else + return (0); + + return (l); +} + +/* convert double to unsigned int */ +unsigned long long +__fixunsdfdi (double a1) +{ + union double_long dl1; + int exp; + unsigned long long l; + + dl1.d = a1; + + if (dl1.ll == 0) + return (0); + + exp = EXPD (dl1) - EXCESSD - 64; + + l = dl1.ll; + + if (exp > 0) + return (unsigned long long)-1; + + /* shift down until exp = 0 or l = 0 */ + if (exp < 0 && exp > -64 && l) + l >>= -exp; + else + return (0); + + return (l); +} +#endif + +/* addtwo doubles */ +double +__adddf3 (double a1, double a2) +{ + long long mant1, mant2; + union double_long fl1, fl2; + int exp1, exp2; + int sign = 0; + + fl1.d = a1; + fl2.d = a2; + + /* check for zero args */ + if (!fl2.ll) + goto test_done; + if (!fl1.ll) { + fl1.d = fl2.d; + goto test_done; + } + + exp1 = EXPD(fl1); + exp2 = EXPD(fl2); + + if (exp1 > exp2 + 54) + goto test_done; + if (exp2 > exp1 + 54) { + fl1.d = fl2.d; + goto test_done; + } + + /* do everything in excess precision so's we can round later */ + mant1 = MANTD_LL(fl1) << 9; + mant2 = MANTD_LL(fl2) << 9; + + if (SIGND(fl1)) + mant1 = -mant1; + if (SIGND(fl2)) + mant2 = -mant2; + + if (exp1 > exp2) + mant2 >>= exp1 - exp2; + else { + mant1 >>= exp2 - exp1; + exp1 = exp2; + } + mant1 += mant2; + + if (mant1 < 0) { + mant1 = -mant1; + sign = SIGNBIT; + } else if (!mant1) { + fl1.d = 0; + goto test_done; + } + + /* normalize up */ + while (!(mant1 & ((long long)7<<61))) { + mant1 <<= 1; + exp1--; + } + + /* normalize down? 
*/ + if (mant1 & ((long long)3<<62)) { + mant1 >>= 1; + exp1++; + } + + /* round to even */ + mant1 += (mant1 & (1<<9)) ? (1<<8) : ((1<<8)-1); + + /* normalize down? */ + if (mant1 & ((long long)3<<62)) { + mant1 >>= 1; + exp1++; + } + + /* lose extra precision */ + mant1 >>= 9; + + /* turn off hidden bit */ + mant1 &= ~HIDDEND_LL; + + /* pack up and go home */ + fl1.ll = PACKD_LL(sign,exp1,mant1); + +test_done: + return (fl1.d); +} + +#if 1 +/* subtract two doubles */ +double +__subdf3 (double a1, double a2) +{ + union double_long fl1, fl2; + + fl1.d = a1; + fl2.d = a2; + + /* check for zero args */ + if (!fl2.ll) + return (fl1.d); + /* twiddle sign bit and add */ + fl2.l.upper ^= SIGNBIT; + if (!fl1.ll) + return (fl2.d); + return __adddf3 (a1, fl2.d); +} +#endif + +/* multiply two doubles */ +double +__muldf3 (double a1, double a2) +{ + union double_long fl1, fl2; + unsigned long long result; + int exp; + int sign; + + fl1.d = a1; + fl2.d = a2; + + if (!fl1.ll || !fl2.ll) { + fl1.d = 0; + goto test_done; + } + + /* compute sign and exponent */ + sign = SIGND(fl1) ^ SIGND(fl2); + exp = EXPD(fl1) - EXCESSD; + exp += EXPD(fl2); + + fl1.ll = MANTD_LL(fl1); + fl2.ll = MANTD_LL(fl2); + + /* the multiply is done as one 31x31 multiply and two 31x21 multiples */ + result = (fl1.ll >> 21) * (fl2.ll >> 21); + result += ((fl1.ll & 0x1FFFFF) * (fl2.ll >> 21)) >> 21; + result += ((fl2.ll & 0x1FFFFF) * (fl1.ll >> 21)) >> 21; + + result >>= 2; + if (result & ((long long)1<<61)) { + /* round */ + result += 1<<8; + result >>= 9; + } else { + /* round */ + result += 1<<7; + result >>= 8; + exp--; + } + if (result & (HIDDEND_LL<<1)) { + result >>= 1; + exp++; + } + + result &= ~HIDDEND_LL; + + /* pack up and go home */ + fl1.ll = PACKD_LL(sign,exp,result); +test_done: + return (fl1.d); +} + +/* divide two doubles */ +double +__divdf3 (double a1, double a2) +{ + union double_long fl1, fl2; + long long mask,result; + int exp, sign; + + fl1.d = a1; + fl2.d = a2; + + /* subtract 
exponents */
+	exp = EXPD(fl1) - EXPD(fl2) + EXCESSD;
+
+	/* compute sign */
+	sign = SIGND(fl1) ^ SIGND(fl2);
+
+	/* numerator zero??? */
+	if (fl1.ll == 0) {
+		/* divide by zero??? */
+		if (fl2.ll == 0)
+			fl1.ll = ((unsigned long long)1<<63)-1; /* NaN */
+		else
+			fl1.ll = 0;
+		goto test_done;
+	}
+
+	/* return +Inf or -Inf */
+	if (fl2.ll == 0) {
+		fl1.ll = PACKD_LL(SIGND(fl1),2047,0);
+		goto test_done;
+	}
+
+
+	/* now get mantissas */
+	fl1.ll = MANTD_LL(fl1);
+	fl2.ll = MANTD_LL(fl2);
+
+	/* this assures we have 54 bits of precision in the end */
+	if (fl1.ll < fl2.ll) {
+		fl1.ll <<= 1;
+		exp--;
+	}
+
+	/* now we perform repeated subtraction of fl2.ll from fl1.ll */
+	mask = (long long)1<<53;
+	result = 0;
+	while (mask) {
+		if (fl1.ll >= fl2.ll)
+		{
+			result |= mask;
+			fl1.ll -= fl2.ll;
+		}
+		fl1.ll <<= 1;
+		mask >>= 1;
+	}
+
+	/* round */
+	result += 1;
+
+	/* normalize down */
+	exp++;
+	result >>= 1;
+
+	result &= ~HIDDEND_LL;
+
+	/* pack up and go home */
+	fl1.ll = PACKD_LL(sign, exp, result);
+
+test_done:
+	return (fl1.d);
+}
+
+/*
+ * Comparison entry points called by compiler-generated code.  Each one
+ * returns an int whose relation to zero mirrors the relation being
+ * tested (the libgcc convention).  The ordered comparisons hand both
+ * operands to __cmpdf2() unchanged as doubles; narrowing them to float
+ * first would throw away mantissa and exponent bits, so that close or
+ * very large values would wrongly compare equal.
+ */
+
+/* a1 > a2: returns 1 (> 0) when true, 0 otherwise */
+int
+__gtdf2 (double a1, double a2)
+{
+	return __cmpdf2 (a1, a2) > 0;
+}
+
+/* a1 >= a2: returns 0 (>= 0) when true, -1 otherwise */
+int
+__gedf2 (double a1, double a2)
+{
+	return (__cmpdf2 (a1, a2) >= 0) - 1;
+}
+
+/* a1 < a2: returns -1 (< 0) when true, 0 otherwise */
+int
+__ltdf2 (double a1, double a2)
+{
+	return - (__cmpdf2 (a1, a2) < 0);
+}
+
+/* a1 <= a2: returns 0 (<= 0) when true, 1 otherwise */
+int
+__ledf2 (double a1, double a2)
+{
+	return __cmpdf2 (a1, a2) > 0;
+}
+
+/*
+ * a1 == a2: returns 0 when the two bit patterns are identical and
+ * non-zero otherwise, which is what callers test for.
+ * NOTE: this is a bitwise test, so +0.0 and -0.0 compare unequal and a
+ * NaN compares equal to an identical NaN.
+ */
+int
+__eqdf2 (double a1, double a2)
+{
+	union double_long dl1, dl2;
+
+	dl1.d = a1;
+	dl2.d = a2;
+	return dl1.ll != dl2.ll;
+}
+
+/* a1 != a2: returns non-zero when the two bit patterns differ */
+int
+__nedf2 (double a1, double a2)
+{
+	union double_long dl1, dl2;
+
+	dl1.d = a1;
+	dl2.d = a2;
+	return dl1.ll != dl2.ll;
+}
diff --git a/lib/libc/arch/arm/gen/flt_rounds.c b/lib/libc/arch/arm/gen/flt_rounds.c
new file mode 100644
index 00000000000..2227ccd1a60
--- /dev/null
+++ b/lib/libc/arch/arm/gen/flt_rounds.c
@@ -0,0 +1,76 @@
+/* $NetBSD: flt_rounds.c,v 1.1 2000/12/29 20:13:48 bjh21 Exp $ */
+
+/*
+ * Copyright (c) 1996 Mark Brinicombe
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Mark Brinicombe + * for the NetBSD Project. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include <sys/types.h>
+#include <ieeefp.h>
+
+static const int map[] = {
+	1,	/* round to nearest */
+	2,	/* round to positive infinity */
+	3,	/* round to negative infinity */
+	0	/* round to zero */
+};
+
+/*
+ * Return the current FP rounding mode
+ *
+ * Returns:
+ *	0 - round to zero
+ *	1 - round to nearest
+ *	2 - round to positive infinity
+ *	3 - round to negative infinity
+ *
+ * ok all we need to do is get the current FP rounding mode
+ * index our map table and return the appropriate value.
+ *
+ * HOWEVER:
+ * The ARM FPA codes the rounding mode into the actual FP instructions
+ * so there is no such thing as a global rounding mode.
+ * The default is round to nearest if rounding is not explicitly specified.
+ * FP instructions generated by GCC will not explicitly specify a rounding
+ * mode.
+ *
+ * So the best we can do is to return the rounding mode FP instructions
+ * use if rounding is not specified which is round to nearest.
+ *
+ * This could change in the future with new floating point emulators or
+ * soft float FP libraries.
+ */
+
+int __flt_rounds(void);
+
+int
+__flt_rounds(void)
+{
+	return (map[fpgetround() & 3]);	/* mask keeps the index inside the 4-entry map[] */
+}
diff --git a/lib/libc/arch/arm/gen/fpgetround.S b/lib/libc/arch/arm/gen/fpgetround.S
new file mode 100644
index 00000000000..7da59a7ce41
--- /dev/null
+++ b/lib/libc/arch/arm/gen/fpgetround.S
@@ -0,0 +1,7 @@
+/* BROKEN: stub -- always returns 0 instead of reading any real FP state */
+
+#include <machine/asm.h>
+
+ENTRY(fpgetround)
+	mov	r0, #0x0		/* result is the constant 0 */
+	mov	pc, lr			/* return */
diff --git a/lib/libc/arch/arm/gen/infinity.c b/lib/libc/arch/arm/gen/infinity.c
new file mode 100644
index 00000000000..08f45bc2ee5
--- /dev/null
+++ b/lib/libc/arch/arm/gen/infinity.c
@@ -0,0 +1,20 @@
+/* $NetBSD: infinity.c,v 1.3 2002/02/19 20:08:19 bjh21 Exp $ */
+
+/*
+ * IEEE-compatible infinity.c -- public domain.
+ */
+
+#include <sys/types.h>
+#include <math.h>
+#include <machine/endian.h>
+
+char __infinity[] __attribute__((__aligned__(sizeof(double)))) =	/* the 8 bytes of +Inf; flat list so every byte is stored */
+#if BYTE_ORDER == BIG_ENDIAN
+	{ 0x7f, (char)0xf0, 0, 0, 0, 0, 0, 0 };	/* sign/exponent bytes first */
+#else
+#ifdef __VFP_FP__
+	{ 0, 0, 0, 0, 0, 0, (char)0xf0, 0x7f };	/* sign/exponent bytes last */
+#else
+	{ 0, 0, (char)0xf0, 0x7f, 0, 0, 0, 0 };	/* high 32-bit word first, bytes reversed within each word */
+#endif /* __VFP_FP__ */
+#endif /* BYTE_ORDER == BIG_ENDIAN */
diff --git a/lib/libc/arch/arm/gen/isinf.c b/lib/libc/arch/arm/gen/isinf.c
new file mode 100644
index 00000000000..83716f76a2a
--- /dev/null
+++ b/lib/libc/arch/arm/gen/isinf.c
@@ -0,0 +1,52 @@
+/* $OpenBSD: isinf.c,v 1.1 2004/02/01 05:30:40 drahn Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char rcsid[] = "$OpenBSD: isinf.c,v 1.1 2004/02/01 05:30:40 drahn Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+#include <machine/ieee.h>
+#include <math.h>
+
+int
+isinf(double d)		/* non-zero iff d has the Inf/NaN exponent and an all-zero fraction */
+{
+	union { double v; struct ieee_double s; } u;	/* view the bits through a union, not a pointer cast */
+
+	u.v = d;
+	return (u.s.dbl_exp == DBL_EXP_INFNAN &&
+	    u.s.dbl_frach == 0 && u.s.dbl_fracl == 0);
+}
diff --git a/lib/libc/arch/arm/gen/isnan.c b/lib/libc/arch/arm/gen/isnan.c
new file mode 100644
index 00000000000..2b83689b77d
--- /dev/null
+++ b/lib/libc/arch/arm/gen/isnan.c
@@ -0,0 +1,52 @@
+/* $OpenBSD: isnan.c,v 1.1 2004/02/01 05:30:40 drahn Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char rcsid[] = "$OpenBSD: isnan.c,v 1.1 2004/02/01 05:30:40 drahn Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+#include <machine/ieee.h>
+#include <math.h>
+
+int
+isnan(double d)		/* non-zero iff d has the Inf/NaN exponent and a non-zero fraction */
+{
+	union { double v; struct ieee_double s; } u;	/* view the bits through a union, not a pointer cast */
+
+	u.v = d;
+	return (u.s.dbl_exp == DBL_EXP_INFNAN &&
+	    (u.s.dbl_frach != 0 || u.s.dbl_fracl != 0));
+}
diff --git a/lib/libc/arch/arm/gen/ldexp.c b/lib/libc/arch/arm/gen/ldexp.c
new file mode 100644
index 00000000000..d35207c98b3
--- /dev/null
+++ b/lib/libc/arch/arm/gen/ldexp.c
@@ -0,0 +1,151 @@
+/* $NetBSD: ldexp.c,v 1.2 2001/11/08 22:45:45 bjh21 Exp $ */
+
+/*-
+ * Copyright (c) 1999 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Charles M. Hannum.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include <sys/types.h> +#include <machine/ieee.h> +#include <errno.h> +#include <math.h> + +/* + * Multiply the given value by 2^exponent. 
+ */
+double
+ldexp(val, expo)	/* returns val scaled by 2^expo; errno = ERANGE on the zero-underflow and overflow returns */
+	double val;
+	int expo;
+{
+	register int oldexp, newexp;
+	union {
+		double v;
+		struct ieee_double s;
+	} u, mul;	/* u: working value; mul: power-of-two factor built from its exponent field */
+
+	u.v = val;
+	oldexp = u.s.dbl_exp;
+
+	/*
+	 * If input is zero, Inf or NaN, just return it.
+	 */
+	if (u.v == 0.0 || oldexp == DBL_EXP_INFNAN)
+		return (val);
+
+	if (oldexp == 0) {
+		/*
+		 * u.v is denormal. We must adjust it so that the exponent
+		 * arithmetic below will work.
+		 */
+		if (expo <= DBL_EXP_BIAS) {
+			/*
+			 * Optimization: if the scaling can be done in a single
+			 * multiply, or underflows, just do it now.
+			 */
+			if (expo <= -DBL_FRACBITS) {
+				errno = ERANGE;
+				return (0.0);
+			}
+			mul.v = 0.0;
+			mul.s.dbl_exp = expo + DBL_EXP_BIAS;	/* only the exponent field is set, fraction stays 0 */
+			u.v *= mul.v;
+			if (u.v == 0.0) {
+				errno = ERANGE;
+				return (0.0);
+			}
+			return (u.v);
+		} else {
+			/*
+			 * We know that expo is very large, and therefore the
+			 * result cannot be denormal (though it may be Inf).
+			 * Shift u.v by just enough to make it normal.
+			 */
+			mul.v = 0.0;
+			mul.s.dbl_exp = DBL_FRACBITS + DBL_EXP_BIAS;
+			u.v *= mul.v;
+			expo -= DBL_FRACBITS;	/* account for the scaling just applied */
+			oldexp = u.s.dbl_exp;	/* re-read the exponent of the now-normal value */
+		}
+	}
+
+	/*
+	 * u.v is now normalized and oldexp has been adjusted if necessary.
+	 * Calculate the new exponent and check for underflow and overflow.
+	 */
+	newexp = oldexp + expo;
+
+	if (newexp <= 0) {
+		/*
+		 * The output number is either denormal or underflows (see
+		 * comments in machine/ieee.h).
+		 */
+		if (newexp <= -DBL_FRACBITS) {
+			errno = ERANGE;
+			return (0.0);
+		}
+		/*
+		 * Denormalize the result. We do this with a multiply. If expo
+		 * is very large, it won't fit in a double, so we have to
+		 * adjust the exponent first. This is safe because we know
+		 * that u.v is normal at this point.
+		 */
+		if (expo <= -DBL_EXP_BIAS) {
+			u.s.dbl_exp = 1;	/* drop u.v to the smallest normal exponent ... */
+			expo += oldexp - 1;	/* ... and take that step out of the remaining scale */
+		}
+		mul.v = 0.0;
+		mul.s.dbl_exp = expo + DBL_EXP_BIAS;
+		u.v *= mul.v;
+		return (u.v);	/* NOTE(review): errno is not set on this denormal-result path */
+	} else if (newexp >= DBL_EXP_INFNAN) {
+		/*
+		 * The result overflowed; return +/-Inf.
+		 */
+		u.s.dbl_exp = DBL_EXP_INFNAN;
+		u.s.dbl_frach = 0;
+		u.s.dbl_fracl = 0;
+		errno = ERANGE;
+		return (u.v);	/* sign field of u is untouched, so the sign of val is kept */
+	} else {
+		/*
+		 * The result is normal; just replace the old exponent with the
+		 * new one.
+		 */
+		u.s.dbl_exp = newexp;
+		return (u.v);
+	}
+}
diff --git a/lib/libc/arch/arm/gen/modf_ieee754.c b/lib/libc/arch/arm/gen/modf_ieee754.c
new file mode 100644
index 00000000000..3b5799b5834
--- /dev/null
+++ b/lib/libc/arch/arm/gen/modf_ieee754.c
@@ -0,0 +1,100 @@
+/* $NetBSD: modf_ieee754.c,v 1.1 2003/05/12 15:15:16 kleink Exp $ */
+
+/*
+ * Copyright (c) 1994, 1995 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Chris G. Demetriou
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#include <sys/types.h>
+#include <machine/ieee.h>
+#include <errno.h>
+#include <math.h>
+
+/*
+ * double modf(double val, double *iptr)
+ * returns: f and i such that |f| < 1.0, (f + i) = val, and
+ * sign(f) == sign(i) == sign(val).
+ *
+ * Beware signedness when doing subtraction, and also operand size!
+ */
+double
+modf(double val, double *iptr)
+{
+	union ieee_double_u u, v;
+	u_int64_t frac;
+
+	/*
+	 * Inf and NaN have no fractional part to split off, but the
+	 * caller's integral slot must still be written:
+	 *	NaN:	i = NaN,     return NaN
+	 *	+/-Inf:	i = +/-Inf,  return a zero carrying the same sign
+	 */
+	u.dblu_d = val;
+	if (u.dblu_dbl.dbl_exp == DBL_EXP_INFNAN) {
+		*iptr = u.dblu_d;
+		if (u.dblu_dbl.dbl_frach != 0 || u.dblu_dbl.dbl_fracl != 0)
+			return (u.dblu_d);	/* non-zero fraction: NaN */
+		v.dblu_d = 0.0;
+		v.dblu_dbl.dbl_sign = u.dblu_dbl.dbl_sign;
+		return (v.dblu_d);
+	}
+
+	/*
+	 * If input can't have a fractional part, return
+	 * (appropriately signed) zero, and make i be the input.
+	 */
+	if ((int)u.dblu_dbl.dbl_exp - DBL_EXP_BIAS > DBL_FRACBITS - 1) {
+		*iptr = u.dblu_d;
+		v.dblu_d = 0.0;
+		v.dblu_dbl.dbl_sign = u.dblu_dbl.dbl_sign;
+		return (v.dblu_d);
+	}
+
+	/*
+	 * If |input| < 1.0, return it, and set i to the appropriately
+	 * signed zero.
+	 */
+	if (u.dblu_dbl.dbl_exp < DBL_EXP_BIAS) {
+		v.dblu_d = 0.0;
+		v.dblu_dbl.dbl_sign = u.dblu_dbl.dbl_sign;
+		*iptr = v.dblu_d;
+		return (u.dblu_d);
+	}
+
+	/*
+	 * There can be a fractional part of the input.
+	 * If you look at the math involved for a few seconds, it's
+	 * plain to see that the integral part is the input, with the
+	 * low (DBL_FRACBITS - (exponent - DBL_EXP_BIAS)) bits zeroed,
+	 * the fractional part is the part with the rest of the
+	 * bits zeroed. Just zeroing the high bits to get the
+	 * fractional part would yield a fraction in need of
+	 * normalization. Therefore, we take the easy way out, and
+	 * just use subtraction to get the fractional part.
+	 */
+	v.dblu_d = u.dblu_d;
+	/* Zero the low bits of the fraction, the sleazy way. */
+	frac = ((u_int64_t)v.dblu_dbl.dbl_frach << 32) + v.dblu_dbl.dbl_fracl;
+	frac >>= DBL_FRACBITS - (u.dblu_dbl.dbl_exp - DBL_EXP_BIAS);
+	frac <<= DBL_FRACBITS - (u.dblu_dbl.dbl_exp - DBL_EXP_BIAS);
+	v.dblu_dbl.dbl_fracl = frac & 0xffffffff;
+	v.dblu_dbl.dbl_frach = frac >> 32;
+	*iptr = v.dblu_d;
+
+	/* fractional part = input - integral part, forced to the input's sign */
+	u.dblu_d -= v.dblu_d;
+	u.dblu_dbl.dbl_sign = v.dblu_dbl.dbl_sign;
+	return (u.dblu_d);
+}
diff --git a/lib/libc/arch/arm/gen/setjmp.S b/lib/libc/arch/arm/gen/setjmp.S
new file mode 100644
index 00000000000..98d34fbec3f
--- /dev/null
+++ b/lib/libc/arch/arm/gen/setjmp.S
@@ -0,0 +1,134 @@
+/* $NetBSD: setjmp.S,v 1.5 2003/04/05 23:08:51 bjh21 Exp $ */
+
+/*
+ * Copyright (c) 1997 Mark Brinicombe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Mark Brinicombe
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+#include <machine/setjmp.h>
+
+/*
+ * C library -- setjmp, longjmp
+ *
+ *	longjmp(a,v)
+ * will generate a "return(v)" from the last call to
+ *	setjmp(a)
+ * by restoring the registers that setjmp saved in the jmp_buf a.
+ * The previous signal state is restored.
+ *
+ * jmp_buf layout used by both routines (byte offsets):
+ *	0	magic word (_JB_MAGIC_SETJMP)
+ *	4	48 bytes of FP registers, then 4 bytes of FPSR
+ *		(the 52 bytes are skipped, not written, when SOFTFLOAT)
+ *	56	r4-r14 (11 words)
+ *	100	signal mask (word 25)
+ */
+
+ENTRY(setjmp)
+	/* Fetch the current signal mask by calling sigblock with an empty mask */
+	stmfd	sp!, {r0, r14}		/* keep jmp_buf pointer and lr across the call */
+	mov	r0, #0x00000000
+
+	bl	PIC_SYM(_C_LABEL(sigblock), PLT)
+	mov	r1, r0			/* r1 = value returned by sigblock */
+
+	ldmfd	sp!, {r0, r14}
+
+	/* Store signal mask */
+	str	r1, [r0, #(25 * 4)]
+
+	ldr	r1, .Lsetjmp_magic
+	str	r1, [r0], #4		/* magic into word 0; r0 steps past it */
+
+#ifdef SOFTFLOAT
+	add	r0, r0, #52		/* step over the FP area without writing it */
+#else
+	/* Store fp registers */
+	sfm	f4, 4, [r0], #48
+	/* Store fpsr */
+	rfs	r1
+	str	r1, [r0], #0x0004
+#endif	/*SOFTFLOAT*/
+	/* Store integer registers */
+	stmia	r0, {r4-r14}
+	mov	r0, #0x00000000		/* the direct call returns 0 */
+	mov	r15, r14
+
+.Lsetjmp_magic:
+	.word	_JB_MAGIC_SETJMP
+
+
+ENTRY(longjmp)
+	ldr	r2, .Lsetjmp_magic
+	ldr	r3, [r0]
+	teq	r2, r3			/* word 0 must hold the magic setjmp stored */
+	bne	botch
+
+	/* Fetch signal mask */
+	ldr	r2, [r0, #(25 * 4)]
+
+	/* Set signal mask */
+	stmfd	sp!, {r0, r1, r14}	/* jmp_buf pointer, return value and lr survive the call */
+	sub	sp, sp, #4	/* align the stack */
+
+	mov	r0, r2
+	bl	PIC_SYM(_C_LABEL(sigsetmask), PLT)
+
+	add	sp, sp, #4	/* unalign the stack */
+	ldmfd	sp!, {r0, r1, r14}
+
+	add	r0, r0, #4		/* step past the magic word */
+#ifdef SOFTFLOAT
+	add	r0, r0, #52		/* step over the FP area */
+#else
+	/* Restore fp registers */
+	lfm	f4, 4, [r0], #48
+	/* Restore FPSR */
+	ldr	r4, [r0], #0x0004	/* r4 is only a scratch here; it is reloaded just below */
+	wfs	r4
+#endif	/* SOFTFLOAT */
+	/* Restore integer registers */
+	ldmia	r0, {r4-r14}
+
+	/* Validate sp and r14: a zero in either one goes to botch */
+	teq	sp, #0
+	teqne	r14, #0
+	beq	botch
+
+	/* Set return value */
+
+	mov	r0, r1
+	teq	r0, #0x00000000
+	moveq	r0, #0x00000001		/* a requested value of 0 is returned as 1 */
+#ifdef __ARM_26__
+	mov	r15, r14		/* NOTE(review): identical to the #else arm -- confirm intended */
+#else
+	mov	r15, r14
+#endif
+
+	/* validation failed, die die die. */
+botch:
+	bl	PIC_SYM(_C_LABEL(longjmperror), PLT)
+	bl	PIC_SYM(_C_LABEL(abort), PLT)
+	b	. - 8		/* Cannot get here; the target is the botch label again */
diff --git a/lib/libc/arch/arm/gen/sigsetjmp.S b/lib/libc/arch/arm/gen/sigsetjmp.S
new file mode 100644
index 00000000000..a4fbdbbd809
--- /dev/null
+++ b/lib/libc/arch/arm/gen/sigsetjmp.S
@@ -0,0 +1,61 @@
+/* $NetBSD: sigsetjmp.S,v 1.3 2002/08/17 19:54:30 thorpej Exp $ */
+
+/*
+ * Copyright (c) 1997 Mark Brinicombe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Mark Brinicombe
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <machine/asm.h> +#include <machine/setjmp.h> + +/* + * C library -- sigsetjmp, siglongjmp + * + * siglongjmp(a,v) + * will generate a "return(v)" from the last call to + * sigsetjmp(a, m) + * by handing a to _longjmp or longjmp, chosen by the magic word in a. + * sigsetjmp itself defers to _setjmp when m is 0 and to setjmp otherwise. + */ + +ENTRY(sigsetjmp) + teq r1, #0 + beq PIC_SYM(_C_LABEL(_setjmp), PLT) /* m == 0: tail-branch to _setjmp */ + b PIC_SYM(_C_LABEL(setjmp), PLT) /* m != 0: tail-branch to setjmp */ + +.L_setjmp_magic: + .word _JB_MAGIC__SETJMP + +ENTRY(siglongjmp) + ldr r2, .L_setjmp_magic + ldr r3, [r0] /* r3 = magic word stored at the start of the jmp_buf */ + teq r2, r3 + beq PIC_SYM(_C_LABEL(_longjmp), PLT) /* buffer is tagged _JB_MAGIC__SETJMP */ + b PIC_SYM(_C_LABEL(longjmp), PLT) /* anything else: longjmp checks the magic itself */ diff --git a/lib/libc/arch/arm/net/Makefile.inc b/lib/libc/arch/arm/net/Makefile.inc new file mode 100644 index 00000000000..10fabd39b67 --- /dev/null +++ b/lib/libc/arch/arm/net/Makefile.inc @@ -0,0 +1,4 @@ +# $NetBSD: Makefile.inc,v 1.1 2000/12/29 20:13:53 bjh21 Exp $ + +# hton* and nto* functions provided by ../gen/byte_swap_*.S +SRCS+= diff --git a/lib/libc/arch/arm/softfloat/Makefile.inc b/lib/libc/arch/arm/softfloat/Makefile.inc new file mode 100644 index 00000000000..1e16835d27a --- /dev/null +++ b/lib/libc/arch/arm/softfloat/Makefile.inc @@ -0,0 +1,10 @@ +# $OpenBSD: Makefile.inc,v 1.1 2004/02/01 05:30:41 drahn Exp $ +SRCS += eqdf2.c eqsf2.c fpgetmask.c fpgetround.c fpgetsticky.c fpsetmask.c +SRCS += fpsetround.c fpsetsticky.c gedf2.c gesf2.c gtdf2.c gtsf2.c ledf2.c +SRCS += lesf2.c ltdf2.c ltsf2.c nedf2.c negdf2.c negsf2.c nesf2.c +#SRCS += 
timesoftfloat.c +SRCS += unorddf2.c unordsf2.c +SRCS += softfloat.c +CFLAGS += -DSOFTFLOAT_FOR_GCC + +.PATH: arch/arm/softfloat diff --git a/lib/libc/arch/arm/softfloat/arm-gcc.h b/lib/libc/arch/arm/softfloat/arm-gcc.h new file mode 100644 index 00000000000..67574834223 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/arm-gcc.h @@ -0,0 +1,98 @@ +/* $NetBSD: arm-gcc.h,v 1.2 2001/02/21 18:09:25 bjh21 Exp $ */ + +/* +------------------------------------------------------------------------------- +One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined. +------------------------------------------------------------------------------- +*/ +#ifdef __ARMEB__ +#define BIGENDIAN +#else +#define LITTLEENDIAN +#endif + +/* +------------------------------------------------------------------------------- +The macro `BITS64' can be defined to indicate that 64-bit integer types are +supported by the compiler. +------------------------------------------------------------------------------- +*/ +#define BITS64 + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines the most convenient type that holds +integers of at least as many bits as specified. For example, `uint8' should +be the most convenient type that can hold unsigned integers of as many as +8 bits. The `flag' type must be able to hold either a 0 or 1. For most +implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed +to the same as `int'. 
+------------------------------------------------------------------------------- +*/ +typedef int flag; +typedef int uint8; +typedef int int8; +typedef int uint16; +typedef int int16; +typedef unsigned int uint32; +typedef signed int int32; +#ifdef BITS64 +typedef unsigned long long int uint64; +typedef signed long long int int64; +#endif + +/* +------------------------------------------------------------------------------- +Each of the following `typedef's defines a type that holds integers +of _exactly_ the number of bits specified. For instance, for most +implementations of C, `bits16' and `sbits16' should be `typedef'ed to +`unsigned short int' and `signed short int' (or `short int'), respectively. +------------------------------------------------------------------------------- +*/ +typedef unsigned char bits8; +typedef signed char sbits8; +typedef unsigned short int bits16; +typedef signed short int sbits16; +typedef unsigned int bits32; +typedef signed int sbits32; +#ifdef BITS64 +typedef unsigned long long int bits64; +typedef signed long long int sbits64; +#endif + +#ifdef BITS64 +/* +------------------------------------------------------------------------------- +The `LIT64' macro takes as its argument a textual integer literal and +if necessary ``marks'' the literal as having a 64-bit integer type. +For example, the GNU C Compiler (`gcc') requires that 64-bit literals be +appended with the letters `LL' standing for `long long', which is `gcc's +name for the 64-bit integer type. Some compilers may allow `LIT64' to be +defined as the identity macro: `#define LIT64( a ) a'. +------------------------------------------------------------------------------- +*/ +#define LIT64( a ) a##LL +#endif + +/* +------------------------------------------------------------------------------- +The macro `INLINE' can be used before functions that should be inlined. If +a compiler does not support explicit inlining, this macro should be defined +to be `static'. 
+------------------------------------------------------------------------------- +*/ +#define INLINE static __inline + +/* +------------------------------------------------------------------------------- +The ARM FPA is odd in that it stores doubles high-order word first, no matter +what the endianness of the CPU. VFP is sane. So when neither __VFP_FP__ nor __ARMEB__ is defined the macros below swap the two 32-bit halves of the value (assumes `a' is an unsigned 64-bit quantity -- TODO confirm); swapping twice is the identity, hence MANGLE reuses DEMANGLE. +------------------------------------------------------------------------------- +*/ +#if defined(__VFP_FP__) || defined(__ARMEB__) +#define FLOAT64_DEMANGLE(a) (a) +#define FLOAT64_MANGLE(a) (a) +#else +#define FLOAT64_DEMANGLE(a) (((a) << 32) | ((a) >> 32)) +#define FLOAT64_MANGLE(a) FLOAT64_DEMANGLE(a) +#endif diff --git a/lib/libc/arch/arm/softfloat/eqdf2.c b/lib/libc/arch/arm/softfloat/eqdf2.c new file mode 100644 index 00000000000..e4a57c02b90 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/eqdf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: eqdf2.c,v 1.1 2000/06/06 08:15:02 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include <sys/cdefs.h> + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +flag __eqdf2(float64, float64); + +flag +__eqdf2(float64 a, float64 b) +{ + + /* libgcc1.c says !(a == b): the result is 0 when float64_eq() is true and 1 otherwise */ + return !float64_eq(a, b); +} diff --git a/lib/libc/arch/arm/softfloat/eqsf2.c b/lib/libc/arch/arm/softfloat/eqsf2.c new file mode 100644 index 00000000000..3ca7c4f8851 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/eqsf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: eqsf2.c,v 1.1 2000/06/06 08:15:03 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include <sys/cdefs.h> + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +flag __eqsf2(float32, float32); + +flag +__eqsf2(float32 a, float32 b) +{ + + /* libgcc1.c says !(a == b): the result is 0 when float32_eq() is true and 1 otherwise */ + return !float32_eq(a, b); +} diff --git a/lib/libc/arch/arm/softfloat/fpgetmask.c b/lib/libc/arch/arm/softfloat/fpgetmask.c new file mode 100644 index 00000000000..862d1bb7fc7 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/fpgetmask.c @@ -0,0 +1,60 @@ +/* $NetBSD: fpgetmask.c,v 1.3 2002/05/12 13:12:45 bjh21 Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include "namespace.h" + +#include <ieeefp.h> +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif + +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(_fpgetmask,fpgetmask); +#endif + +fp_except +fpgetmask(void) +{ + /* Read-only accessor: returns the value of float_exception_mask unchanged. */ + return float_exception_mask; +} diff --git a/lib/libc/arch/arm/softfloat/fpgetround.c b/lib/libc/arch/arm/softfloat/fpgetround.c new file mode 100644 index 00000000000..f471885a188 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/fpgetround.c @@ -0,0 +1,59 @@ +/* $NetBSD: fpgetround.c,v 1.2 2002/01/13 21:45:53 thorpej Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include "namespace.h" + +#include <ieeefp.h> +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(_fpgetround,fpgetround) /* NOTE(review): no trailing ';' here, unlike the other fp*.c files in this change -- confirm which form the macro expects */ +#endif + +fp_rnd +fpgetround(void) +{ + /* Read-only accessor: returns the value of float_rounding_mode unchanged. */ + return float_rounding_mode; +} diff --git a/lib/libc/arch/arm/softfloat/fpgetsticky.c b/lib/libc/arch/arm/softfloat/fpgetsticky.c new file mode 100644 index 00000000000..f708dea8e25 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/fpgetsticky.c @@ -0,0 +1,59 @@ +/* $NetBSD: fpgetsticky.c,v 1.2 2002/01/13 21:45:53 thorpej Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. 
+ * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> + +#include "namespace.h" + +#include <ieeefp.h> +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(_fpgetsticky,fpgetsticky); +#endif + +fp_except +fpgetsticky(void) +{ + /* Read-only accessor: returns float_exception_flags and leaves the flags as they are. */ + return float_exception_flags; +} diff --git a/lib/libc/arch/arm/softfloat/fpsetmask.c b/lib/libc/arch/arm/softfloat/fpsetmask.c new file mode 100644 index 00000000000..2e70ee3c534 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/fpsetmask.c @@ -0,0 +1,62 @@ +/* $NetBSD: fpsetmask.c,v 1.3 2002/05/12 13:12:45 bjh21 Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include "namespace.h" + +#include <ieeefp.h> +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(_fpsetmask,fpsetmask); +#endif + +fp_except +fpsetmask(fp_except mask) +{ + fp_except old; + /* Install mask as the new float_exception_mask and return the value it replaces. */ + old = float_exception_mask; + float_exception_mask = mask; + return old; +} diff --git a/lib/libc/arch/arm/softfloat/fpsetround.c b/lib/libc/arch/arm/softfloat/fpsetround.c new file mode 100644 index 00000000000..9a41515b7b0 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/fpsetround.c @@ -0,0 +1,62 @@ +/* $NetBSD: fpsetround.c,v 1.2 2002/01/13 21:45:53 thorpej Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> + +#include "namespace.h" + +#include <ieeefp.h> +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(_fpsetround,fpsetround); +#endif + +fp_rnd +fpsetround(fp_rnd rnd_dir) +{ + fp_rnd old; + /* Install rnd_dir as the new float_rounding_mode and return the value it replaces. */ + old = float_rounding_mode; + float_rounding_mode = rnd_dir; + return old; +} diff --git a/lib/libc/arch/arm/softfloat/fpsetsticky.c b/lib/libc/arch/arm/softfloat/fpsetsticky.c new file mode 100644 index 00000000000..66bcb189b73 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/fpsetsticky.c @@ -0,0 +1,62 @@ +/* $NetBSD: fpsetsticky.c,v 1.2 2002/01/13 21:45:54 thorpej Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> + +#include "namespace.h" + +#include <ieeefp.h> +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif +#include "milieu.h" +#include "softfloat.h" + +#ifdef __weak_alias +__weak_alias(_fpsetsticky,fpsetsticky); +#endif + +fp_except +fpsetsticky(fp_except except) +{ + fp_except old; + /* Overwrite float_exception_flags with except (a plain assignment, not an OR) and return the old flags. */ + old = float_exception_flags; + float_exception_flags = except; + return old; +} diff --git a/lib/libc/arch/arm/softfloat/gedf2.c b/lib/libc/arch/arm/softfloat/gedf2.c new file mode 100644 index 00000000000..f421c7ff654 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/gedf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: gedf2.c,v 1.1 2000/06/06 08:15:05 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __gedf2(float64, float64); + +flag +__gedf2(float64 a, float64 b) +{ + + /* libgcc1.c says (a >= b) - 1; a >= b is evaluated as b <= a, giving 0 when float64_le() yields 1 and -1 when it yields 0 */ + return float64_le(b, a) - 1; +} diff --git a/lib/libc/arch/arm/softfloat/gesf2.c b/lib/libc/arch/arm/softfloat/gesf2.c new file mode 100644 index 00000000000..2f09528cda1 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/gesf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: gesf2.c,v 1.1 2000/06/06 08:15:05 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __gesf2(float32, float32); + +flag +__gesf2(float32 a, float32 b) +{ + + /* libgcc1.c says (a >= b) - 1; a >= b is evaluated as b <= a, giving 0 when float32_le() yields 1 and -1 when it yields 0 */ + return float32_le(b, a) - 1; +} diff --git a/lib/libc/arch/arm/softfloat/gtdf2.c b/lib/libc/arch/arm/softfloat/gtdf2.c new file mode 100644 index 00000000000..2ece22660f7 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/gtdf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: gtdf2.c,v 1.1 2000/06/06 08:15:05 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __gtdf2(float64, float64); + +flag +__gtdf2(float64 a, float64 b) +{ + + /* libgcc1.c says a > b; evaluated with swapped operands as b < a */ + return float64_lt(b, a); +} diff --git a/lib/libc/arch/arm/softfloat/gtsf2.c b/lib/libc/arch/arm/softfloat/gtsf2.c new file mode 100644 index 00000000000..b855328c1aa --- /dev/null +++ b/lib/libc/arch/arm/softfloat/gtsf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: gtsf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __gtsf2(float32, float32); + +flag +__gtsf2(float32 a, float32 b) +{ + + /* libgcc1.c says a > b; evaluated with swapped operands as b < a */ + return float32_lt(b, a); +} diff --git a/lib/libc/arch/arm/softfloat/ledf2.c b/lib/libc/arch/arm/softfloat/ledf2.c new file mode 100644 index 00000000000..b34d8e2a355 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/ledf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: ledf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __ledf2(float64, float64); + +flag +__ledf2(float64 a, float64 b) +{ + + /* libgcc1.c says 1 - (a <= b): 0 when float64_le() yields 1, 1 when it yields 0 */ + return 1 - float64_le(a, b); +} diff --git a/lib/libc/arch/arm/softfloat/lesf2.c b/lib/libc/arch/arm/softfloat/lesf2.c new file mode 100644 index 00000000000..ed3a49308ef --- /dev/null +++ b/lib/libc/arch/arm/softfloat/lesf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: lesf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __lesf2(float32, float32); + +flag +__lesf2(float32 a, float32 b) +{ + + /* libgcc1.c says 1 - (a <= b): 0 when float32_le() yields 1, 1 when it yields 0 */ + return 1 - float32_le(a, b); +} diff --git a/lib/libc/arch/arm/softfloat/ltdf2.c b/lib/libc/arch/arm/softfloat/ltdf2.c new file mode 100644 index 00000000000..cdf5ed11372 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/ltdf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: ltdf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __ltdf2(float64, float64); + +flag +__ltdf2(float64 a, float64 b) +{ + + /* libgcc1.c says -(a < b): -1 when float64_lt() yields 1, 0 when it yields 0 */ + return -float64_lt(a, b); +} diff --git a/lib/libc/arch/arm/softfloat/ltsf2.c b/lib/libc/arch/arm/softfloat/ltsf2.c new file mode 100644 index 00000000000..c8c1371df9d --- /dev/null +++ b/lib/libc/arch/arm/softfloat/ltsf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: ltsf2.c,v 1.1 2000/06/06 08:15:06 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __ltsf2(float32, float32); + +flag +__ltsf2(float32 a, float32 b) +{ + + /* libgcc1.c says -(a < b): -1 when float32_lt() yields 1, 0 when it yields 0 */ + return -float32_lt(a, b); +} diff --git a/lib/libc/arch/arm/softfloat/milieu.h b/lib/libc/arch/arm/softfloat/milieu.h new file mode 100644 index 00000000000..a3f829c4281 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/milieu.h @@ -0,0 +1,48 @@ +/* $NetBSD: milieu.h,v 1.1 2000/12/29 20:13:54 bjh21 Exp $ */ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. 
Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Include common integer types and flags. +------------------------------------------------------------------------------- +*/ +#include "arm-gcc.h" + +/* +------------------------------------------------------------------------------- +Symbolic Boolean literals. +------------------------------------------------------------------------------- +*/ +enum { + FALSE = 0, + TRUE = 1 +}; diff --git a/lib/libc/arch/arm/softfloat/nedf2.c b/lib/libc/arch/arm/softfloat/nedf2.c new file mode 100644 index 00000000000..ebf6adb9f95 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/nedf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: nedf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __nedf2(float64, float64); + +flag +__nedf2(float64 a, float64 b) +{ + + /* libgcc1.c says a != b: 1 when float64_eq() is false, 0 when it is true */ + return !float64_eq(a, b); +} diff --git a/lib/libc/arch/arm/softfloat/negdf2.c b/lib/libc/arch/arm/softfloat/negdf2.c new file mode 100644 index 00000000000..045cf8851a2 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/negdf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: negdf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +float64 __negdf2(float64); + +float64 +__negdf2(float64 a) +{ + + /* libgcc1.c says -a; done by XOR-ing in bit 63 (presumably the sign bit), passed through FLOAT64_MANGLE first */ + return a ^ FLOAT64_MANGLE(0x8000000000000000ULL); +} diff --git a/lib/libc/arch/arm/softfloat/negsf2.c b/lib/libc/arch/arm/softfloat/negsf2.c new file mode 100644 index 00000000000..9046752a634 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/negsf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: negsf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +float32 __negsf2(float32); + +float32 +__negsf2(float32 a) +{ + + /* libgcc1.c says INTIFY(-a); done by XOR-ing in bit 31 (presumably the sign bit) */ + return a ^ 0x80000000; +} diff --git a/lib/libc/arch/arm/softfloat/nesf2.c b/lib/libc/arch/arm/softfloat/nesf2.c new file mode 100644 index 00000000000..db5f2c991ac --- /dev/null +++ b/lib/libc/arch/arm/softfloat/nesf2.c @@ -0,0 +1,21 @@ +/* $NetBSD: nesf2.c,v 1.1 2000/06/06 08:15:07 bjh21 Exp $ */ + +/* + * Written by Ben Harris, 2000. This file is in the Public Domain. 
+ */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __nesf2(float32, float32); + +flag +__nesf2(float32 a, float32 b) +{ + + /* libgcc1.c says a != b */ + return !float32_eq(a, b); +} diff --git a/lib/libc/arch/arm/softfloat/softfloat-for-gcc.h b/lib/libc/arch/arm/softfloat/softfloat-for-gcc.h new file mode 100644 index 00000000000..8352dd6e549 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/softfloat-for-gcc.h @@ -0,0 +1,43 @@ +/* $NetBSD: softfloat-for-gcc.h,v 1.6 2003/07/26 19:24:51 salo Exp $ */ + +/* + * Move private identifiers with external linkage into implementation + * namespace. -- Klaus Klein <kleink@NetBSD.org>, May 5, 1999 + */ +#define float_exception_flags _softfloat_float_exception_flags +#define float_exception_mask _softfloat_float_exception_mask +#define float_rounding_mode _softfloat_float_rounding_mode +#define float_raise _softfloat_float_raise +/* The following batch are called by GCC through wrappers */ +#define float32_eq _softfloat_float32_eq +#define float32_le _softfloat_float32_le +#define float32_lt _softfloat_float32_lt +#define float64_eq _softfloat_float64_eq +#define float64_le _softfloat_float64_le +#define float64_lt _softfloat_float64_lt + +/* + * Macros to define functions with the GCC expected names + */ + +#define float32_add __addsf3 +#define float64_add __adddf3 +#define float32_sub __subsf3 +#define float64_sub __subdf3 +#define float32_mul __mulsf3 +#define float64_mul __muldf3 +#define float32_div __divsf3 +#define float64_div __divdf3 +#define int32_to_float32 __floatsisf +#define int32_to_float64 __floatsidf +#define int64_to_float32 __floatdisf +#define int64_to_float64 __floatdidf +#define float32_to_int32_round_to_zero __fixsfsi +#define float64_to_int32_round_to_zero __fixdfsi +#define float32_to_int64_round_to_zero __fixsfdi +#define float64_to_int64_round_to_zero __fixdfdi +#define float32_to_uint32_round_to_zero __fixunssfsi +#define 
float64_to_uint32_round_to_zero __fixunsdfsi +#define float32_to_float64 __extendsfdf2 +#define float64_to_float32 __truncdfsf2 + diff --git a/lib/libc/arch/arm/softfloat/softfloat-macros.h b/lib/libc/arch/arm/softfloat/softfloat-macros.h new file mode 100644 index 00000000000..1f93f364c04 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/softfloat-macros.h @@ -0,0 +1,648 @@ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. 
+ +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +Shifts `a' right by the number of bits given in `count'. If any nonzero +bits are shifted off, they are ``jammed'' into the least significant bit of +the result by setting the least significant bit to 1. The value of `count' +can be arbitrarily large; in particular, if `count' is greater than 32, the +result will be either 0 or 1, depending on whether `a' is zero or nonzero. +The result is stored in the location pointed to by `zPtr'. +------------------------------------------------------------------------------- +*/ +__inline void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) +{ + bits32 z; + + if ( count == 0 ) { + z = a; + } + else if ( count < 32 ) { + z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); + } + else { + z = ( a != 0 ); + } + *zPtr = z; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the +number of bits given in `count'. Any bits shifted off are lost. The value +of `count' can be arbitrarily large; in particular, if `count' is greater +than 64, the result will be 0. The result is broken into two 32-bit pieces +which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +__inline void + shift64Right( + bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + bits32 z0, z1; + int8 negCount = ( - count ) & 31; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 32 ) { + z1 = ( a0<<negCount ) | ( a1>>count ); + z0 = a0>>count; + } + else { + z1 = ( count < 64 ) ? 
( a0>>( count & 31 ) ) : 0; + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the +number of bits given in `count'. If any nonzero bits are shifted off, they +are ``jammed'' into the least significant bit of the result by setting the +least significant bit to 1. The value of `count' can be arbitrarily large; +in particular, if `count' is greater than 64, the result will be either 0 +or 1, depending on whether the concatenation of `a0' and `a1' is zero or +nonzero. The result is broken into two 32-bit pieces which are stored at +the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +__inline void + shift64RightJamming( + bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + bits32 z0, z1; + int8 negCount = ( - count ) & 31; + + if ( count == 0 ) { + z1 = a1; + z0 = a0; + } + else if ( count < 32 ) { + z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); + z0 = a0>>count; + } + else { + if ( count == 32 ) { + z1 = a0 | ( a1 != 0 ); + } + else if ( count < 64 ) { + z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); + } + else { + z1 = ( ( a0 | a1 ) != 0 ); + } + z0 = 0; + } + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right +by 32 _plus_ the number of bits given in `count'. The shifted result is +at most 64 nonzero bits; these are broken into two 32-bit pieces which are +stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
The bits shifted +off form a third 32-bit result as follows: The _last_ bit shifted off is +the most-significant bit of the extra result, and the other 31 bits of the +extra result are all zero if and only if _all_but_the_last_ bits shifted off +were all zero. This extra result is stored in the location pointed to by +`z2Ptr'. The value of `count' can be arbitrarily large. + (This routine makes more sense if `a0', `a1', and `a2' are considered +to form a fixed-point value with binary point between `a1' and `a2'. This +fixed-point value is shifted right by the number of bits given in `count', +and the integer part of the result is returned at the locations pointed to +by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly +corrupted as described above, and is returned at the location pointed to by +`z2Ptr'.) +------------------------------------------------------------------------------- +*/ +__inline void + shift64ExtraRightJamming( + bits32 a0, + bits32 a1, + bits32 a2, + int16 count, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr + ) +{ + bits32 z0, z1, z2; + int8 negCount = ( - count ) & 31; + + if ( count == 0 ) { + z2 = a2; + z1 = a1; + z0 = a0; + } + else { + if ( count < 32 ) { + z2 = a1<<negCount; + z1 = ( a0<<negCount ) | ( a1>>count ); + z0 = a0>>count; + } + else { + if ( count == 32 ) { + z2 = a1; + z1 = a0; + } + else { + a2 |= a1; + if ( count < 64 ) { + z2 = a0<<negCount; + z1 = a0>>( count & 31 ); + } + else { + z2 = ( count == 64 ) ? a0 : ( a0 != 0 ); + z1 = 0; + } + } + z0 = 0; + } + z2 |= ( a2 != 0 ); + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the +number of bits given in `count'. Any bits shifted off are lost. The value +of `count' must be less than 32. 
The result is broken into two 32-bit +pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. +------------------------------------------------------------------------------- +*/ +__inline void + shortShift64Left( + bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + + *z1Ptr = a1<<count; + *z0Ptr = + ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) ); + +} + +/* +------------------------------------------------------------------------------- +Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left +by the number of bits given in `count'. Any bits shifted off are lost. +The value of `count' must be less than 32. The result is broken into three +32-bit pieces which are stored at the locations pointed to by `z0Ptr', +`z1Ptr', and `z2Ptr'. +------------------------------------------------------------------------------- +*/ +__inline void + shortShift96Left( + bits32 a0, + bits32 a1, + bits32 a2, + int16 count, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr + ) +{ + bits32 z0, z1, z2; + int8 negCount; + + z2 = a2<<count; + z1 = a1<<count; + z0 = a0<<count; + if ( 0 < count ) { + negCount = ( ( - count ) & 31 ); + z1 |= a2>>negCount; + z0 |= a1>>negCount; + } + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit +value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so +any carry out is lost. The result is broken into two 32-bit pieces which +are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 
+------------------------------------------------------------------------------- +*/ +__inline void + add64( + bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + bits32 z1; + + z1 = a1 + b1; + *z1Ptr = z1; + *z0Ptr = a0 + b0 + ( z1 < a1 ); + +} + +/* +------------------------------------------------------------------------------- +Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the +96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is +modulo 2^96, so any carry out is lost. The result is broken into three +32-bit pieces which are stored at the locations pointed to by `z0Ptr', +`z1Ptr', and `z2Ptr'. +------------------------------------------------------------------------------- +*/ +__inline void + add96( + bits32 a0, + bits32 a1, + bits32 a2, + bits32 b0, + bits32 b1, + bits32 b2, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr + ) +{ + bits32 z0, z1, z2; + int8 carry0, carry1; + + z2 = a2 + b2; + carry1 = ( z2 < a2 ); + z1 = a1 + b1; + carry0 = ( z1 < a1 ); + z0 = a0 + b0; + z1 += carry1; + z0 += ( z1 < carry1 ); + z0 += carry0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the +64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo +2^64, so any borrow out (carry out) is lost. The result is broken into two +32-bit pieces which are stored at the locations pointed to by `z0Ptr' and +`z1Ptr'. 
+------------------------------------------------------------------------------- +*/ +__inline void + sub64( + bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + + *z1Ptr = a1 - b1; + *z0Ptr = a0 - b0 - ( a1 < b1 ); + +} + +/* +------------------------------------------------------------------------------- +Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from +the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction +is modulo 2^96, so any borrow out (carry out) is lost. The result is broken +into three 32-bit pieces which are stored at the locations pointed to by +`z0Ptr', `z1Ptr', and `z2Ptr'. +------------------------------------------------------------------------------- +*/ +__inline void + sub96( + bits32 a0, + bits32 a1, + bits32 a2, + bits32 b0, + bits32 b1, + bits32 b2, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr + ) +{ + bits32 z0, z1, z2; + int8 borrow0, borrow1; + + z2 = a2 - b2; + borrow1 = ( a2 < b2 ); + z1 = a1 - b1; + borrow0 = ( a1 < b1 ); + z0 = a0 - b0; + z0 -= ( z1 < borrow1 ); + z1 -= borrow1; + z0 -= borrow0; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies `a' by `b' to obtain a 64-bit product. The product is broken +into two 32-bit pieces which are stored at the locations pointed to by +`z0Ptr' and `z1Ptr'. 
+------------------------------------------------------------------------------- +*/ +__inline void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr ) +{ + bits16 aHigh, aLow, bHigh, bLow; + bits32 z0, zMiddleA, zMiddleB, z1; + + aLow = a; + aHigh = a>>16; + bLow = b; + bHigh = b>>16; + z1 = ( (bits32) aLow ) * bLow; + zMiddleA = ( (bits32) aLow ) * bHigh; + zMiddleB = ( (bits32) aHigh ) * bLow; + z0 = ( (bits32) aHigh ) * bHigh; + zMiddleA += zMiddleB; + z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 ); + zMiddleA <<= 16; + z1 += zMiddleA; + z0 += ( z1 < zMiddleA ); + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b' +to obtain a 96-bit product. The product is broken into three 32-bit pieces +which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and +`z2Ptr'. +------------------------------------------------------------------------------- +*/ +__inline void + mul64By32To96( + bits32 a0, + bits32 a1, + bits32 b, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr + ) +{ + bits32 z0, z1, z2, more1; + + mul32To64( a1, b, &z1, &z2 ); + mul32To64( a0, b, &z0, &more1 ); + add64( z0, more1, 0, z1, &z0, &z1 ); + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the +64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit +product. The product is broken into four 32-bit pieces which are stored at +the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 
+------------------------------------------------------------------------------- +*/ +__inline void + mul64To128( + bits32 a0, + bits32 a1, + bits32 b0, + bits32 b1, + bits32 *z0Ptr, + bits32 *z1Ptr, + bits32 *z2Ptr, + bits32 *z3Ptr + ) +{ + bits32 z0, z1, z2, z3; + bits32 more1, more2; + + mul32To64( a1, b1, &z2, &z3 ); + mul32To64( a1, b0, &z1, &more2 ); + add64( z1, more2, 0, z2, &z1, &z2 ); + mul32To64( a0, b0, &z0, &more1 ); + add64( z0, more1, 0, z1, &z0, &z1 ); + mul32To64( a0, b1, &more1, &more2 ); + add64( more1, more2, 0, z2, &more1, &z2 ); + add64( z0, z1, 0, more1, &z0, &z1 ); + *z3Ptr = z3; + *z2Ptr = z2; + *z1Ptr = z1; + *z0Ptr = z0; + +} + +/* +------------------------------------------------------------------------------- +Returns an approximation to the 32-bit integer quotient obtained by dividing +`b' into the 64-bit value formed by concatenating `a0' and `a1'. The +divisor `b' must be at least 2^31. If q is the exact quotient truncated +toward zero, the approximation returned lies between q and q + 2 inclusive. +If the exact quotient q is larger than 32 bits, the maximum positive 32-bit +unsigned integer is returned. +------------------------------------------------------------------------------- +*/ +static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b ) +{ + bits32 b0, b1; + bits32 rem0, rem1, term0, term1; + bits32 z; + + if ( b <= a0 ) return 0xFFFFFFFF; + b0 = b>>16; + z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16; + mul32To64( b, z, &term0, &term1 ); + sub64( a0, a1, term0, term1, &rem0, &rem1 ); + while ( ( (sbits32) rem0 ) < 0 ) { + z -= 0x10000; + b1 = b<<16; + add64( rem0, rem1, b0, b1, &rem0, &rem1 ); + } + rem0 = ( rem0<<16 ) | ( rem1>>16 ); + z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0; + return z; + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns an approximation to the square root of the 32-bit significand given +by `a'. 
Considered as an integer, `a' must be at least 2^31. If bit 0 of +`aExp' (the least significant bit) is 1, the integer returned approximates +2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' +is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either +case, the approximation returned lies strictly within +/-2 of the exact +value. +------------------------------------------------------------------------------- +*/ +static bits32 estimateSqrt32( int16 aExp, bits32 a ) +{ + static const bits16 sqrtOddAdjustments[] = { + 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, + 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 + }; + static const bits16 sqrtEvenAdjustments[] = { + 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, + 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 + }; + int8 index; + bits32 z; + + index = ( a>>27 ) & 15; + if ( aExp & 1 ) { + z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; + z = ( ( a / z )<<14 ) + ( z<<15 ); + a >>= 1; + } + else { + z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; + z = a / z + z; + z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); + if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); + } + return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 ); + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns the number of leading 0 bits before the most-significant 1 bit of +`a'. If `a' is zero, 32 is returned. 
+------------------------------------------------------------------------------- +*/ +static int8 countLeadingZeros32( bits32 a ) +{ + static const int8 countLeadingZerosHigh[] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + int8 shiftCount; + + shiftCount = 0; + if ( a < 0x10000 ) { + shiftCount += 16; + a <<= 16; + } + if ( a < 0x1000000 ) { + shiftCount += 8; + a <<= 8; + } + shiftCount += countLeadingZerosHigh[ a>>24 ]; + return shiftCount; + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is +equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +__inline flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 == b0 ) && ( a1 == b1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less +than or equal to the 64-bit value formed by concatenating `b0' and `b1'. +Otherwise, returns 0. 
+------------------------------------------------------------------------------- +*/ +__inline flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less +than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +__inline flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not +equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, +returns 0. +------------------------------------------------------------------------------- +*/ +__inline flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) +{ + + return ( a0 != b0 ) || ( a1 != b1 ); + +} + diff --git a/lib/libc/arch/arm/softfloat/softfloat-specialize.h b/lib/libc/arch/arm/softfloat/softfloat-specialize.h new file mode 100644 index 00000000000..8ff3befd02a --- /dev/null +++ b/lib/libc/arch/arm/softfloat/softfloat-specialize.h @@ -0,0 +1,489 @@ +/* $NetBSD: softfloat-specialize,v 1.3 2002/05/12 13:12:45 bjh21 Exp $ */ + +/* This is a derivative work. */ + +/* +=============================================================================== + +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. 
The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#include <signal.h> + +/* +------------------------------------------------------------------------------- +Underflow tininess-detection mode, statically initialized to default value. +(The declaration in `softfloat.h' must match the `int8' type here.) +------------------------------------------------------------------------------- +*/ +#ifdef SOFTFLOAT_FOR_GCC +static +#endif +int8 float_detect_tininess = float_tininess_after_rounding; + +/* +------------------------------------------------------------------------------- +Raises the exceptions specified by `flags'. Floating-point traps can be +defined here if desired. It is currently not possible for such a trap to +substitute a result value. If traps are not implemented, this routine +should be simply `float_exception_flags |= flags;'. 
+------------------------------------------------------------------------------- +*/ +fp_except float_exception_mask = 0; +void float_raise( fp_except flags ) +{ + + float_exception_flags |= flags; + + if ( flags & float_exception_mask ) { + raise( SIGFPE ); + } +} + +/* +------------------------------------------------------------------------------- +Internal canonical NaN format. +------------------------------------------------------------------------------- +*/ +typedef struct { + flag sign; + bits64 high, low; +} commonNaNT; + +/* +------------------------------------------------------------------------------- +The pattern for a default generated single-precision NaN. +------------------------------------------------------------------------------- +*/ +#define float32_default_nan 0xFFFFFFFF + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is a NaN; +otherwise returns 0. +------------------------------------------------------------------------------- +*/ +#ifdef SOFTFLOAT_FOR_GCC +static +#endif +flag float32_is_nan( float32 a ) +{ + + return ( 0xFF000000 < (bits32) ( a<<1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is a signaling +NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +#if defined(SOFTFLOAT_FOR_GCC) && !defined(SOFTFLOATSPARC64_FOR_GCC) +static +#endif +flag float32_is_signaling_nan( float32 a ) +{ + + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. 
+------------------------------------------------------------------------------- +*/ +static commonNaNT float32ToCommonNaN( float32 a ) +{ + commonNaNT z; + + if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = a>>31; + z.low = 0; + z.high = ( (bits64) a )<<41; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the single- +precision floating-point format. +------------------------------------------------------------------------------- +*/ +static float32 commonNaNToFloat32( commonNaNT a ) +{ + + return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); + +} + +/* +------------------------------------------------------------------------------- +Takes two single-precision floating-point values `a' and `b', one of which +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +signaling NaN, the invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static float32 propagateFloat32NaN( float32 a, float32 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float32_is_nan( a ); + aIsSignalingNaN = float32_is_signaling_nan( a ); + bIsNaN = float32_is_nan( b ); + bIsSignalingNaN = float32_is_signaling_nan( b ); + a |= 0x00400000; + b |= 0x00400000; + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +/* +------------------------------------------------------------------------------- +The pattern for a default generated double-precision NaN. 
+------------------------------------------------------------------------------- +*/ +#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is a NaN; +otherwise returns 0. +------------------------------------------------------------------------------- +*/ +#ifdef SOFTFLOAT_FOR_GCC +static +#endif +flag float64_is_nan( float64 a ) +{ + + return ( LIT64( 0xFFE0000000000000 ) < + (bits64) ( FLOAT64_DEMANGLE(a)<<1 ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is a signaling +NaN; otherwise returns 0. +------------------------------------------------------------------------------- +*/ +#if defined(SOFTFLOAT_FOR_GCC) && !defined(SOFTFLOATSPARC64_FOR_GCC) +static +#endif +flag float64_is_signaling_nan( float64 a ) +{ + + return + ( ( ( FLOAT64_DEMANGLE(a)>>51 ) & 0xFFF ) == 0xFFE ) + && ( FLOAT64_DEMANGLE(a) & LIT64( 0x0007FFFFFFFFFFFF ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point NaN +`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +exception is raised. +------------------------------------------------------------------------------- +*/ +static commonNaNT float64ToCommonNaN( float64 a ) +{ + commonNaNT z; + + if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); + z.sign = FLOAT64_DEMANGLE(a)>>63; + z.low = 0; + z.high = FLOAT64_DEMANGLE(a)<<12; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the canonical NaN `a' to the double- +precision floating-point format. 
+------------------------------------------------------------------------------- +*/ +static float64 commonNaNToFloat64( commonNaNT a ) +{ + + return FLOAT64_MANGLE( + ( ( (bits64) a.sign )<<63 ) + | LIT64( 0x7FF8000000000000 ) + | ( a.high>>12 ) ); + +} + +/* +------------------------------------------------------------------------------- +Takes two double-precision floating-point values `a' and `b', one of which +is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +signaling NaN, the invalid exception is raised. +------------------------------------------------------------------------------- +*/ +static float64 propagateFloat64NaN( float64 a, float64 b ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float64_is_nan( a ); + aIsSignalingNaN = float64_is_signaling_nan( a ); + bIsNaN = float64_is_nan( b ); + bIsSignalingNaN = float64_is_signaling_nan( b ); + a |= FLOAT64_MANGLE(LIT64( 0x0008000000000000 )); + b |= FLOAT64_MANGLE(LIT64( 0x0008000000000000 )); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +The pattern for a default generated extended double-precision NaN. The +`high' and `low' values hold the most- and least-significant bits, +respectively. +------------------------------------------------------------------------------- +*/ +#define floatx80_default_nan_high 0xFFFF +#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) + +/* +------------------------------------------------------------------------------- +Returns 1 if the extended double-precision floating-point value `a' is a +NaN; otherwise returns 0. 
+-------------------------------------------------------------------------------
+*/
+flag floatx80_is_nan( floatx80 a )
+{
+    /* a.low<<1 ignores bit 63 of the significand; any other set bit with exponent 0x7FFF is a NaN. */
+    return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is a
+signaling NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_is_signaling_nan( floatx80 a )
+{
+    bits64 aLow;
+    /* aLow is a.low with bit 62 cleared, so a.low == aLow holds only if bit 62 was already clear. */
+    aLow = a.low & ~ LIT64( 0x4000000000000000 );
+    return
+           ( ( a.high & 0x7FFF ) == 0x7FFF )
+        && (bits64) ( aLow<<1 )
+        && ( a.low == aLow );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the
+invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT floatx80ToCommonNaN( floatx80 a )
+{
+    commonNaNT z;
+
+    if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a.high>>15;
+    z.low = 0;
+    z.high = a.low<<1;   /* shifts out bit 63 of the significand */
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the extended
+double-precision floating-point format.
+-------------------------------------------------------------------------------
+*/
+static floatx80 commonNaNToFloatx80( commonNaNT a )
+{
+    floatx80 z;
+    /* 0xC000... sets significand bits 63 and 62; a.high>>1 undoes the <<1 of the inverse conversion. */
+    z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
+    z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two extended double-precision floating-point values `a' and `b', one
+of which is a NaN, and returns the appropriate NaN result.
If either `a' or
+`b' is a signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+    /* Classify first: the |= below sets significand bits 63 and 62 on both operands. */
+    aIsNaN = floatx80_is_nan( a );
+    aIsSignalingNaN = floatx80_is_signaling_nan( a );
+    bIsNaN = floatx80_is_nan( b );
+    bIsSignalingNaN = floatx80_is_signaling_nan( b );
+    a.low |= LIT64( 0xC000000000000000 );
+    b.low |= LIT64( 0xC000000000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;   /* b only when a was signaling and b is a NaN too */
+    }
+    else {
+        return b;
+    }
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated quadruple-precision NaN. The `high' and
+`low' values hold the most- and least-significant bits, respectively.
+-------------------------------------------------------------------------------
+*/
+#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF )
+#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
+otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float128_is_nan( float128 a )
+{
+    /* a.high<<1 drops the sign; >= 0xFFFE... means the 15 bits below the sign are all ones. */
+    return
+           ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
+        && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
+    /* Second term: some bit of a.low or of the low 48 bits of a.high must be set. */
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the quadruple-precision floating-point value `a' is a
+signaling NaN; otherwise returns 0.
+-------------------------------------------------------------------------------
+*/
+flag float128_is_signaling_nan( float128 a )
+{
+    /* Bits 62..47 of a.high == 0xFFFE: bits 62..48 all ones and bit 47 clear. */
+    return
+           ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
+        && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
+    /* Second term: some bit below bit 47 of a.high, or in a.low, must be set. */
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the quadruple-precision floating-point NaN
+`a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+exception is raised.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float128ToCommonNaN( float128 a )
+{
+    commonNaNT z;
+
+    if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
+    z.sign = a.high>>63;
+    shortShift128Left( a.high, a.low, 16, &z.high, &z.low );   /* NOTE(review): helper defined elsewhere; presumably a 128-bit left shift by 16 - confirm */
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the canonical NaN `a' to the quadruple-
+precision floating-point format.
+-------------------------------------------------------------------------------
+*/
+static float128 commonNaNToFloat128( commonNaNT a )
+{
+    float128 z;
+
+    shift128Right( a.high, a.low, 16, &z.high, &z.low );
+    z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );   /* sets bits 62..47 of z.high */
+    return z;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes two quadruple-precision floating-point values `a' and `b', one of
+which is a NaN, and returns the appropriate NaN result. If either `a' or
+`b' is a signaling NaN, the invalid exception is raised.
+-------------------------------------------------------------------------------
+*/
+static float128 propagateFloat128NaN( float128 a, float128 b )
+{
+    flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+    /* Classify first: the |= below sets bit 47 of a.high/b.high, the bit float128_is_signaling_nan tests. */
+    aIsNaN = float128_is_nan( a );
+    aIsSignalingNaN = float128_is_signaling_nan( a );
+    bIsNaN = float128_is_nan( b );
+    bIsSignalingNaN = float128_is_signaling_nan( b );
+    a.high |= LIT64( 0x0000800000000000 );
+    b.high |= LIT64( 0x0000800000000000 );
+    if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
+    if ( aIsNaN ) {
+        return ( aIsSignalingNaN & bIsNaN ) ? b : a;   /* b only when a was signaling and b is a NaN too */
+    }
+    else {
+        return b;
+    }
+
+}
+
+#endif
+
diff --git a/lib/libc/arch/arm/softfloat/softfloat.c b/lib/libc/arch/arm/softfloat/softfloat.c
new file mode 100644
index 00000000000..34842f92afa
--- /dev/null
+++ b/lib/libc/arch/arm/softfloat/softfloat.c
@@ -0,0 +1,2346 @@
+/* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */
+
+/*
+ * This version hacked for use with gcc -msoft-float by bjh21.
+ * (Mostly a case of #ifdefing out things GCC doesn't need or provides
+ * itself).
+ */
+
+/*
+ * Things you may want to define:
+ *
+ * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
+ * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them
+ * properly renamed.
+ */
+
+/*
+ * This differs from the standard bits32/softfloat.c in that float64
+ * is defined to be a 64-bit integer rather than a structure. The
+ * structure is float64s, with translation between the two going via
+ * float64u.
+ */
+
+/*
+===============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-Point
+Arithmetic Package, Release 2a.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.
Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#include <sys/cdefs.h> + +#ifdef SOFTFLOAT_FOR_GCC +#include "softfloat-for-gcc.h" +#endif + +#include "milieu.h" +#include "softfloat.h" + +/* + * Conversions between floats as stored in memory and floats as + * SoftFloat uses them + */ +#ifndef FLOAT64_DEMANGLE +#define FLOAT64_DEMANGLE(a) (a) +#endif +#ifndef FLOAT64_MANGLE +#define FLOAT64_MANGLE(a) (a) +#endif + +/* +------------------------------------------------------------------------------- +Primitive arithmetic functions, including multi-word arithmetic, and +division and square root approximations. (Can be specialized to target if +desired.) 
+-------------------------------------------------------------------------------
+*/
+#include "softfloat-macros.h"
+
+/*
+-------------------------------------------------------------------------------
+Functions and definitions to determine: (1) whether tininess for underflow
+is detected before or after rounding by default, (2) what (if anything)
+happens when exceptions are raised, (3) how signaling NaNs are distinguished
+from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+are propagated from function inputs to output. These details are target-
+specific.
+-------------------------------------------------------------------------------
+*/
+#include "softfloat-specialize.h"
+
+/*
+-------------------------------------------------------------------------------
+Floating-point rounding mode and exception flags.
+-------------------------------------------------------------------------------
+*/
+fp_rnd float_rounding_mode = float_round_nearest_even;
+fp_except float_exception_flags = 0;
+
+/*
+-------------------------------------------------------------------------------
+Returns the fraction bits of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE bits32 extractFloat32Frac( float32 a )
+{
+    /* Low 23 bits. */
+    return a & 0x007FFFFF;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE int16 extractFloat32Exp( float32 a )
+{
+    /* Bits 30..23, still biased. */
+    return ( a>>23 ) & 0xFF;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit of the single-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloat32Sign( float32 a )
+{
+    /* Bit 31. */
+    return a>>31;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal single-precision floating-point value represented
+by the denormalized significand `aSig'. The normalized exponent and
+significand are stored at the locations pointed to by `zExpPtr' and
+`zSigPtr', respectively.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
+{
+    int8 shiftCount;
+    /* Assuming countLeadingZeros32 returns the leading-zero count, this shift moves the top set bit to bit 23. */
+    shiftCount = countLeadingZeros32( aSig ) - 8;
+    *zSigPtr = aSig<<shiftCount;
+    *zExpPtr = 1 - shiftCount;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+single-precision floating-point value, returning the result. After being
+shifted into the proper positions, the three fields are simply added
+together to form the result. This means that any integer portion of `zSig'
+will be added into the exponent. Since a properly normalized significand
+will have an integer portion equal to 1, the `zExp' input should be 1 less
+than the desired result exponent whenever `zSig' is a complete, normalized
+significand.
+-------------------------------------------------------------------------------
+*/
+INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    /* Fields are added, not ORed, so a carry out of bit 22 of zSig increments the exponent field. */
+    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper single-precision floating-
+point value corresponding to the abstract input.
Ordinarily, the abstract
+value is simply rounded and packed into the single-precision format, with
+the inexact exception raised if the abstract input cannot be represented
+exactly. However, if the abstract value is too large, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned. If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal single-
+precision floating-point number.
+    The input significand `zSig' has its binary point between bits 30
+and 29, which is 7 bits to the left of the usual location. This shifted
+significand must be normalized or smaller. If `zSig' is not normalized,
+`zExp' must be 0; in that case, the result returned is a subnormal number,
+and it must not require rounding. In the usual case that `zSig' is
+normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+The handling of underflow and overflow follows the IEC/IEEE Standard for
+Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int8 roundIncrement, roundBits;
+    flag isTiny;
+    /* roundIncrement is added before the final >>7: 0x40 is half of the discarded 7 bits, 0x7F rounds away from zero, 0 truncates. */
+    roundingMode = float_rounding_mode;
+    roundNearestEven = roundingMode == float_round_nearest_even;
+    roundIncrement = 0x40;
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = 0x7F;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & 0x7F;
+    if ( 0xFD <= (bits16) zExp ) {   /* unsigned cast: a negative zExp also enters, so one test covers overflow and underflow candidates */
+        if ( ( 0xFD < zExp )
+             || ( ( zExp == 0xFD )
+                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
+           ) {
+            float_raise( float_flag_overflow | float_flag_inexact );
+            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );   /* infinity pattern minus 1 is the largest finite value */
+        }
+        if ( zExp < 0 ) {
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ( zSig + roundIncrement < 0x80000000 );
+            shift32RightJamming( zSig, - zExp, &zSig );
+            zExp = 0;
+            roundBits = zSig & 0x7F;   /* recomputed because the shift changed the discarded bits */
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+        }
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    zSig = ( zSig + roundIncrement )>>7;
+    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );   /* exact tie under nearest-even: clear bit 0 */
+    if ( zSig == 0 ) zExp = 0;
+    return packFloat32( zSign, zExp, zSig );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper single-precision floating-
+point value corresponding to the abstract input. This routine is just like
+`roundAndPackFloat32' except that `zSig' does not have to be normalized.
+Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+floating-point exponent.
+-------------------------------------------------------------------------------
+*/
+static float32
+ normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    int8 shiftCount;
+    /* Assuming countLeadingZeros32 returns the leading-zero count, the shift moves the top set bit to bit 30. */
+    shiftCount = countLeadingZeros32( zSig ) - 1;
+    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the least-significant 32 fraction bits of the double-precision
+floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE bits32 extractFloat64Frac1( float64 a )
+{
+    /* Low 32 bits of the demangled value. */
+    return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the most-significant 20 fraction bits of the double-precision
+floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE bits32 extractFloat64Frac0( float64 a )
+{
+    /* Bits 51..32 of the demangled value. */
+    return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the exponent bits of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE int16 extractFloat64Exp( float64 a )
+{
+    /* Bits 62..52, still biased. */
+    return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit of the double-precision floating-point value `a'.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloat64Sign( float64 a )
+{
+    /* Bit 63 of the demangled value. */
+    return FLOAT64_DEMANGLE(a)>>63;
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal double-precision floating-point value represented
+by the denormalized significand formed by the concatenation of `aSig0' and
+`aSig1'. The normalized exponent is stored at the location pointed to by
+`zExpPtr'. The most significant 21 bits of the normalized significand are
+stored at the location pointed to by `zSig0Ptr', and the least significant
+32 bits of the normalized significand are stored at the location pointed to
+by `zSig1Ptr'.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat64Subnormal(
+     bits32 aSig0,
+     bits32 aSig1,
+     int16 *zExpPtr,
+     bits32 *zSig0Ptr,
+     bits32 *zSig1Ptr
+ )
+{
+    int8 shiftCount;
+    /* Either way the top set bit ends at bit 20 of *zSig0Ptr (assuming countLeadingZeros32 counts leading zeros). */
+    if ( aSig0 == 0 ) {
+        shiftCount = countLeadingZeros32( aSig1 ) - 11;
+        if ( shiftCount < 0 ) {
+            *zSig0Ptr = aSig1>>( - shiftCount );
+            *zSig1Ptr = aSig1<<( shiftCount & 31 );   /* for the negative counts here, shiftCount & 31 is 32 + shiftCount */
+        }
+        else {
+            *zSig0Ptr = aSig1<<shiftCount;
+            *zSig1Ptr = 0;
+        }
+        *zExpPtr = - shiftCount - 31;   /* same as 1 - ( shiftCount + 32 ): the low word was moved up a whole word */
+    }
+    else {
+        shiftCount = countLeadingZeros32( aSig0 ) - 11;
+        shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
+        *zExpPtr = 1 - shiftCount;
+    }
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', the exponent `zExp', and the significand formed by
+the concatenation of `zSig0' and `zSig1' into a double-precision floating-
+point value, returning the result. After being shifted into the proper
+positions, the three fields `zSign', `zExp', and `zSig0' are simply added
+together to form the most significant 32 bits of the result. This means
+that any integer portion of `zSig0' will be added into the exponent.
Since
+a properly normalized significand will have an integer portion equal to 1,
+the `zExp' input should be 1 less than the desired result exponent whenever
+`zSig0' and `zSig1' concatenated form a complete, normalized significand.
+-------------------------------------------------------------------------------
+*/
+INLINE float64
+ packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
+{
+    /* Each field is widened to bits64 before shifting; the sum is then passed through FLOAT64_MANGLE. */
+    return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
+                           ( ( (bits64) zExp )<<52 ) +
+                           ( ( (bits64) zSig0 )<<32 ) + zSig1 );
+    /* Addition, not OR: bit 20 of zSig0 (the integer bit) carries into the exponent field. */
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and extended significand formed by the concatenation of `zSig0', `zSig1',
+and `zSig2', and returns the proper double-precision floating-point value
+corresponding to the abstract input. Ordinarily, the abstract value is
+simply rounded and packed into the double-precision format, with the inexact
+exception raised if the abstract input cannot be represented exactly.
+However, if the abstract value is too large, the overflow and inexact
+exceptions are raised and an infinity or maximal finite value is returned.
+If the abstract value is too small, the input value is rounded to a
+subnormal number, and the underflow and inexact exceptions are raised if the
+abstract input cannot be represented exactly as a subnormal double-precision
+floating-point number.
+    The input significand must be normalized or smaller. If the input
+significand is not normalized, `zExp' must be 0; in that case, the result
+returned is a subnormal number, and it must not require rounding. In the
+usual case that the input significand is normalized, `zExp' must be 1 less
+than the ``true'' floating-point exponent. The handling of underflow and
+overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float64
+ roundAndPackFloat64(
+     flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment, isTiny;
+    /* zSig2 holds the bits below the result's last place; its top bit is the half-way bit. */
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    increment = ( (sbits32) zSig2 < 0 );
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            increment = 0;
+        }
+        else {
+            if ( zSign ) {
+                increment = ( roundingMode == float_round_down ) && zSig2;
+            }
+            else {
+                increment = ( roundingMode == float_round_up ) && zSig2;
+            }
+        }
+    }
+    if ( 0x7FD <= (bits16) zExp ) {   /* unsigned cast: a negative zExp also enters, so one test covers overflow and underflow candidates */
+        if ( ( 0x7FD < zExp )
+             || ( ( zExp == 0x7FD )
+                  && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
+                  && increment
+                )
+           ) {
+            float_raise( float_flag_overflow | float_flag_inexact );
+            if ( ( roundingMode == float_round_to_zero )
+                 || ( zSign && ( roundingMode == float_round_up ) )
+                 || ( ! zSign && ( roundingMode == float_round_down ) )
+               ) {
+                return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );   /* these modes return the largest finite value */
+            }
+            return packFloat64( zSign, 0x7FF, 0, 0 );
+        }
+        if ( zExp < 0 ) {
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ! increment
+                || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
+            shift64ExtraRightJamming(
+                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
+            zExp = 0;
+            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
+            if ( roundNearestEven ) {   /* the shift changed zSig2, so the rounding decision is recomputed */
+                increment = ( (sbits32) zSig2 < 0 );
+            }
+            else {
+                if ( zSign ) {
+                    increment = ( roundingMode == float_round_down ) && zSig2;
+                }
+                else {
+                    increment = ( roundingMode == float_round_up ) && zSig2;
+                }
+            }
+        }
+    }
+    if ( zSig2 ) float_exception_flags |= float_flag_inexact;
+    if ( increment ) {
+        add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
+        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );   /* zSig2 + zSig2 == 0 with increment set means an exact tie: clear bit 0 */
+    }
+    else {
+        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
+    }
+    return packFloat64( zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand formed by the concatenation of `zSig0' and `zSig1', and
+returns the proper double-precision floating-point value corresponding
+to the abstract input. This routine is just like `roundAndPackFloat64'
+except that the input significand has fewer bits and does not have to be
+normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
+point exponent.
+------------------------------------------------------------------------------- +*/ +static float64 + normalizeRoundAndPackFloat64( + flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ) +{ + int8 shiftCount; + bits32 zSig2; + + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 32; + } + shiftCount = countLeadingZeros32( zSig0 ) - 11; + if ( 0 <= shiftCount ) { + zSig2 = 0; + shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + } + else { + shift64ExtraRightJamming( + zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); + } + zExp -= shiftCount; + return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' to +the single-precision floating-point format. The conversion is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 int32_to_float32( int32 a ) +{ + flag zSign; + + if ( a == 0 ) return 0; + if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); + zSign = ( a < 0 ); + return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the 32-bit two's complement integer `a' to +the double-precision floating-point format. The conversion is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 int32_to_float64( int32 a ) +{ + flag zSign; + bits32 absA; + int8 shiftCount; + bits32 zSig0, zSig1; + + if ( a == 0 ) return packFloat64( 0, 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? 
- a : a; + shiftCount = countLeadingZeros32( absA ) - 11; + if ( 0 <= shiftCount ) { + zSig0 = absA<<shiftCount; + zSig1 = 0; + } + else { + shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 ); + } + return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. +------------------------------------------------------------------------------- +*/ +int32 float32_to_int32( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig, aSigExtra; + int32 z; + int8 roundingMode; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x96; + if ( 0 <= shiftCount ) { + if ( 0x9E <= aExp ) { + if ( a != 0xCF000000 ) { + float_raise( float_flag_invalid ); + if ( ! 
aSign || ( ( aExp == 0xFF ) && aSig ) ) { + return 0x7FFFFFFF; + } + } + return (sbits32) 0x80000000; + } + z = ( aSig | 0x00800000 )<<shiftCount; + if ( aSign ) z = - z; + } + else { + if ( aExp < 0x7E ) { + aSigExtra = aExp | aSig; + z = 0; + } + else { + aSig |= 0x00800000; + aSigExtra = aSig<<( shiftCount & 31 ); + z = aSig>>( - shiftCount ); + } + if ( aSigExtra ) float_exception_flags |= float_flag_inexact; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + if ( (sbits32) aSigExtra < 0 ) { + ++z; + if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1; + } + if ( aSign ) z = - z; + } + else { + aSigExtra = ( aSigExtra != 0 ); + if ( aSign ) { + z += ( roundingMode == float_round_down ) & aSigExtra; + z = - z; + } + else { + z += ( roundingMode == float_round_up ) & aSigExtra; + } + } + } + return z; + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic, except that the conversion is always rounded toward zero. +If `a' is a NaN, the largest positive integer is returned. Otherwise, if +the conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float32_to_int32_round_to_zero( float32 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig; + int32 z; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + shiftCount = aExp - 0x9E; + if ( 0 <= shiftCount ) { + if ( a != 0xCF000000 ) { + float_raise( float_flag_invalid ); + if ( ! 
aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; + } + return (sbits32) 0x80000000; + } + else if ( aExp <= 0x7E ) { + if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; + return 0; + } + aSig = ( aSig | 0x00800000 )<<8; + z = aSig>>( - shiftCount ); + if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { + float_exception_flags |= float_flag_inexact; + } + if ( aSign ) z = - z; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the single-precision floating-point value +`a' to the double-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float32_to_float64( float32 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig, zSig0, zSig1; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); + return packFloat64( aSign, 0x7FF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + --aExp; + } + shift64Right( aSig, 0, 3, &zSig0, &zSig1 ); + return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Rounds the single-precision floating-point value `a' to an integer, +and returns the result as a single-precision floating-point value. The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. 
+-------------------------------------------------------------------------------
+*/
+float32 float32_round_to_int( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float32 z;
+    /* Three ranges by exponent: >= 0x96 returned unchanged (NaNs propagated), <= 0x7E answered by constants, else masked below. */
+    aExp = extractFloat32Exp( a );
+    if ( 0x96 <= aExp ) {
+        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
+            return propagateFloat32NaN( a, a );
+        }
+        return a;
+    }
+    if ( aExp <= 0x7E ) {
+        if ( (bits32) ( a<<1 ) == 0 ) return a;   /* +0 or -0 is returned unchanged, with no inexact flag */
+        float_exception_flags |= float_flag_inexact;
+        aSign = extractFloat32Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
+                return packFloat32( aSign, 0x7F, 0 );
+            }
+            break;
+         case float_round_to_zero:
+            break;
+         case float_round_down:
+            return aSign ? 0xBF800000 : 0;
+         case float_round_up:
+            return aSign ? 0x80000000 : 0x3F800000;
+        }
+        return packFloat32( aSign, 0, 0 );
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x96 - aExp;
+    roundBitsMask = lastBitMask - 1;   /* all bits below lastBitMask, i.e. the bits to be rounded away */
+    z = a;
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        z += lastBitMask>>1;
+        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;   /* exact tie: clear the last kept bit */
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z += roundBitsMask;
+        }
+    }
+    z &= ~ roundBitsMask;
+    if ( z != a ) float_exception_flags |= float_flag_inexact;
+    return z;
+
+}
+#endif
+
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the single-precision
+floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+before being returned. `zSign' is ignored if the result is a NaN.
+The addition is performed according to the IEC/IEEE Standard for Binary
+Floating-Point Arithmetic.
+------------------------------------------------------------------------------- +*/ +static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 6; + bSig <<= 6; + if ( 0 < expDiff ) { + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x20000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x20000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + zExp = bExp; + } + else { + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); + zSig = 0x40000000 + aSig + bSig; + zExp = aExp; + goto roundAndPack; + } + aSig |= 0x20000000; + zSig = ( aSig + bSig )<<1; + --zExp; + if ( (sbits32) zSig < 0 ) { + zSig = aSig + bSig; + ++zExp; + } + roundAndPack: + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the single- +precision floating-point values `a' and `b'. If `zSign' is 1, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig; + int16 expDiff; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + expDiff = aExp - bExp; + aSig <<= 7; + bSig <<= 7; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0xFF ) { + if ( aSig | bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); + bExpBigger: + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign ^ 1, 0xFF, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig |= 0x40000000; + } + shift32RightJamming( aSig, - expDiff, &aSig ); + bSig |= 0x40000000; + bBigger: + zSig = bSig - aSig; + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig |= 0x40000000; + } + shift32RightJamming( bSig, expDiff, &bSig ); + aSig |= 0x40000000; + aBigger: + zSig = aSig - bSig; + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the single-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_add( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return addFloat32Sigs( a, b, aSign ); + } + else { + return subFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sub( float32 a, float32 b ) +{ + flag aSign, bSign; + + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign == bSign ) { + return subFloat32Sigs( a, b, aSign ); + } + else { + return addFloat32Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the single-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_mul( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig0, zSig1; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + if ( ( bExp | bSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x7F; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + mul32To64( aSig, bSig, &zSig0, &zSig1 ); + zSig0 |= ( zSig1 != 0 ); + if ( 0 <= (sbits32) ( zSig0<<1 ) ) { + zSig0 <<= 1; + --zExp; + } + return roundAndPackFloat32( zSign, zExp, zSig0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the single-precision floating-point value `a' +by the corresponding value `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float32_div( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig, bSig, zSig, rem0, rem1, term0, term1; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, b ); + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + float_raise( float_flag_invalid ); + return float32_default_nan; + } + return packFloat32( zSign, 0xFF, 0 ); + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return packFloat32( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + float_raise( float_flag_divbyzero ); + return packFloat32( zSign, 0xFF, 0 ); + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x7D; + aSig = ( aSig | 0x00800000 )<<7; + bSig = ( bSig | 0x00800000 )<<8; + if ( bSig <= ( aSig + aSig ) ) { + aSig >>= 1; + ++zExp; + } + zSig = estimateDiv64To32( aSig, 0, bSig ); + if ( ( zSig & 0x3F ) <= 2 ) { + mul32To64( bSig, zSig, &term0, &term1 ); + sub64( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits32) rem0 < 0 ) { + --zSig; + add64( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig |= ( rem1 != 0 ); + } + return roundAndPackFloat32( zSign, zExp, zSig ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns the remainder of the single-precision floating-point value `a' +with respect to the corresponding value 
`b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_rem( float32 a, float32 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits32 aSig, bSig, q, allZero, alternateASig; + sbits32 sigMean; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + bSig = extractFloat32Frac( b ); + bExp = extractFloat32Exp( b ); + bSign = extractFloat32Sign( b ); + if ( aExp == 0xFF ) { + if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { + return propagateFloat32NaN( a, b ); + } + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( bExp == 0xFF ) { + if ( bSig ) return propagateFloat32NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + float_raise( float_flag_invalid ); + return float32_default_nan; + } + normalizeFloat32Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + expDiff = aExp - bExp; + aSig = ( aSig | 0x00800000 )<<8; + bSig = ( bSig | 0x00800000 )<<8; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + aSig >>= 1; + } + q = ( bSig <= aSig ); + if ( q ) aSig -= bSig; + expDiff -= 32; + while ( 0 < expDiff ) { + q = estimateDiv64To32( aSig, 0, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + aSig = - ( ( bSig>>2 ) * q ); + expDiff -= 30; + } + expDiff += 32; + if ( 0 < expDiff ) { + q = estimateDiv64To32( aSig, 0, bSig ); + q = ( 2 < q ) ? 
q - 2 : 0; + q >>= 32 - expDiff; + bSig >>= 2; + aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; + } + else { + aSig >>= 2; + bSig >>= 2; + } + do { + alternateASig = aSig; + ++q; + aSig -= bSig; + } while ( 0 <= (sbits32) aSig ); + sigMean = aSig + alternateASig; + if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { + aSig = alternateASig; + } + zSign = ( (sbits32) aSig < 0 ); + if ( zSign ) aSig = - aSig; + return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); + +} +#endif + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns the square root of the single-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float32 float32_sqrt( float32 a ) +{ + flag aSign; + int16 aExp, zExp; + bits32 aSig, zSig, rem0, rem1, term0, term1; + + aSig = extractFloat32Frac( a ); + aExp = extractFloat32Exp( a ); + aSign = extractFloat32Sign( a ); + if ( aExp == 0xFF ) { + if ( aSig ) return propagateFloat32NaN( a, 0 ); + if ( ! 
aSign ) return a; + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aSign ) { + if ( ( aExp | aSig ) == 0 ) return a; + float_raise( float_flag_invalid ); + return float32_default_nan; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return 0; + normalizeFloat32Subnormal( aSig, &aExp, &aSig ); + } + zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; + aSig = ( aSig | 0x00800000 )<<8; + zSig = estimateSqrt32( aExp, aSig ) + 2; + if ( ( zSig & 0x7F ) <= 5 ) { + if ( zSig < 2 ) { + zSig = 0x7FFFFFFF; + goto roundAndPack; + } + else { + aSig >>= aExp & 1; + mul32To64( zSig, zSig, &term0, &term1 ); + sub64( aSig, 0, term0, term1, &rem0, &rem1 ); + while ( (sbits32) rem0 < 0 ) { + --zSig; + shortShift64Left( 0, zSig, 1, &term0, &term1 ); + term1 |= 1; + add64( rem0, rem1, term0, term1, &rem0, &rem1 ); + } + zSig |= ( ( rem0 | rem1 ) != 0 ); + } + } + shift32RightJamming( zSig, 1, &zSig ); + roundAndPack: + return roundAndPackFloat32( 0, zExp, zSig ); + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_eq( float32 a, float32 b ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +or equal to the corresponding value `b', and 0 otherwise. 
The comparison +is performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_le( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_lt( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. The invalid exception is +raised if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float32_eq_signaling( float32 a, float32 b ) +{ + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float32_le_quiet( float32 a, float32 b ) +{ + flag aSign, bSign; + int16 aExp, bExp; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the single-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float32_lt_quiet( float32 a, float32 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) + || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) + ) { + if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat32Sign( a ); + bSign = extractFloat32Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} +#endif /* !SOFTFLOAT_FOR_GCC */ + +#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic---which means in particular that the conversion is rounded +according to the current rounding mode. If `a' is a NaN, the largest +positive integer is returned. Otherwise, if the conversion overflows, the +largest integer with the same sign as `a' is returned. 
+------------------------------------------------------------------------------- +*/ +int32 float64_to_int32( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig0, aSig1, absZ, aSigExtra; + int32 z; + int8 roundingMode; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + shiftCount = aExp - 0x413; + if ( 0 <= shiftCount ) { + if ( 0x41E < aExp ) { + if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; + goto invalid; + } + shortShift64Left( + aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); + if ( 0x80000000 < absZ ) goto invalid; + } + else { + aSig1 = ( aSig1 != 0 ); + if ( aExp < 0x3FE ) { + aSigExtra = aExp | aSig0 | aSig1; + absZ = 0; + } + else { + aSig0 |= 0x00100000; + aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; + absZ = aSig0>>( - shiftCount ); + } + } + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + if ( (sbits32) aSigExtra < 0 ) { + ++absZ; + if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1; + } + z = aSign ? - absZ : absZ; + } + else { + aSigExtra = ( aSigExtra != 0 ); + if ( aSign ) { + z = - ( absZ + + ( ( roundingMode == float_round_down ) & aSigExtra ) ); + } + else { + z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra ); + } + } + if ( ( aSign ^ ( z < 0 ) ) && z ) { + invalid: + float_raise( float_flag_invalid ); + return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( aSigExtra ) float_exception_flags |= float_flag_inexact; + return z; + +} +#endif /* !SOFTFLOAT_FOR_GCC */ + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the 32-bit two's complement integer format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic, except that the conversion is always rounded toward zero. 
+If `a' is a NaN, the largest positive integer is returned. Otherwise, if +the conversion overflows, the largest integer with the same sign as `a' is +returned. +------------------------------------------------------------------------------- +*/ +int32 float64_to_int32_round_to_zero( float64 a ) +{ + flag aSign; + int16 aExp, shiftCount; + bits32 aSig0, aSig1, absZ, aSigExtra; + int32 z; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + shiftCount = aExp - 0x413; + if ( 0 <= shiftCount ) { + if ( 0x41E < aExp ) { + if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; + goto invalid; + } + shortShift64Left( + aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); + } + else { + if ( aExp < 0x3FF ) { + if ( aExp | aSig0 | aSig1 ) { + float_exception_flags |= float_flag_inexact; + } + return 0; + } + aSig0 |= 0x00100000; + aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; + absZ = aSig0>>( - shiftCount ); + } + z = aSign ? - absZ : absZ; + if ( ( aSign ^ ( z < 0 ) ) && z ) { + invalid: + float_raise( float_flag_invalid ); + return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; + } + if ( aSigExtra ) float_exception_flags |= float_flag_inexact; + return z; + +} + +/* +------------------------------------------------------------------------------- +Returns the result of converting the double-precision floating-point value +`a' to the single-precision floating-point format. The conversion is +performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float32 float64_to_float32( float64 a ) +{ + flag aSign; + int16 aExp; + bits32 aSig0, aSig1, zSig; + bits32 allZero; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 ) { + return commonNaNToFloat32( float64ToCommonNaN( a ) ); + } + return packFloat32( aSign, 0xFF, 0 ); + } + shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig ); + if ( aExp ) zSig |= 0x40000000; + return roundAndPackFloat32( aSign, aExp - 0x381, zSig ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Rounds the double-precision floating-point value `a' to an integer, +and returns the result as a double-precision floating-point value. The +operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 a ) +{ + flag aSign; + int16 aExp; + bits32 lastBitMask, roundBitsMask; + int8 roundingMode; + float64 z; + + aExp = extractFloat64Exp( a ); + if ( 0x413 <= aExp ) { + if ( 0x433 <= aExp ) { + if ( ( aExp == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) { + return propagateFloat64NaN( a, a ); + } + return a; + } + lastBitMask = 1; + lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1; + roundBitsMask = lastBitMask - 1; + z = a; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + if ( lastBitMask ) { + add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); + if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; + } + else { + if ( (sbits32) z.low < 0 ) { + ++z.high; + if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1; + } + } + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat64Sign( z ) + ^ ( roundingMode == float_round_up ) ) { + add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); + } + } + z.low &= ~ roundBitsMask; + } + else { + if ( aExp <= 0x3FE ) { + if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a; + float_exception_flags |= float_flag_inexact; + aSign = extractFloat64Sign( a ); + switch ( float_rounding_mode ) { + case float_round_nearest_even: + if ( ( aExp == 0x3FE ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) + ) { + return packFloat64( aSign, 0x3FF, 0, 0 ); + } + break; + case float_round_down: + return + aSign ? packFloat64( 1, 0x3FF, 0, 0 ) + : packFloat64( 0, 0, 0, 0 ); + case float_round_up: + return + aSign ? 
packFloat64( 1, 0, 0, 0 ) + : packFloat64( 0, 0x3FF, 0, 0 ); + } + return packFloat64( aSign, 0, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x413 - aExp; + roundBitsMask = lastBitMask - 1; + z.low = 0; + z.high = a.high; + roundingMode = float_rounding_mode; + if ( roundingMode == float_round_nearest_even ) { + z.high += lastBitMask>>1; + if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { + z.high &= ~ lastBitMask; + } + } + else if ( roundingMode != float_round_to_zero ) { + if ( extractFloat64Sign( z ) + ^ ( roundingMode == float_round_up ) ) { + z.high |= ( a.low != 0 ); + z.high += roundBitsMask; + } + } + z.high &= ~ roundBitsMask; + } + if ( ( z.low != a.low ) || ( z.high != a.high ) ) { + float_exception_flags |= float_flag_inexact; + } + return z; + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns the result of adding the absolute values of the double-precision +floating-point values `a' and `b'. If `zSign' is 1, the sum is negated +before being returned. `zSign' is ignored if the result is a NaN. +The addition is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + int16 expDiff; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + bSig1 = extractFloat64Frac1( b ); + bSig0 = extractFloat64Frac0( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig0 |= 0x00100000; + } + shift64ExtraRightJamming( + bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FF ) { + if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign, 0x7FF, 0, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig0 |= 0x00100000; + } + shift64ExtraRightJamming( + aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat64NaN( a, b ); + } + return a; + } + add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 ); + zSig2 = 0; + zSig0 |= 0x00200000; + zExp = aExp; + goto shiftRight1; + } + aSig0 |= 0x00100000; + add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + --zExp; + if ( zSig0 < 0x00200000 ) goto roundAndPack; + ++zExp; + shiftRight1: + shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); + roundAndPack: + return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the absolute values of the double- +precision floating-point values `a' and `b'. 
If `zSign' is 1, the +difference is negated before being returned. `zSign' is ignored if the +result is a NaN. The subtraction is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) +{ + int16 aExp, bExp, zExp; + bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; + int16 expDiff; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + bSig1 = extractFloat64Frac1( b ); + bSig0 = extractFloat64Frac0( b ); + bExp = extractFloat64Exp( b ); + expDiff = aExp - bExp; + shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 ); + shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 ); + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 | bSig0 | bSig1 ) { + return propagateFloat64NaN( a, b ); + } + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aExp == 0 ) { + aExp = 1; + bExp = 1; + } + if ( bSig0 < aSig0 ) goto aBigger; + if ( aSig0 < bSig0 ) goto bBigger; + if ( bSig1 < aSig1 ) goto aBigger; + if ( aSig1 < bSig1 ) goto bBigger; + return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 ); + bExpBigger: + if ( bExp == 0x7FF ) { + if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign ^ 1, 0x7FF, 0, 0 ); + } + if ( aExp == 0 ) { + ++expDiff; + } + else { + aSig0 |= 0x40000000; + } + shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + bSig0 |= 0x40000000; + bBigger: + sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + --expDiff; + } + else { + bSig0 |= 0x40000000; + } + shift64RightJamming( bSig0, 
bSig1, expDiff, &bSig0, &bSig1 ); + aSig0 |= 0x40000000; + aBigger: + sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + --zExp; + return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of adding the double-precision floating-point values `a' +and `b'. The operation is performed according to the IEC/IEEE Standard for +Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_add( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return addFloat64Sigs( a, b, aSign ); + } + else { + return subFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of subtracting the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_sub( float64 a, float64 b ) +{ + flag aSign, bSign; + + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign == bSign ) { + return subFloat64Sigs( a, b, aSign ); + } + else { + return addFloat64Sigs( a, b, aSign ); + } + +} + +/* +------------------------------------------------------------------------------- +Returns the result of multiplying the double-precision floating-point values +`a' and `b'. The operation is performed according to the IEC/IEEE Standard +for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float64_mul( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig1 = extractFloat64Frac1( b ); + bSig0 = extractFloat64Frac0( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat64NaN( a, b ); + } + if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; + return packFloat64( zSign, 0x7FF, 0, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + return float64_default_nan; + } + return packFloat64( zSign, 0x7FF, 0, 0 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); + normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); + normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + zExp = aExp + bExp - 0x400; + aSig0 |= 0x00100000; + shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 ); + mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); + add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); + zSig2 |= ( zSig3 != 0 ); + if ( 0x00200000 <= zSig0 ) { + shift64ExtraRightJamming( + zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); + ++zExp; + } + return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +/* +------------------------------------------------------------------------------- +Returns the result of dividing the double-precision 
floating-point value `a' +by the corresponding value `b'. The operation is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +float64 float64_div( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, zExp; + bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; + bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig1 = extractFloat64Frac1( b ); + bSig0 = extractFloat64Frac0( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); + if ( bExp == 0x7FF ) { + if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); + goto invalid; + } + return packFloat64( zSign, 0x7FF, 0, 0 ); + } + if ( bExp == 0x7FF ) { + if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); + return packFloat64( zSign, 0, 0, 0 ); + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { + if ( ( aExp | aSig0 | aSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + return float64_default_nan; + } + float_raise( float_flag_divbyzero ); + return packFloat64( zSign, 0x7FF, 0, 0 ); + } + normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); + normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = aExp - bExp + 0x3FD; + shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 ); + shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); + if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) { + shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); + ++zExp; + } + zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 ); + mul64By32To96( bSig0, bSig1, 
zSig0, &term0, &term1, &term2 ); + sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); + while ( (sbits32) rem0 < 0 ) { + --zSig0; + add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); + } + zSig1 = estimateDiv64To32( rem1, rem2, bSig0 ); + if ( ( zSig1 & 0x3FF ) <= 4 ) { + mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); + sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits32) rem1 < 0 ) { + --zSig1; + add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns the remainder of the double-precision floating-point value `a' +with respect to the corresponding value `b'. The operation is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float64_rem( float64 a, float64 b ) +{ + flag aSign, bSign, zSign; + int16 aExp, bExp, expDiff; + bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; + bits32 allZero, alternateASig0, alternateASig1, sigMean1; + sbits32 sigMean0; + float64 z; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + bSig1 = extractFloat64Frac1( b ); + bSig0 = extractFloat64Frac0( b ); + bExp = extractFloat64Exp( b ); + bSign = extractFloat64Sign( b ); + if ( aExp == 0x7FF ) { + if ( ( aSig0 | aSig1 ) + || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { + return propagateFloat64NaN( a, b ); + } + goto invalid; + } + if ( bExp == 0x7FF ) { + if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); + return a; + } + if ( bExp == 0 ) { + if ( ( bSig0 | bSig1 ) == 0 ) { + invalid: + float_raise( float_flag_invalid ); + return float64_default_nan; + } + normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return a; + normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + expDiff = aExp - bExp; + if ( expDiff < -1 ) return a; + shortShift64Left( + aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 ); + shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); + q = le64( bSig0, bSig1, aSig0, aSig1 ); + if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + expDiff -= 32; + while ( 0 < expDiff ) { + q = estimateDiv64To32( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? 
q - 4 : 0; + mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); + shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero ); + shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero ); + sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 ); + expDiff -= 29; + } + if ( -32 < expDiff ) { + q = estimateDiv64To32( aSig0, aSig1, bSig0 ); + q = ( 4 < q ) ? q - 4 : 0; + q >>= - expDiff; + shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); + expDiff += 24; + if ( expDiff < 0 ) { + shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); + } + else { + shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); + } + mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); + sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); + } + else { + shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 ); + shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); + } + do { + alternateASig0 = aSig0; + alternateASig1 = aSig1; + ++q; + sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); + } while ( 0 <= (sbits32) aSig0 ); + add64( + aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 ); + if ( ( sigMean0 < 0 ) + || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + } + zSign = ( (sbits32) aSig0 < 0 ); + if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); + return + normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); + +} +#endif + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns the square root of the double-precision floating-point value `a'. +The operation is performed according to the IEC/IEEE Standard for Binary +Floating-Point Arithmetic. 
+------------------------------------------------------------------------------- +*/ +float64 float64_sqrt( float64 a ) +{ + flag aSign; + int16 aExp, zExp; + bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; + bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; + float64 z; + + aSig1 = extractFloat64Frac1( a ); + aSig0 = extractFloat64Frac0( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a ); + if ( ! aSign ) return a; + goto invalid; + } + if ( aSign ) { + if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; + invalid: + float_raise( float_flag_invalid ); + return float64_default_nan; + } + if ( aExp == 0 ) { + if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 ); + normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + } + zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; + aSig0 |= 0x00100000; + shortShift64Left( aSig0, aSig1, 11, &term0, &term1 ); + zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1; + if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF; + doubleZSig0 = zSig0 + zSig0; + shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 ); + mul32To64( zSig0, zSig0, &term0, &term1 ); + sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 ); + while ( (sbits32) rem0 < 0 ) { + --zSig0; + doubleZSig0 -= 2; + add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 ); + } + zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 ); + if ( ( zSig1 & 0x1FF ) <= 5 ) { + if ( zSig1 == 0 ) zSig1 = 1; + mul32To64( doubleZSig0, zSig1, &term1, &term2 ); + sub64( rem1, 0, term1, term2, &rem1, &rem2 ); + mul32To64( zSig1, zSig1, &term2, &term3 ); + sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); + while ( (sbits32) rem1 < 0 ) { + --zSig1; + shortShift64Left( 0, zSig1, 1, &term2, &term3 ); + term3 |= 1; + term2 |= doubleZSig0; + add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + 
shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 ); + return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 ); + +} +#endif + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_eq( float64 a, float64 b ) +{ + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + return ( a == b ) || + ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +or equal to the corresponding value `b', and 0 otherwise. The comparison +is performed according to the IEC/IEEE Standard for Binary Floating-Point +Arithmetic. 
+------------------------------------------------------------------------------- +*/ +flag float64_le( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) + return aSign || + ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == + 0 ); + return ( a == b ) || + ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. The comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_lt( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) + return aSign && + ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != + 0 ); + return ( a != b ) && + ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); + +} + +#ifndef SOFTFLOAT_FOR_GCC +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is equal to +the corresponding value `b', and 0 otherwise. 
The invalid exception is +raised if either operand is a NaN. Otherwise, the comparison is performed +according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_eq_signaling( float64 a, float64 b ) +{ + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + float_raise( float_flag_invalid ); + return 0; + } + return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than or +equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not +cause an exception. Otherwise, the comparison is performed according to the +IEC/IEEE Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_le_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); + return ( a == b ) || ( aSign ^ ( a < b ) ); + +} + +/* +------------------------------------------------------------------------------- +Returns 1 if the double-precision floating-point value `a' is less than +the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an +exception. 
Otherwise, the comparison is performed according to the IEC/IEEE +Standard for Binary Floating-Point Arithmetic. +------------------------------------------------------------------------------- +*/ +flag float64_lt_quiet( float64 a, float64 b ) +{ + flag aSign, bSign; + + if ( ( ( extractFloat64Exp( a ) == 0x7FF ) + && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) + || ( ( extractFloat64Exp( b ) == 0x7FF ) + && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) + ) { + if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid ); + } + return 0; + } + aSign = extractFloat64Sign( a ); + bSign = extractFloat64Sign( b ); + if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); + return ( a != b ) && ( aSign ^ ( a < b ) ); + +} + +#endif diff --git a/lib/libc/arch/arm/softfloat/softfloat.h b/lib/libc/arch/arm/softfloat/softfloat.h new file mode 100644 index 00000000000..3b39750859d --- /dev/null +++ b/lib/libc/arch/arm/softfloat/softfloat.h @@ -0,0 +1,312 @@ +/* $NetBSD: softfloat.h,v 1.6 2002/05/12 13:12:46 bjh21 Exp $ */ + +/* This is a derivative work. */ + +/* +=============================================================================== + +This C header file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. 
+ +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* +------------------------------------------------------------------------------- +The macro `FLOATX80' must be defined to enable the extended double-precision +floating-point format `floatx80'. If this macro is not defined, the +`floatx80' type will not be defined, and none of the functions that either +input or output the `floatx80' type will be defined. The same applies to +the `FLOAT128' macro and the quadruple-precision format `float128'. +------------------------------------------------------------------------------- +*/ +/* #define FLOATX80 */ +/* #define FLOAT128 */ + +#include <machine/ieeefp.h> + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point types. +------------------------------------------------------------------------------- +*/ +typedef unsigned int float32; +typedef unsigned long long float64; +#ifdef FLOATX80 +typedef struct { + unsigned short high; + unsigned long long low; +} floatx80; +#endif +#ifdef FLOAT128 +typedef struct { + unsigned long long high, low; +} float128; +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point underflow tininess-detection mode. 
+------------------------------------------------------------------------------- +*/ +extern int float_detect_tininess; +enum { + float_tininess_after_rounding = 0, + float_tininess_before_rounding = 1 +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point rounding mode. +------------------------------------------------------------------------------- +*/ +extern fp_rnd float_rounding_mode; +enum { + float_round_nearest_even = FP_RN, + float_round_to_zero = FP_RZ, + float_round_down = FP_RM, + float_round_up = FP_RP +}; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE floating-point exception flags. +------------------------------------------------------------------------------- +*/ +extern fp_except float_exception_flags; +extern fp_except float_exception_mask; +enum { + float_flag_inexact = FP_X_IMP, + float_flag_underflow = FP_X_UFL, + float_flag_overflow = FP_X_OFL, + float_flag_divbyzero = FP_X_DZ, + float_flag_invalid = FP_X_INV +}; + +/* +------------------------------------------------------------------------------- +Routine to raise any or all of the software IEC/IEEE floating-point +exception flags. +------------------------------------------------------------------------------- +*/ +void float_raise( fp_except ); + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE integer-to-floating-point conversion routines. 
+------------------------------------------------------------------------------- +*/ +float32 int32_to_float32( int ); +float64 int32_to_float64( int ); +#ifdef FLOATX80 +floatx80 int32_to_floatx80( int ); +#endif +#ifdef FLOAT128 +float128 int32_to_float128( int ); +#endif +#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */ +float32 int64_to_float32( long long ); +float64 int64_to_float64( long long ); +#ifdef FLOATX80 +floatx80 int64_to_floatx80( long long ); +#endif +#ifdef FLOAT128 +float128 int64_to_float128( long long ); +#endif +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision conversion routines. +------------------------------------------------------------------------------- +*/ +int float32_to_int32( float32 ); +int float32_to_int32_round_to_zero( float32 ); +#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS) +unsigned int float32_to_uint32_round_to_zero( float32 ); +#endif +#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */ +long long float32_to_int64( float32 ); +long long float32_to_int64_round_to_zero( float32 ); +#endif +float64 float32_to_float64( float32 ); +#ifdef FLOATX80 +floatx80 float32_to_floatx80( float32 ); +#endif +#ifdef FLOAT128 +float128 float32_to_float128( float32 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE single-precision operations. 
+------------------------------------------------------------------------------- +*/ +float32 float32_round_to_int( float32 ); +float32 float32_add( float32, float32 ); +float32 float32_sub( float32, float32 ); +float32 float32_mul( float32, float32 ); +float32 float32_div( float32, float32 ); +float32 float32_rem( float32, float32 ); +float32 float32_sqrt( float32 ); +int float32_eq( float32, float32 ); +int float32_le( float32, float32 ); +int float32_lt( float32, float32 ); +int float32_eq_signaling( float32, float32 ); +int float32_le_quiet( float32, float32 ); +int float32_lt_quiet( float32, float32 ); +#ifndef SOFTFLOAT_FOR_GCC +int float32_is_signaling_nan( float32 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +int float64_to_int32( float64 ); +int float64_to_int32_round_to_zero( float64 ); +#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS) +unsigned int float64_to_uint32_round_to_zero( float64 ); +#endif +#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */ +long long float64_to_int64( float64 ); +long long float64_to_int64_round_to_zero( float64 ); +#endif +float32 float64_to_float32( float64 ); +#ifdef FLOATX80 +floatx80 float64_to_floatx80( float64 ); +#endif +#ifdef FLOAT128 +float128 float64_to_float128( float64 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE double-precision operations. 
+------------------------------------------------------------------------------- +*/ +float64 float64_round_to_int( float64 ); +float64 float64_add( float64, float64 ); +float64 float64_sub( float64, float64 ); +float64 float64_mul( float64, float64 ); +float64 float64_div( float64, float64 ); +float64 float64_rem( float64, float64 ); +float64 float64_sqrt( float64 ); +int float64_eq( float64, float64 ); +int float64_le( float64, float64 ); +int float64_lt( float64, float64 ); +int float64_eq_signaling( float64, float64 ); +int float64_le_quiet( float64, float64 ); +int float64_lt_quiet( float64, float64 ); +#ifndef SOFTFLOAT_FOR_GCC +int float64_is_signaling_nan( float64 ); +#endif + +#ifdef FLOATX80 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision conversion routines. +------------------------------------------------------------------------------- +*/ +int floatx80_to_int32( floatx80 ); +int floatx80_to_int32_round_to_zero( floatx80 ); +long long floatx80_to_int64( floatx80 ); +long long floatx80_to_int64_round_to_zero( floatx80 ); +float32 floatx80_to_float32( floatx80 ); +float64 floatx80_to_float64( floatx80 ); +#ifdef FLOAT128 +float128 floatx80_to_float128( floatx80 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision rounding precision. Valid +values are 32, 64, and 80. +------------------------------------------------------------------------------- +*/ +extern int floatx80_rounding_precision; + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE extended double-precision operations. 
+------------------------------------------------------------------------------- +*/ +floatx80 floatx80_round_to_int( floatx80 ); +floatx80 floatx80_add( floatx80, floatx80 ); +floatx80 floatx80_sub( floatx80, floatx80 ); +floatx80 floatx80_mul( floatx80, floatx80 ); +floatx80 floatx80_div( floatx80, floatx80 ); +floatx80 floatx80_rem( floatx80, floatx80 ); +floatx80 floatx80_sqrt( floatx80 ); +int floatx80_eq( floatx80, floatx80 ); +int floatx80_le( floatx80, floatx80 ); +int floatx80_lt( floatx80, floatx80 ); +int floatx80_eq_signaling( floatx80, floatx80 ); +int floatx80_le_quiet( floatx80, floatx80 ); +int floatx80_lt_quiet( floatx80, floatx80 ); +int floatx80_is_signaling_nan( floatx80 ); + +#endif + +#ifdef FLOAT128 + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision conversion routines. +------------------------------------------------------------------------------- +*/ +int float128_to_int32( float128 ); +int float128_to_int32_round_to_zero( float128 ); +long long float128_to_int64( float128 ); +long long float128_to_int64_round_to_zero( float128 ); +float32 float128_to_float32( float128 ); +float64 float128_to_float64( float128 ); +#ifdef FLOATX80 +floatx80 float128_to_floatx80( float128 ); +#endif + +/* +------------------------------------------------------------------------------- +Software IEC/IEEE quadruple-precision operations. 
+------------------------------------------------------------------------------- +*/ +float128 float128_round_to_int( float128 ); +float128 float128_add( float128, float128 ); +float128 float128_sub( float128, float128 ); +float128 float128_mul( float128, float128 ); +float128 float128_div( float128, float128 ); +float128 float128_rem( float128, float128 ); +float128 float128_sqrt( float128 ); +int float128_eq( float128, float128 ); +int float128_le( float128, float128 ); +int float128_lt( float128, float128 ); +int float128_eq_signaling( float128, float128 ); +int float128_le_quiet( float128, float128 ); +int float128_lt_quiet( float128, float128 ); +int float128_is_signaling_nan( float128 ); + +#endif + diff --git a/lib/libc/arch/arm/softfloat/timesoftfloat.c b/lib/libc/arch/arm/softfloat/timesoftfloat.c new file mode 100644 index 00000000000..21ab4ff24ad --- /dev/null +++ b/lib/libc/arch/arm/softfloat/timesoftfloat.c @@ -0,0 +1,2638 @@ +/* $NetBSD: timesoftfloat.c,v 1.1 2000/06/06 08:15:11 bjh21 Exp $ */ + +/* +=============================================================================== + +This C source file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. 
Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +#include <sys/cdefs.h> + +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <stdio.h> +#include <time.h> +#include "milieu.h" +#include "softfloat.h" + +enum { + minIterations = 1000 +}; + +static void fail( const char *message, ... ) +{ + va_list varArgs; + + fputs( "timesoftfloat: ", stderr ); + va_start( varArgs, message ); + vfprintf( stderr, message, varArgs ); + va_end( varArgs ); + fputs( ".\n", stderr ); + exit( EXIT_FAILURE ); + +} + +static char *functionName; +static char *roundingPrecisionName, *roundingModeName, *tininessModeName; + +static void reportTime( int32 count, long clocks ) +{ + + printf( + "%8.1f kops/s: %s", + ( count / ( ( (float) clocks ) / CLOCKS_PER_SEC ) ) / 1000, + functionName + ); + if ( roundingModeName ) { + if ( roundingPrecisionName ) { + fputs( ", precision ", stdout ); + fputs( roundingPrecisionName, stdout ); + } + fputs( ", rounding ", stdout ); + fputs( roundingModeName, stdout ); + if ( tininessModeName ) { + fputs( ", tininess ", stdout ); + fputs( tininessModeName, stdout ); + fputs( " rounding", stdout ); + } + } + fputc( '\n', stdout ); + +} + +enum { + numInputs_int32 = 32 +}; + +static const int32 inputs_int32[ numInputs_int32 ] = { + 0xFFFFBB79, 0x405CF80F, 0x00000000, 0xFFFFFD04, + 0xFFF20002, 0x0C8EF795, 0xF00011FF, 0x000006CA, + 
0x00009BFE, 0xFF4862E3, 0x9FFFEFFE, 0xFFFFFFB7, + 0x0BFF7FFF, 0x0000F37A, 0x0011DFFE, 0x00000006, + 0xFFF02006, 0xFFFFF7D1, 0x10200003, 0xDE8DF765, + 0x00003E02, 0x000019E8, 0x0008FFFE, 0xFFFFFB5C, + 0xFFDF7FFE, 0x07C42FBF, 0x0FFFE3FF, 0x040B9F13, + 0xBFFFFFF8, 0x0001BF56, 0x000017F6, 0x000A908A +}; + +static void time_a_int32_z_float32( float32 function( int32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_int32_z_float64( float64 function( int32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + +static void time_a_int32_z_floatx80( floatx80 function( int32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + count += 
minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +#ifdef FLOAT128 + +static void time_a_int32_z_float128( float128 function( int32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +enum { + numInputs_int64 = 32 +}; + +static const int64 inputs_int64[ numInputs_int64 ] = { + LIT64( 0xFBFFC3FFFFFFFFFF ), + LIT64( 0x0000000003C589BC ), + LIT64( 0x00000000400013FE ), + LIT64( 0x0000000000186171 ), + LIT64( 0xFFFFFFFFFFFEFBFA ), + LIT64( 0xFFFFFD79E6DFFC73 ), + LIT64( 0x0000000010001DFF ), + LIT64( 0xDD1A0F0C78513710 ), + LIT64( 0xFFFF83FFFFFEFFFE ), + LIT64( 0x00756EBD1AD0C1C7 ), + LIT64( 0x0003FDFFFFFFFFBE ), + LIT64( 0x0007D0FB2C2CA951 ), + LIT64( 0x0007FC0007FFFFFE ), + LIT64( 0x0000001F942B18BB ), + LIT64( 0x0000080101FFFFFE ), + LIT64( 0xFFFFFFFFFFFF0978 ), + LIT64( 0x000000000008BFFF ), + LIT64( 0x0000000006F5AF08 ), + LIT64( 0xFFDEFF7FFFFFFFFE ), + LIT64( 0x0000000000000003 ), + LIT64( 0x3FFFFFFFFF80007D ), + LIT64( 0x0000000000000078 ), + LIT64( 0xFFF80000007FDFFD ), + LIT64( 0x1BBC775B78016AB0 ), + LIT64( 0xFFF9001FFFFFFFFE ), + LIT64( 0xFFFD4767AB98E43F ), + LIT64( 0xFFFFFEFFFE00001E ), + LIT64( 0xFFFFFFFFFFF04EFD ), + LIT64( 
0x07FFFFFFFFFFF7FF ), + LIT64( 0xFFFC9EAA38F89050 ), + LIT64( 0x00000020FBFFFFFE ), + LIT64( 0x0000099AE6455357 ) +}; + +static void time_a_int64_z_float32( float32 function( int64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_int64_z_float64( float64 function( int64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + +static void time_a_int64_z_floatx80( floatx80 function( int64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum 
= ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +#ifdef FLOAT128 + +static void time_a_int64_z_float128( float128 function( int64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_int64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_int64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +enum { + numInputs_float32 = 32 +}; + +static const float32 inputs_float32[ numInputs_float32 ] = { + 0x4EFA0000, 0xC1D0B328, 0x80000000, 0x3E69A31E, + 0xAF803EFF, 0x3F800000, 0x17BF8000, 0xE74A301A, + 0x4E010003, 0x7EE3C75D, 0xBD803FE0, 0xBFFEFF00, + 0x7981F800, 0x431FFFFC, 0xC100C000, 0x3D87EFFF, + 0x4103FEFE, 0xBC000007, 0xBF01F7FF, 0x4E6C6B5C, + 0xC187FFFE, 0xC58B9F13, 0x4F88007F, 0xDF004007, + 0xB7FFD7FE, 0x7E8001FB, 0x46EFFBFF, 0x31C10000, + 0xDB428661, 0x33F89B1F, 0xA3BFEFFF, 0x537BFFBE +}; + +static void time_a_float32_z_int32( int32 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} 
+ +static void time_a_float32_z_int64( int64 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_float32_z_float64( float64 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + +static void time_a_float32_z_floatx80( floatx80 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( 
count, endClock - startClock ); + +} + +#endif + +#ifdef FLOAT128 + +static void time_a_float32_z_float128( float128 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +static void time_az_float32( float32 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_ab_float32_z_flag( flag function( float32, float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( + inputs_float32[ inputNumA ], inputs_float32[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float32 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < 
CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( + inputs_float32[ inputNumA ], inputs_float32[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float32 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_abz_float32( float32 function( float32, float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( + inputs_float32[ inputNumA ], inputs_float32[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float32 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( + inputs_float32[ inputNumA ], inputs_float32[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float32 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static const float32 inputs_float32_pos[ numInputs_float32 ] = { + 0x4EFA0000, 0x41D0B328, 0x00000000, 0x3E69A31E, + 0x2F803EFF, 0x3F800000, 0x17BF8000, 0x674A301A, + 0x4E010003, 0x7EE3C75D, 0x3D803FE0, 0x3FFEFF00, + 0x7981F800, 0x431FFFFC, 0x4100C000, 0x3D87EFFF, + 0x4103FEFE, 0x3C000007, 0x3F01F7FF, 0x4E6C6B5C, + 0x4187FFFE, 0x458B9F13, 0x4F88007F, 0x5F004007, + 0x37FFD7FE, 0x7E8001FB, 0x46EFFBFF, 0x31C10000, + 0x5B428661, 0x33F89B1F, 0x23BFEFFF, 0x537BFFBE +}; + +static void time_az_float32_pos( float32 function( float32 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 
inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float32_pos[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float32_pos[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float32 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +enum { + numInputs_float64 = 32 +}; + +static const float64 inputs_float64[ numInputs_float64 ] = { + LIT64( 0x422FFFC008000000 ), + LIT64( 0xB7E0000480000000 ), + LIT64( 0xF3FD2546120B7935 ), + LIT64( 0x3FF0000000000000 ), + LIT64( 0xCE07F766F09588D6 ), + LIT64( 0x8000000000000000 ), + LIT64( 0x3FCE000400000000 ), + LIT64( 0x8313B60F0032BED8 ), + LIT64( 0xC1EFFFFFC0002000 ), + LIT64( 0x3FB3C75D224F2B0F ), + LIT64( 0x7FD00000004000FF ), + LIT64( 0xA12FFF8000001FFF ), + LIT64( 0x3EE0000000FE0000 ), + LIT64( 0x0010000080000004 ), + LIT64( 0x41CFFFFE00000020 ), + LIT64( 0x40303FFFFFFFFFFD ), + LIT64( 0x3FD000003FEFFFFF ), + LIT64( 0xBFD0000010000000 ), + LIT64( 0xB7FC6B5C16CA55CF ), + LIT64( 0x413EEB940B9D1301 ), + LIT64( 0xC7E00200001FFFFF ), + LIT64( 0x47F00021FFFFFFFE ), + LIT64( 0xBFFFFFFFF80000FF ), + LIT64( 0xC07FFFFFE00FFFFF ), + LIT64( 0x001497A63740C5E8 ), + LIT64( 0xC4BFFFE0001FFFFF ), + LIT64( 0x96FFDFFEFFFFFFFF ), + LIT64( 0x403FC000000001FE ), + LIT64( 0xFFD00000000001F6 ), + LIT64( 0x0640400002000000 ), + LIT64( 0x479CEE1E4F789FE0 ), + LIT64( 0xC237FFFFFFFFFDFE ) +}; + +static void time_a_float64_z_int32( int32 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count 
+= minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_float64_z_int64( int64 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_float64_z_float32( float32 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + +static void time_a_float64_z_floatx80( floatx80 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( 
numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +#ifdef FLOAT128 + +static void time_a_float64_z_float128( float128 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +static void time_az_float64( float64 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_ab_float64_z_flag( flag function( float64, float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; 
i; --i ) { + function( + inputs_float64[ inputNumA ], inputs_float64[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float64 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( + inputs_float64[ inputNumA ], inputs_float64[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float64 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_abz_float64( float64 function( float64, float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( + inputs_float64[ inputNumA ], inputs_float64[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float64 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( + inputs_float64[ inputNumA ], inputs_float64[ inputNumB ] ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float64 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static const float64 inputs_float64_pos[ numInputs_float64 ] = { + LIT64( 0x422FFFC008000000 ), + LIT64( 0x37E0000480000000 ), + LIT64( 0x73FD2546120B7935 ), + LIT64( 0x3FF0000000000000 ), + LIT64( 0x4E07F766F09588D6 ), + LIT64( 0x0000000000000000 ), + LIT64( 0x3FCE000400000000 ), + 
LIT64( 0x0313B60F0032BED8 ), + LIT64( 0x41EFFFFFC0002000 ), + LIT64( 0x3FB3C75D224F2B0F ), + LIT64( 0x7FD00000004000FF ), + LIT64( 0x212FFF8000001FFF ), + LIT64( 0x3EE0000000FE0000 ), + LIT64( 0x0010000080000004 ), + LIT64( 0x41CFFFFE00000020 ), + LIT64( 0x40303FFFFFFFFFFD ), + LIT64( 0x3FD000003FEFFFFF ), + LIT64( 0x3FD0000010000000 ), + LIT64( 0x37FC6B5C16CA55CF ), + LIT64( 0x413EEB940B9D1301 ), + LIT64( 0x47E00200001FFFFF ), + LIT64( 0x47F00021FFFFFFFE ), + LIT64( 0x3FFFFFFFF80000FF ), + LIT64( 0x407FFFFFE00FFFFF ), + LIT64( 0x001497A63740C5E8 ), + LIT64( 0x44BFFFE0001FFFFF ), + LIT64( 0x16FFDFFEFFFFFFFF ), + LIT64( 0x403FC000000001FE ), + LIT64( 0x7FD00000000001F6 ), + LIT64( 0x0640400002000000 ), + LIT64( 0x479CEE1E4F789FE0 ), + LIT64( 0x4237FFFFFFFFFDFE ) +}; + +static void time_az_float64_pos( float64 function( float64 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + function( inputs_float64_pos[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + function( inputs_float64_pos[ inputNum ] ); + inputNum = ( inputNum + 1 ) & ( numInputs_float64 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + +enum { + numInputs_floatx80 = 32 +}; + +static const struct { + bits16 high; + bits64 low; +} inputs_floatx80[ numInputs_floatx80 ] = { + { 0xC03F, LIT64( 0xA9BE15A19C1E8B62 ) }, + { 0x8000, LIT64( 0x0000000000000000 ) }, + { 0x75A8, LIT64( 0xE59591E4788957A5 ) }, + { 0xBFFF, LIT64( 0xFFF0000000000040 ) }, + { 0x0CD8, LIT64( 0xFC000000000007FE ) }, + { 0x43BA, LIT64( 0x99A4000000000000 ) }, + { 0x3FFF, LIT64( 0x8000000000000000 ) }, + { 0x4081, LIT64( 0x94FBF1BCEB5545F0 ) }, + { 0x403E, LIT64( 0xFFF0000000002000 ) }, + 
{ 0x3FFE, LIT64( 0xC860E3C75D224F28 ) }, + { 0x407E, LIT64( 0xFC00000FFFFFFFFE ) }, + { 0x737A, LIT64( 0x800000007FFDFFFE ) }, + { 0x4044, LIT64( 0xFFFFFF80000FFFFF ) }, + { 0xBBFE, LIT64( 0x8000040000001FFE ) }, + { 0xC002, LIT64( 0xFF80000000000020 ) }, + { 0xDE8D, LIT64( 0xFFFFFFFFFFE00004 ) }, + { 0xC004, LIT64( 0x8000000000003FFB ) }, + { 0x407F, LIT64( 0x800000000003FFFE ) }, + { 0xC000, LIT64( 0xA459EE6A5C16CA55 ) }, + { 0x8003, LIT64( 0xC42CBF7399AEEB94 ) }, + { 0xBF7F, LIT64( 0xF800000000000006 ) }, + { 0xC07F, LIT64( 0xBF56BE8871F28FEA ) }, + { 0xC07E, LIT64( 0xFFFF77FFFFFFFFFE ) }, + { 0xADC9, LIT64( 0x8000000FFFFFFFDE ) }, + { 0xC001, LIT64( 0xEFF7FFFFFFFFFFFF ) }, + { 0x4001, LIT64( 0xBE84F30125C497A6 ) }, + { 0xC06B, LIT64( 0xEFFFFFFFFFFFFFFF ) }, + { 0x4080, LIT64( 0xFFFFFFFFBFFFFFFF ) }, + { 0x87E9, LIT64( 0x81FFFFFFFFFFFBFF ) }, + { 0xA63F, LIT64( 0x801FFFFFFEFFFFFE ) }, + { 0x403C, LIT64( 0x801FFFFFFFF7FFFF ) }, + { 0x4018, LIT64( 0x8000000000080003 ) } +}; + +static void time_a_floatx80_z_int32( int32 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_floatx80_z_int64( int64 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock 
= clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_floatx80_z_float32( float32 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_floatx80_z_float64( float64 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; 
--i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOAT128 + +static void time_a_floatx80_z_float128( float128 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +static void time_az_floatx80( floatx80 function( floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNum ].low; + a.high = inputs_floatx80[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_ab_floatx80_z_flag( flag function( floatx80, floatx80 ) ) +{ + clock_t 
startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + floatx80 a, b; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNumA ].low; + a.high = inputs_floatx80[ inputNumA ].high; + b.low = inputs_floatx80[ inputNumB ].low; + b.high = inputs_floatx80[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_floatx80 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80[ inputNumA ].low; + a.high = inputs_floatx80[ inputNumA ].high; + b.low = inputs_floatx80[ inputNumB ].low; + b.high = inputs_floatx80[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_floatx80 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_abz_floatx80( floatx80 function( floatx80, floatx80 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + floatx80 a, b; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80[ inputNumA ].low; + a.high = inputs_floatx80[ inputNumA ].high; + b.low = inputs_floatx80[ inputNumB ].low; + b.high = inputs_floatx80[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_floatx80 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { 
+ a.low = inputs_floatx80[ inputNumA ].low; + a.high = inputs_floatx80[ inputNumA ].high; + b.low = inputs_floatx80[ inputNumB ].low; + b.high = inputs_floatx80[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_floatx80 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static const struct { + bits16 high; + bits64 low; +} inputs_floatx80_pos[ numInputs_floatx80 ] = { + { 0x403F, LIT64( 0xA9BE15A19C1E8B62 ) }, + { 0x0000, LIT64( 0x0000000000000000 ) }, + { 0x75A8, LIT64( 0xE59591E4788957A5 ) }, + { 0x3FFF, LIT64( 0xFFF0000000000040 ) }, + { 0x0CD8, LIT64( 0xFC000000000007FE ) }, + { 0x43BA, LIT64( 0x99A4000000000000 ) }, + { 0x3FFF, LIT64( 0x8000000000000000 ) }, + { 0x4081, LIT64( 0x94FBF1BCEB5545F0 ) }, + { 0x403E, LIT64( 0xFFF0000000002000 ) }, + { 0x3FFE, LIT64( 0xC860E3C75D224F28 ) }, + { 0x407E, LIT64( 0xFC00000FFFFFFFFE ) }, + { 0x737A, LIT64( 0x800000007FFDFFFE ) }, + { 0x4044, LIT64( 0xFFFFFF80000FFFFF ) }, + { 0x3BFE, LIT64( 0x8000040000001FFE ) }, + { 0x4002, LIT64( 0xFF80000000000020 ) }, + { 0x5E8D, LIT64( 0xFFFFFFFFFFE00004 ) }, + { 0x4004, LIT64( 0x8000000000003FFB ) }, + { 0x407F, LIT64( 0x800000000003FFFE ) }, + { 0x4000, LIT64( 0xA459EE6A5C16CA55 ) }, + { 0x0003, LIT64( 0xC42CBF7399AEEB94 ) }, + { 0x3F7F, LIT64( 0xF800000000000006 ) }, + { 0x407F, LIT64( 0xBF56BE8871F28FEA ) }, + { 0x407E, LIT64( 0xFFFF77FFFFFFFFFE ) }, + { 0x2DC9, LIT64( 0x8000000FFFFFFFDE ) }, + { 0x4001, LIT64( 0xEFF7FFFFFFFFFFFF ) }, + { 0x4001, LIT64( 0xBE84F30125C497A6 ) }, + { 0x406B, LIT64( 0xEFFFFFFFFFFFFFFF ) }, + { 0x4080, LIT64( 0xFFFFFFFFBFFFFFFF ) }, + { 0x07E9, LIT64( 0x81FFFFFFFFFFFBFF ) }, + { 0x263F, LIT64( 0x801FFFFFFEFFFFFE ) }, + { 0x403C, LIT64( 0x801FFFFFFFF7FFFF ) }, + { 0x4018, LIT64( 0x8000000000080003 ) } +}; + +static void time_az_floatx80_pos( floatx80 function( floatx80 ) ) +{ + 
clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + floatx80 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_floatx80_pos[ inputNum ].low; + a.high = inputs_floatx80_pos[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_floatx80_pos[ inputNum ].low; + a.high = inputs_floatx80_pos[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_floatx80 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +#ifdef FLOAT128 + +enum { + numInputs_float128 = 32 +}; + +static const struct { + bits64 high, low; +} inputs_float128[ numInputs_float128 ] = { + { LIT64( 0x3FDA200000100000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x3FFF000000000000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x85F14776190C8306 ), LIT64( 0xD8715F4E3D54BB92 ) }, + { LIT64( 0xF2B00000007FFFFF ), LIT64( 0xFFFFFFFFFFF7FFFF ) }, + { LIT64( 0x8000000000000000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0xBFFFFFFFFFE00000 ), LIT64( 0x0000008000000000 ) }, + { LIT64( 0x407F1719CE722F3E ), LIT64( 0xDA6B3FE5FF29425B ) }, + { LIT64( 0x43FFFF8000000000 ), LIT64( 0x0000000000400000 ) }, + { LIT64( 0x401E000000000100 ), LIT64( 0x0000000000002000 ) }, + { LIT64( 0x3FFED71DACDA8E47 ), LIT64( 0x4860E3C75D224F28 ) }, + { LIT64( 0xBF7ECFC1E90647D1 ), LIT64( 0x7A124FE55623EE44 ) }, + { LIT64( 0x0DF7007FFFFFFFFF ), LIT64( 0xFFFFFFFFEFFFFFFF ) }, + { LIT64( 0x3FE5FFEFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFEFFF ) }, + { LIT64( 0x403FFFFFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFFBFE ) }, + { LIT64( 0xBFFB2FBF7399AFEB ), LIT64( 0xA459EE6A5C16CA55 ) }, + { LIT64( 0xBDB8FFFFFFFFFFFC ), LIT64( 0x0000000000000400 ) }, + { LIT64( 0x3FC8FFDFFFFFFFFF ), LIT64( 0xFFFFFFFFF0000000 ) 
}, + { LIT64( 0x3FFBFFFFFFDFFFFF ), LIT64( 0xFFF8000000000000 ) }, + { LIT64( 0x407043C11737BE84 ), LIT64( 0xDDD58212ADC937F4 ) }, + { LIT64( 0x8001000000000000 ), LIT64( 0x0000001000000001 ) }, + { LIT64( 0xC036FFFFFFFFFFFF ), LIT64( 0xFE40000000000000 ) }, + { LIT64( 0x4002FFFFFE000002 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x4000C3FEDE897773 ), LIT64( 0x326AC4FD8EFBE6DC ) }, + { LIT64( 0xBFFF0000000FFFFF ), LIT64( 0xFFFFFE0000000000 ) }, + { LIT64( 0x62C3E502146E426D ), LIT64( 0x43F3CAA0DC7DF1A0 ) }, + { LIT64( 0xB5CBD32E52BB570E ), LIT64( 0xBCC477CB11C6236C ) }, + { LIT64( 0xE228FFFFFFC00000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x3F80000000000000 ), LIT64( 0x0000000080000008 ) }, + { LIT64( 0xC1AFFFDFFFFFFFFF ), LIT64( 0xFFFC000000000000 ) }, + { LIT64( 0xC96F000000000000 ), LIT64( 0x00000001FFFBFFFF ) }, + { LIT64( 0x3DE09BFE7923A338 ), LIT64( 0xBCC8FBBD7CEC1F4F ) }, + { LIT64( 0x401CFFFFFFFFFFFF ), LIT64( 0xFFFFFFFEFFFFFF80 ) } +}; + +static void time_a_float128_z_int32( int32 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_float128_z_int64( int64 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for 
( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_float128_z_float32( float32 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_a_float128_z_float64( float64 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = 
inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#ifdef FLOATX80 + +static void time_a_float128_z_floatx80( floatx80 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +static void time_az_float128( float128 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNum ].low; + a.high = inputs_float128[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_ab_float128_z_flag( flag function( float128, float128 ) ) +{ + clock_t startClock, 
endClock; + int32 count, i; + int8 inputNumA, inputNumB; + float128 a, b; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNumA ].low; + a.high = inputs_float128[ inputNumA ].high; + b.low = inputs_float128[ inputNumB ].low; + b.high = inputs_float128[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float128 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128[ inputNumA ].low; + a.high = inputs_float128[ inputNumA ].high; + b.low = inputs_float128[ inputNumB ].low; + b.high = inputs_float128[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float128 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static void time_abz_float128( float128 function( float128, float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNumA, inputNumB; + float128 a, b; + + count = 0; + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128[ inputNumA ].low; + a.high = inputs_float128[ inputNumA ].high; + b.low = inputs_float128[ inputNumB ].low; + b.high = inputs_float128[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float128 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNumA = 0; + inputNumB = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = 
inputs_float128[ inputNumA ].low; + a.high = inputs_float128[ inputNumA ].high; + b.low = inputs_float128[ inputNumB ].low; + b.high = inputs_float128[ inputNumB ].high; + function( a, b ); + inputNumA = ( inputNumA + 1 ) & ( numInputs_float128 - 1 ); + if ( inputNumA == 0 ) ++inputNumB; + inputNumB = ( inputNumB + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +static const struct { + bits64 high, low; +} inputs_float128_pos[ numInputs_float128 ] = { + { LIT64( 0x3FDA200000100000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x3FFF000000000000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x05F14776190C8306 ), LIT64( 0xD8715F4E3D54BB92 ) }, + { LIT64( 0x72B00000007FFFFF ), LIT64( 0xFFFFFFFFFFF7FFFF ) }, + { LIT64( 0x0000000000000000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x3FFFFFFFFFE00000 ), LIT64( 0x0000008000000000 ) }, + { LIT64( 0x407F1719CE722F3E ), LIT64( 0xDA6B3FE5FF29425B ) }, + { LIT64( 0x43FFFF8000000000 ), LIT64( 0x0000000000400000 ) }, + { LIT64( 0x401E000000000100 ), LIT64( 0x0000000000002000 ) }, + { LIT64( 0x3FFED71DACDA8E47 ), LIT64( 0x4860E3C75D224F28 ) }, + { LIT64( 0x3F7ECFC1E90647D1 ), LIT64( 0x7A124FE55623EE44 ) }, + { LIT64( 0x0DF7007FFFFFFFFF ), LIT64( 0xFFFFFFFFEFFFFFFF ) }, + { LIT64( 0x3FE5FFEFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFEFFF ) }, + { LIT64( 0x403FFFFFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFFBFE ) }, + { LIT64( 0x3FFB2FBF7399AFEB ), LIT64( 0xA459EE6A5C16CA55 ) }, + { LIT64( 0x3DB8FFFFFFFFFFFC ), LIT64( 0x0000000000000400 ) }, + { LIT64( 0x3FC8FFDFFFFFFFFF ), LIT64( 0xFFFFFFFFF0000000 ) }, + { LIT64( 0x3FFBFFFFFFDFFFFF ), LIT64( 0xFFF8000000000000 ) }, + { LIT64( 0x407043C11737BE84 ), LIT64( 0xDDD58212ADC937F4 ) }, + { LIT64( 0x0001000000000000 ), LIT64( 0x0000001000000001 ) }, + { LIT64( 0x4036FFFFFFFFFFFF ), LIT64( 0xFE40000000000000 ) }, + { LIT64( 0x4002FFFFFE000002 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x4000C3FEDE897773 ), LIT64( 0x326AC4FD8EFBE6DC ) }, + { 
LIT64( 0x3FFF0000000FFFFF ), LIT64( 0xFFFFFE0000000000 ) }, + { LIT64( 0x62C3E502146E426D ), LIT64( 0x43F3CAA0DC7DF1A0 ) }, + { LIT64( 0x35CBD32E52BB570E ), LIT64( 0xBCC477CB11C6236C ) }, + { LIT64( 0x6228FFFFFFC00000 ), LIT64( 0x0000000000000000 ) }, + { LIT64( 0x3F80000000000000 ), LIT64( 0x0000000080000008 ) }, + { LIT64( 0x41AFFFDFFFFFFFFF ), LIT64( 0xFFFC000000000000 ) }, + { LIT64( 0x496F000000000000 ), LIT64( 0x00000001FFFBFFFF ) }, + { LIT64( 0x3DE09BFE7923A338 ), LIT64( 0xBCC8FBBD7CEC1F4F ) }, + { LIT64( 0x401CFFFFFFFFFFFF ), LIT64( 0xFFFFFFFEFFFFFF80 ) } +}; + +static void time_az_float128_pos( float128 function( float128 ) ) +{ + clock_t startClock, endClock; + int32 count, i; + int8 inputNum; + float128 a; + + count = 0; + inputNum = 0; + startClock = clock(); + do { + for ( i = minIterations; i; --i ) { + a.low = inputs_float128_pos[ inputNum ].low; + a.high = inputs_float128_pos[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + count += minIterations; + } while ( clock() - startClock < CLOCKS_PER_SEC ); + inputNum = 0; + startClock = clock(); + for ( i = count; i; --i ) { + a.low = inputs_float128_pos[ inputNum ].low; + a.high = inputs_float128_pos[ inputNum ].high; + function( a ); + inputNum = ( inputNum + 1 ) & ( numInputs_float128 - 1 ); + } + endClock = clock(); + reportTime( count, endClock - startClock ); + +} + +#endif + +enum { + INT32_TO_FLOAT32 = 1, + INT32_TO_FLOAT64, +#ifdef FLOATX80 + INT32_TO_FLOATX80, +#endif +#ifdef FLOAT128 + INT32_TO_FLOAT128, +#endif + INT64_TO_FLOAT32, + INT64_TO_FLOAT64, +#ifdef FLOATX80 + INT64_TO_FLOATX80, +#endif +#ifdef FLOAT128 + INT64_TO_FLOAT128, +#endif + FLOAT32_TO_INT32, + FLOAT32_TO_INT32_ROUND_TO_ZERO, + FLOAT32_TO_INT64, + FLOAT32_TO_INT64_ROUND_TO_ZERO, + FLOAT32_TO_FLOAT64, +#ifdef FLOATX80 + FLOAT32_TO_FLOATX80, +#endif +#ifdef FLOAT128 + FLOAT32_TO_FLOAT128, +#endif + FLOAT32_ROUND_TO_INT, + FLOAT32_ADD, + FLOAT32_SUB, + FLOAT32_MUL, + 
FLOAT32_DIV, + FLOAT32_REM, + FLOAT32_SQRT, + FLOAT32_EQ, + FLOAT32_LE, + FLOAT32_LT, + FLOAT32_EQ_SIGNALING, + FLOAT32_LE_QUIET, + FLOAT32_LT_QUIET, + FLOAT64_TO_INT32, + FLOAT64_TO_INT32_ROUND_TO_ZERO, + FLOAT64_TO_INT64, + FLOAT64_TO_INT64_ROUND_TO_ZERO, + FLOAT64_TO_FLOAT32, +#ifdef FLOATX80 + FLOAT64_TO_FLOATX80, +#endif +#ifdef FLOAT128 + FLOAT64_TO_FLOAT128, +#endif + FLOAT64_ROUND_TO_INT, + FLOAT64_ADD, + FLOAT64_SUB, + FLOAT64_MUL, + FLOAT64_DIV, + FLOAT64_REM, + FLOAT64_SQRT, + FLOAT64_EQ, + FLOAT64_LE, + FLOAT64_LT, + FLOAT64_EQ_SIGNALING, + FLOAT64_LE_QUIET, + FLOAT64_LT_QUIET, +#ifdef FLOATX80 + FLOATX80_TO_INT32, + FLOATX80_TO_INT32_ROUND_TO_ZERO, + FLOATX80_TO_INT64, + FLOATX80_TO_INT64_ROUND_TO_ZERO, + FLOATX80_TO_FLOAT32, + FLOATX80_TO_FLOAT64, +#ifdef FLOAT128 + FLOATX80_TO_FLOAT128, +#endif + FLOATX80_ROUND_TO_INT, + FLOATX80_ADD, + FLOATX80_SUB, + FLOATX80_MUL, + FLOATX80_DIV, + FLOATX80_REM, + FLOATX80_SQRT, + FLOATX80_EQ, + FLOATX80_LE, + FLOATX80_LT, + FLOATX80_EQ_SIGNALING, + FLOATX80_LE_QUIET, + FLOATX80_LT_QUIET, +#endif +#ifdef FLOAT128 + FLOAT128_TO_INT32, + FLOAT128_TO_INT32_ROUND_TO_ZERO, + FLOAT128_TO_INT64, + FLOAT128_TO_INT64_ROUND_TO_ZERO, + FLOAT128_TO_FLOAT32, + FLOAT128_TO_FLOAT64, +#ifdef FLOATX80 + FLOAT128_TO_FLOATX80, +#endif + FLOAT128_ROUND_TO_INT, + FLOAT128_ADD, + FLOAT128_SUB, + FLOAT128_MUL, + FLOAT128_DIV, + FLOAT128_REM, + FLOAT128_SQRT, + FLOAT128_EQ, + FLOAT128_LE, + FLOAT128_LT, + FLOAT128_EQ_SIGNALING, + FLOAT128_LE_QUIET, + FLOAT128_LT_QUIET, +#endif + NUM_FUNCTIONS +}; + +static struct { + char *name; + int8 numInputs; + flag roundingPrecision, roundingMode; + flag tininessMode, tininessModeAtReducedPrecision; +} functions[ NUM_FUNCTIONS ] = { + { 0, 0, 0, 0, 0, 0 }, + { "int32_to_float32", 1, FALSE, TRUE, FALSE, FALSE }, + { "int32_to_float64", 1, FALSE, FALSE, FALSE, FALSE }, +#ifdef FLOATX80 + { "int32_to_floatx80", 1, FALSE, FALSE, FALSE, FALSE }, +#endif +#ifdef FLOAT128 + { "int32_to_float128", 1, FALSE, 
FALSE, FALSE, FALSE }, +#endif + { "int64_to_float32", 1, FALSE, TRUE, FALSE, FALSE }, + { "int64_to_float64", 1, FALSE, TRUE, FALSE, FALSE }, +#ifdef FLOATX80 + { "int64_to_floatx80", 1, FALSE, FALSE, FALSE, FALSE }, +#endif +#ifdef FLOAT128 + { "int64_to_float128", 1, FALSE, FALSE, FALSE, FALSE }, +#endif + { "float32_to_int32", 1, FALSE, TRUE, FALSE, FALSE }, + { "float32_to_int32_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "float32_to_int64", 1, FALSE, TRUE, FALSE, FALSE }, + { "float32_to_int64_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "float32_to_float64", 1, FALSE, FALSE, FALSE, FALSE }, +#ifdef FLOATX80 + { "float32_to_floatx80", 1, FALSE, FALSE, FALSE, FALSE }, +#endif +#ifdef FLOAT128 + { "float32_to_float128", 1, FALSE, FALSE, FALSE, FALSE }, +#endif + { "float32_round_to_int", 1, FALSE, TRUE, FALSE, FALSE }, + { "float32_add", 2, FALSE, TRUE, FALSE, FALSE }, + { "float32_sub", 2, FALSE, TRUE, FALSE, FALSE }, + { "float32_mul", 2, FALSE, TRUE, TRUE, FALSE }, + { "float32_div", 2, FALSE, TRUE, FALSE, FALSE }, + { "float32_rem", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_sqrt", 1, FALSE, TRUE, FALSE, FALSE }, + { "float32_eq", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_le", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_lt", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_eq_signaling", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_le_quiet", 2, FALSE, FALSE, FALSE, FALSE }, + { "float32_lt_quiet", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_to_int32", 1, FALSE, TRUE, FALSE, FALSE }, + { "float64_to_int32_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "float64_to_int64", 1, FALSE, TRUE, FALSE, FALSE }, + { "float64_to_int64_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "float64_to_float32", 1, FALSE, TRUE, TRUE, FALSE }, +#ifdef FLOATX80 + { "float64_to_floatx80", 1, FALSE, FALSE, FALSE, FALSE }, +#endif +#ifdef FLOAT128 + { "float64_to_float128", 1, FALSE, FALSE, FALSE, FALSE }, +#endif + { "float64_round_to_int", 1, 
FALSE, TRUE, FALSE, FALSE }, + { "float64_add", 2, FALSE, TRUE, FALSE, FALSE }, + { "float64_sub", 2, FALSE, TRUE, FALSE, FALSE }, + { "float64_mul", 2, FALSE, TRUE, TRUE, FALSE }, + { "float64_div", 2, FALSE, TRUE, FALSE, FALSE }, + { "float64_rem", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_sqrt", 1, FALSE, TRUE, FALSE, FALSE }, + { "float64_eq", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_le", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_lt", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_eq_signaling", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_le_quiet", 2, FALSE, FALSE, FALSE, FALSE }, + { "float64_lt_quiet", 2, FALSE, FALSE, FALSE, FALSE }, +#ifdef FLOATX80 + { "floatx80_to_int32", 1, FALSE, TRUE, FALSE, FALSE }, + { "floatx80_to_int32_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_to_int64", 1, FALSE, TRUE, FALSE, FALSE }, + { "floatx80_to_int64_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_to_float32", 1, FALSE, TRUE, TRUE, FALSE }, + { "floatx80_to_float64", 1, FALSE, TRUE, TRUE, FALSE }, +#ifdef FLOAT128 + { "floatx80_to_float128", 1, FALSE, FALSE, FALSE, FALSE }, +#endif + { "floatx80_round_to_int", 1, FALSE, TRUE, FALSE, FALSE }, + { "floatx80_add", 2, TRUE, TRUE, FALSE, TRUE }, + { "floatx80_sub", 2, TRUE, TRUE, FALSE, TRUE }, + { "floatx80_mul", 2, TRUE, TRUE, TRUE, TRUE }, + { "floatx80_div", 2, TRUE, TRUE, FALSE, TRUE }, + { "floatx80_rem", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_sqrt", 1, TRUE, TRUE, FALSE, FALSE }, + { "floatx80_eq", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_le", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_lt", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_eq_signaling", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_le_quiet", 2, FALSE, FALSE, FALSE, FALSE }, + { "floatx80_lt_quiet", 2, FALSE, FALSE, FALSE, FALSE }, +#endif +#ifdef FLOAT128 + { "float128_to_int32", 1, FALSE, TRUE, FALSE, FALSE }, + { "float128_to_int32_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { 
"float128_to_int64", 1, FALSE, TRUE, FALSE, FALSE }, + { "float128_to_int64_round_to_zero", 1, FALSE, FALSE, FALSE, FALSE }, + { "float128_to_float32", 1, FALSE, TRUE, TRUE, FALSE }, + { "float128_to_float64", 1, FALSE, TRUE, TRUE, FALSE }, +#ifdef FLOATX80 + { "float128_to_floatx80", 1, FALSE, TRUE, TRUE, FALSE }, +#endif + { "float128_round_to_int", 1, FALSE, TRUE, FALSE, FALSE }, + { "float128_add", 2, FALSE, TRUE, FALSE, FALSE }, + { "float128_sub", 2, FALSE, TRUE, FALSE, FALSE }, + { "float128_mul", 2, FALSE, TRUE, TRUE, FALSE }, + { "float128_div", 2, FALSE, TRUE, FALSE, FALSE }, + { "float128_rem", 2, FALSE, FALSE, FALSE, FALSE }, + { "float128_sqrt", 1, FALSE, TRUE, FALSE, FALSE }, + { "float128_eq", 2, FALSE, FALSE, FALSE, FALSE }, + { "float128_le", 2, FALSE, FALSE, FALSE, FALSE }, + { "float128_lt", 2, FALSE, FALSE, FALSE, FALSE }, + { "float128_eq_signaling", 2, FALSE, FALSE, FALSE, FALSE }, + { "float128_le_quiet", 2, FALSE, FALSE, FALSE, FALSE }, + { "float128_lt_quiet", 2, FALSE, FALSE, FALSE, FALSE }, +#endif +}; + +enum { + ROUND_NEAREST_EVEN = 1, + ROUND_TO_ZERO, + ROUND_DOWN, + ROUND_UP, + NUM_ROUNDINGMODES +}; +enum { + TININESS_BEFORE_ROUNDING = 1, + TININESS_AFTER_ROUNDING, + NUM_TININESSMODES +}; + +static void + timeFunctionVariety( + uint8 functionCode, + int8 roundingPrecision, + int8 roundingMode, + int8 tininessMode + ) +{ + uint8 roundingCode; + int8 tininessCode; + + functionName = functions[ functionCode ].name; + if ( roundingPrecision == 32 ) { + roundingPrecisionName = "32"; + } + else if ( roundingPrecision == 64 ) { + roundingPrecisionName = "64"; + } + else if ( roundingPrecision == 80 ) { + roundingPrecisionName = "80"; + } + else { + roundingPrecisionName = 0; + } +#ifdef FLOATX80 + floatx80_rounding_precision = roundingPrecision; +#endif + switch ( roundingMode ) { + case 0: + roundingModeName = 0; + roundingCode = float_round_nearest_even; + break; + case ROUND_NEAREST_EVEN: + roundingModeName = "nearest_even"; + 
roundingCode = float_round_nearest_even; + break; + case ROUND_TO_ZERO: + roundingModeName = "to_zero"; + roundingCode = float_round_to_zero; + break; + case ROUND_DOWN: + roundingModeName = "down"; + roundingCode = float_round_down; + break; + case ROUND_UP: + roundingModeName = "up"; + roundingCode = float_round_up; + break; + } + float_rounding_mode = roundingCode; + switch ( tininessMode ) { + case 0: + tininessModeName = 0; + tininessCode = float_tininess_after_rounding; + break; + case TININESS_BEFORE_ROUNDING: + tininessModeName = "before"; + tininessCode = float_tininess_before_rounding; + break; + case TININESS_AFTER_ROUNDING: + tininessModeName = "after"; + tininessCode = float_tininess_after_rounding; + break; + } + float_detect_tininess = tininessCode; + switch ( functionCode ) { + case INT32_TO_FLOAT32: + time_a_int32_z_float32( int32_to_float32 ); + break; + case INT32_TO_FLOAT64: + time_a_int32_z_float64( int32_to_float64 ); + break; +#ifdef FLOATX80 + case INT32_TO_FLOATX80: + time_a_int32_z_floatx80( int32_to_floatx80 ); + break; +#endif +#ifdef FLOAT128 + case INT32_TO_FLOAT128: + time_a_int32_z_float128( int32_to_float128 ); + break; +#endif + case INT64_TO_FLOAT32: + time_a_int64_z_float32( int64_to_float32 ); + break; + case INT64_TO_FLOAT64: + time_a_int64_z_float64( int64_to_float64 ); + break; +#ifdef FLOATX80 + case INT64_TO_FLOATX80: + time_a_int64_z_floatx80( int64_to_floatx80 ); + break; +#endif +#ifdef FLOAT128 + case INT64_TO_FLOAT128: + time_a_int64_z_float128( int64_to_float128 ); + break; +#endif + case FLOAT32_TO_INT32: + time_a_float32_z_int32( float32_to_int32 ); + break; + case FLOAT32_TO_INT32_ROUND_TO_ZERO: + time_a_float32_z_int32( float32_to_int32_round_to_zero ); + break; + case FLOAT32_TO_INT64: + time_a_float32_z_int64( float32_to_int64 ); + break; + case FLOAT32_TO_INT64_ROUND_TO_ZERO: + time_a_float32_z_int64( float32_to_int64_round_to_zero ); + break; + case FLOAT32_TO_FLOAT64: + time_a_float32_z_float64( 
float32_to_float64 ); + break; +#ifdef FLOATX80 + case FLOAT32_TO_FLOATX80: + time_a_float32_z_floatx80( float32_to_floatx80 ); + break; +#endif +#ifdef FLOAT128 + case FLOAT32_TO_FLOAT128: + time_a_float32_z_float128( float32_to_float128 ); + break; +#endif + case FLOAT32_ROUND_TO_INT: + time_az_float32( float32_round_to_int ); + break; + case FLOAT32_ADD: + time_abz_float32( float32_add ); + break; + case FLOAT32_SUB: + time_abz_float32( float32_sub ); + break; + case FLOAT32_MUL: + time_abz_float32( float32_mul ); + break; + case FLOAT32_DIV: + time_abz_float32( float32_div ); + break; + case FLOAT32_REM: + time_abz_float32( float32_rem ); + break; + case FLOAT32_SQRT: + time_az_float32_pos( float32_sqrt ); + break; + case FLOAT32_EQ: + time_ab_float32_z_flag( float32_eq ); + break; + case FLOAT32_LE: + time_ab_float32_z_flag( float32_le ); + break; + case FLOAT32_LT: + time_ab_float32_z_flag( float32_lt ); + break; + case FLOAT32_EQ_SIGNALING: + time_ab_float32_z_flag( float32_eq_signaling ); + break; + case FLOAT32_LE_QUIET: + time_ab_float32_z_flag( float32_le_quiet ); + break; + case FLOAT32_LT_QUIET: + time_ab_float32_z_flag( float32_lt_quiet ); + break; + case FLOAT64_TO_INT32: + time_a_float64_z_int32( float64_to_int32 ); + break; + case FLOAT64_TO_INT32_ROUND_TO_ZERO: + time_a_float64_z_int32( float64_to_int32_round_to_zero ); + break; + case FLOAT64_TO_INT64: + time_a_float64_z_int64( float64_to_int64 ); + break; + case FLOAT64_TO_INT64_ROUND_TO_ZERO: + time_a_float64_z_int64( float64_to_int64_round_to_zero ); + break; + case FLOAT64_TO_FLOAT32: + time_a_float64_z_float32( float64_to_float32 ); + break; +#ifdef FLOATX80 + case FLOAT64_TO_FLOATX80: + time_a_float64_z_floatx80( float64_to_floatx80 ); + break; +#endif +#ifdef FLOAT128 + case FLOAT64_TO_FLOAT128: + time_a_float64_z_float128( float64_to_float128 ); + break; +#endif + case FLOAT64_ROUND_TO_INT: + time_az_float64( float64_round_to_int ); + break; + case FLOAT64_ADD: + time_abz_float64( 
float64_add ); + break; + case FLOAT64_SUB: + time_abz_float64( float64_sub ); + break; + case FLOAT64_MUL: + time_abz_float64( float64_mul ); + break; + case FLOAT64_DIV: + time_abz_float64( float64_div ); + break; + case FLOAT64_REM: + time_abz_float64( float64_rem ); + break; + case FLOAT64_SQRT: + time_az_float64_pos( float64_sqrt ); + break; + case FLOAT64_EQ: + time_ab_float64_z_flag( float64_eq ); + break; + case FLOAT64_LE: + time_ab_float64_z_flag( float64_le ); + break; + case FLOAT64_LT: + time_ab_float64_z_flag( float64_lt ); + break; + case FLOAT64_EQ_SIGNALING: + time_ab_float64_z_flag( float64_eq_signaling ); + break; + case FLOAT64_LE_QUIET: + time_ab_float64_z_flag( float64_le_quiet ); + break; + case FLOAT64_LT_QUIET: + time_ab_float64_z_flag( float64_lt_quiet ); + break; +#ifdef FLOATX80 + case FLOATX80_TO_INT32: + time_a_floatx80_z_int32( floatx80_to_int32 ); + break; + case FLOATX80_TO_INT32_ROUND_TO_ZERO: + time_a_floatx80_z_int32( floatx80_to_int32_round_to_zero ); + break; + case FLOATX80_TO_INT64: + time_a_floatx80_z_int64( floatx80_to_int64 ); + break; + case FLOATX80_TO_INT64_ROUND_TO_ZERO: + time_a_floatx80_z_int64( floatx80_to_int64_round_to_zero ); + break; + case FLOATX80_TO_FLOAT32: + time_a_floatx80_z_float32( floatx80_to_float32 ); + break; + case FLOATX80_TO_FLOAT64: + time_a_floatx80_z_float64( floatx80_to_float64 ); + break; +#ifdef FLOAT128 + case FLOATX80_TO_FLOAT128: + time_a_floatx80_z_float128( floatx80_to_float128 ); + break; +#endif + case FLOATX80_ROUND_TO_INT: + time_az_floatx80( floatx80_round_to_int ); + break; + case FLOATX80_ADD: + time_abz_floatx80( floatx80_add ); + break; + case FLOATX80_SUB: + time_abz_floatx80( floatx80_sub ); + break; + case FLOATX80_MUL: + time_abz_floatx80( floatx80_mul ); + break; + case FLOATX80_DIV: + time_abz_floatx80( floatx80_div ); + break; + case FLOATX80_REM: + time_abz_floatx80( floatx80_rem ); + break; + case FLOATX80_SQRT: + time_az_floatx80_pos( floatx80_sqrt ); + break; + case 
FLOATX80_EQ: + time_ab_floatx80_z_flag( floatx80_eq ); + break; + case FLOATX80_LE: + time_ab_floatx80_z_flag( floatx80_le ); + break; + case FLOATX80_LT: + time_ab_floatx80_z_flag( floatx80_lt ); + break; + case FLOATX80_EQ_SIGNALING: + time_ab_floatx80_z_flag( floatx80_eq_signaling ); + break; + case FLOATX80_LE_QUIET: + time_ab_floatx80_z_flag( floatx80_le_quiet ); + break; + case FLOATX80_LT_QUIET: + time_ab_floatx80_z_flag( floatx80_lt_quiet ); + break; +#endif +#ifdef FLOAT128 + case FLOAT128_TO_INT32: + time_a_float128_z_int32( float128_to_int32 ); + break; + case FLOAT128_TO_INT32_ROUND_TO_ZERO: + time_a_float128_z_int32( float128_to_int32_round_to_zero ); + break; + case FLOAT128_TO_INT64: + time_a_float128_z_int64( float128_to_int64 ); + break; + case FLOAT128_TO_INT64_ROUND_TO_ZERO: + time_a_float128_z_int64( float128_to_int64_round_to_zero ); + break; + case FLOAT128_TO_FLOAT32: + time_a_float128_z_float32( float128_to_float32 ); + break; + case FLOAT128_TO_FLOAT64: + time_a_float128_z_float64( float128_to_float64 ); + break; +#ifdef FLOATX80 + case FLOAT128_TO_FLOATX80: + time_a_float128_z_floatx80( float128_to_floatx80 ); + break; +#endif + case FLOAT128_ROUND_TO_INT: + time_az_float128( float128_round_to_int ); + break; + case FLOAT128_ADD: + time_abz_float128( float128_add ); + break; + case FLOAT128_SUB: + time_abz_float128( float128_sub ); + break; + case FLOAT128_MUL: + time_abz_float128( float128_mul ); + break; + case FLOAT128_DIV: + time_abz_float128( float128_div ); + break; + case FLOAT128_REM: + time_abz_float128( float128_rem ); + break; + case FLOAT128_SQRT: + time_az_float128_pos( float128_sqrt ); + break; + case FLOAT128_EQ: + time_ab_float128_z_flag( float128_eq ); + break; + case FLOAT128_LE: + time_ab_float128_z_flag( float128_le ); + break; + case FLOAT128_LT: + time_ab_float128_z_flag( float128_lt ); + break; + case FLOAT128_EQ_SIGNALING: + time_ab_float128_z_flag( float128_eq_signaling ); + break; + case FLOAT128_LE_QUIET: + 
time_ab_float128_z_flag( float128_le_quiet ); + break; + case FLOAT128_LT_QUIET: + time_ab_float128_z_flag( float128_lt_quiet ); + break; +#endif + } + +} + +static void + timeFunction( + uint8 functionCode, + int8 roundingPrecisionIn, + int8 roundingModeIn, + int8 tininessModeIn + ) +{ + int8 roundingPrecision, roundingMode, tininessMode; + + roundingPrecision = 32; + for (;;) { + if ( ! functions[ functionCode ].roundingPrecision ) { + roundingPrecision = 0; + } + else if ( roundingPrecisionIn ) { + roundingPrecision = roundingPrecisionIn; + } + for ( roundingMode = 1; + roundingMode < NUM_ROUNDINGMODES; + ++roundingMode + ) { + if ( ! functions[ functionCode ].roundingMode ) { + roundingMode = 0; + } + else if ( roundingModeIn ) { + roundingMode = roundingModeIn; + } + for ( tininessMode = 1; + tininessMode < NUM_TININESSMODES; + ++tininessMode + ) { + if ( ( roundingPrecision == 32 ) + || ( roundingPrecision == 64 ) ) { + if ( ! functions[ functionCode ] + .tininessModeAtReducedPrecision + ) { + tininessMode = 0; + } + else if ( tininessModeIn ) { + tininessMode = tininessModeIn; + } + } + else { + if ( ! functions[ functionCode ].tininessMode ) { + tininessMode = 0; + } + else if ( tininessModeIn ) { + tininessMode = tininessModeIn; + } + } + timeFunctionVariety( + functionCode, roundingPrecision, roundingMode, tininessMode + ); + if ( tininessModeIn || ! tininessMode ) break; + } + if ( roundingModeIn || ! roundingMode ) break; + } + if ( roundingPrecisionIn || ! 
roundingPrecision ) break; + if ( roundingPrecision == 80 ) { + break; + } + else if ( roundingPrecision == 64 ) { + roundingPrecision = 80; + } + else if ( roundingPrecision == 32 ) { + roundingPrecision = 64; + } + } + +} + +/* + * Program entry point. With no arguments, or with `help', the usage text + * is written to stdout and EXIT_SUCCESS is returned. Every argument may + * carry one leading '-'. Option arguments select the rounding precision + * (FLOATX80 builds only), rounding mode and tininess mode handed to + * timeFunction(); each stays 0 when its option is absent. `all1', `all2' + * and `all' time every entry of functions[] with one input, two inputs, or + * any number of inputs; any other argument must equal a name in functions[], + * otherwise fail() is called. The return type is written out explicitly + * because implicit int is not valid in C99 and later. + */ +int main( int argc, char **argv ) +{ + char *argPtr; + flag functionArgument; + uint8 functionCode; + int8 operands, roundingPrecision, roundingMode, tininessMode; + + if ( argc <= 1 ) goto writeHelpMessage; + functionArgument = FALSE; + functionCode = 0; + operands = 0; + roundingPrecision = 0; + roundingMode = 0; + tininessMode = 0; + --argc; + ++argv; + while ( argc && ( argPtr = argv[ 0 ] ) ) { + if ( argPtr[ 0 ] == '-' ) ++argPtr; + if ( strcmp( argPtr, "help" ) == 0 ) { + writeHelpMessage: + fputs( +"timesoftfloat [<option>...] <function>\n" +" <option>: (* is default)\n" +" -help --Write this message and exit.\n" +#ifdef FLOATX80 +" -precision32 --Only time rounding precision equivalent to float32.\n" +" -precision64 --Only time rounding precision equivalent to float64.\n" +" -precision80 --Only time maximum rounding precision.\n" +#endif +" -nearesteven --Only time rounding to nearest/even.\n" +" -tozero --Only time rounding to zero.\n" +" -down --Only time rounding down.\n" +" -up --Only time rounding up.\n" +" -tininessbefore --Only time underflow tininess before rounding.\n" +" -tininessafter --Only time underflow tininess after rounding.\n" +" <function>:\n" +" int32_to_<float> <float>_add <float>_eq\n" +" <float>_to_int32 <float>_sub <float>_le\n" +" <float>_to_int32_round_to_zero <float>_mul <float>_lt\n" +" int64_to_<float> <float>_div <float>_eq_signaling\n" +" <float>_to_int64 <float>_rem <float>_le_quiet\n" +" <float>_to_int64_round_to_zero <float>_lt_quiet\n" +" <float>_to_<float>\n" +" <float>_round_to_int\n" +" <float>_sqrt\n" +" -all1 --All 1-operand functions.\n" +" -all2 --All 2-operand functions.\n" +" -all --All functions.\n" +" <float>:\n" +" float32 --Single precision.\n" +" float64 --Double precision.\n" +#ifdef 
FLOATX80 +" floatx80 --Extended double precision.\n" +#endif +#ifdef FLOAT128 +" float128 --Quadruple precision.\n" +#endif + , + stdout + ); + return EXIT_SUCCESS; + } +#ifdef FLOATX80 + else if ( strcmp( argPtr, "precision32" ) == 0 ) { + roundingPrecision = 32; + } + else if ( strcmp( argPtr, "precision64" ) == 0 ) { + roundingPrecision = 64; + } + else if ( strcmp( argPtr, "precision80" ) == 0 ) { + roundingPrecision = 80; + } +#endif + else if ( ( strcmp( argPtr, "nearesteven" ) == 0 ) + || ( strcmp( argPtr, "nearest_even" ) == 0 ) ) { + roundingMode = ROUND_NEAREST_EVEN; + } + else if ( ( strcmp( argPtr, "tozero" ) == 0 ) + || ( strcmp( argPtr, "to_zero" ) == 0 ) ) { + roundingMode = ROUND_TO_ZERO; + } + else if ( strcmp( argPtr, "down" ) == 0 ) { + roundingMode = ROUND_DOWN; + } + else if ( strcmp( argPtr, "up" ) == 0 ) { + roundingMode = ROUND_UP; + } + else if ( strcmp( argPtr, "tininessbefore" ) == 0 ) { + tininessMode = TININESS_BEFORE_ROUNDING; + } + else if ( strcmp( argPtr, "tininessafter" ) == 0 ) { + tininessMode = TININESS_AFTER_ROUNDING; + } + else if ( strcmp( argPtr, "all1" ) == 0 ) { + functionArgument = TRUE; + functionCode = 0; + operands = 1; + } + else if ( strcmp( argPtr, "all2" ) == 0 ) { + functionArgument = TRUE; + functionCode = 0; + operands = 2; + } + else if ( strcmp( argPtr, "all" ) == 0 ) { + functionArgument = TRUE; + functionCode = 0; + operands = 0; + } + else { + for ( functionCode = 1; + functionCode < NUM_FUNCTIONS; + ++functionCode + ) { + if ( strcmp( argPtr, functions[ functionCode ].name ) == 0 ) { + break; + } + } + if ( functionCode == NUM_FUNCTIONS ) { + fail( "Invalid option or function `%s'", argv[ 0 ] ); + } + functionArgument = TRUE; + } + --argc; + ++argv; + } + if ( ! 
functionArgument ) fail( "Function argument required" ); + if ( functionCode ) { + timeFunction( + functionCode, roundingPrecision, roundingMode, tininessMode ); + } + else if ( operands == 1 ) { + for ( functionCode = 1; functionCode < NUM_FUNCTIONS; ++functionCode + ) { + if ( functions[ functionCode ].numInputs == 1 ) { + timeFunction( + functionCode, roundingPrecision, roundingMode, tininessMode + ); + } + } + } + else if ( operands == 2 ) { + for ( functionCode = 1; functionCode < NUM_FUNCTIONS; ++functionCode + ) { + if ( functions[ functionCode ].numInputs == 2 ) { + timeFunction( + functionCode, roundingPrecision, roundingMode, tininessMode + ); + } + } + } + else { + for ( functionCode = 1; functionCode < NUM_FUNCTIONS; ++functionCode + ) { + timeFunction( + functionCode, roundingPrecision, roundingMode, tininessMode ); + } + } + return EXIT_SUCCESS; + +} + diff --git a/lib/libc/arch/arm/softfloat/unorddf2.c b/lib/libc/arch/arm/softfloat/unorddf2.c new file mode 100644 index 00000000000..168278b8748 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/unorddf2.c @@ -0,0 +1,25 @@ +/* $NetBSD: unorddf2.c,v 1.1 2003/05/06 08:58:19 rearnsha Exp $ */ + +/* + * Written by Richard Earnshaw, 2003. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __unorddf2(float64, float64); + +flag +__unorddf2(float64 a, float64 b) +{ + /* + * The comparison is unordered if either input is a NaN. + * Test for this by comparing each operand with itself. + * We must perform both comparisons to correctly check for + * signalling NaNs. 
+ */ + return 1 ^ (float64_eq(a, a) & float64_eq(b, b)); +} diff --git a/lib/libc/arch/arm/softfloat/unordsf2.c b/lib/libc/arch/arm/softfloat/unordsf2.c new file mode 100644 index 00000000000..0e1efedd151 --- /dev/null +++ b/lib/libc/arch/arm/softfloat/unordsf2.c @@ -0,0 +1,25 @@ +/* $NetBSD: unordsf2.c,v 1.1 2003/05/06 08:58:20 rearnsha Exp $ */ + +/* + * Written by Richard Earnshaw, 2003. This file is in the Public Domain. + */ + +#include "softfloat-for-gcc.h" +#include "milieu.h" +#include "softfloat.h" + +#include <sys/cdefs.h> + +flag __unordsf2(float32, float32); + +flag +__unordsf2(float32 a, float32 b) +{ + /* + * The comparison is unordered if either input is a NaN. + * Test for this by comparing each operand with itself. + * We must perform both comparisons to correctly check for + * signalling NaNs. + */ + return 1 ^ (float32_eq(a, a) & float32_eq(b, b)); +} diff --git a/lib/libc/arch/arm/string/Makefile.inc b/lib/libc/arch/arm/string/Makefile.inc new file mode 100644 index 00000000000..2d9e5dd7915 --- /dev/null +++ b/lib/libc/arch/arm/string/Makefile.inc @@ -0,0 +1,8 @@ +# $NetBSD: Makefile.inc,v 1.5 2002/11/23 14:26:04 chris Exp $ + +SRCS+= memcpy.S _memcpy.S bcopy.S memmove.S memset.S bzero.S ffs.S strcmp.S +SRCS+= strncmp.S memcmp.S +SRCS+= bcmp.c index.c memchr.c \ + rindex.c strcat.c strcpy.c strcspn.c strlen.c \ + strncat.c strncpy.c strpbrk.c strsep.c \ + strspn.c strstr.c swab.c strlcpy.c strlcat.c diff --git a/lib/libc/arch/arm/string/_memcpy.S b/lib/libc/arch/arm/string/_memcpy.S new file mode 100644 index 00000000000..11fb564e778 --- /dev/null +++ b/lib/libc/arch/arm/string/_memcpy.S @@ -0,0 +1,468 @@ +/* $NetBSD: _memcpy.S,v 1.4 2003/04/05 23:08:52 bjh21 Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. 
Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <machine/asm.h> + +/* + * This is one fun bit of code ... + * Some easy listening music is suggested while trying to understand this + * code e.g. 
Iron Maiden + * + * For anyone attempting to understand it : + * + * The core code is implemented here with simple stubs for memcpy() + * memmove() and bcopy(). + * + * All local labels are prefixed with .Lmemcpy_ + * Following the prefix a label starting f is used in the forward copy code + * while a label starting b is used in the backwards copy code + * The source and destination addresses determine whether a forward or + * backward copy is performed. + * Separate bits of code are used to deal with the following situations + * for both the forward and backwards copy. + * unaligned source address + * unaligned destination address + * Separate copy routines are used to produce an optimised result for each + * of these cases. + * The copy code will use LDM/STM instructions to copy up to 32 bytes at + * a time where possible. + * + * Note: r12 (aka ip) can be trashed during the function along with + * r0-r3 although r0-r2 have defined uses i.e. r0 = dest, r1 = src, r2 = len throughout. + * Additional registers are preserved prior to use i.e.
r4, r5 & lr + * + * Apologies for the state of the comments ;-) + */ + +ENTRY(_memcpy) + /* Determine copy direction: r0 = dest, r1 = src, r2 = len; copy backwards when src < dest (unsigned) */ + cmp r1, r0 + bcc .Lmemcpy_backwards + + moveq r0, #0 /* Quick abort when src == dest (Z from the cmp above): nothing to copy */ + moveq pc, lr + + stmdb sp!, {r0, lr} /* memcpy() returns dest addr */ + subs r2, r2, #4 + blt .Lmemcpy_fl4 /* less than 4 bytes */ + ands r12, r0, #3 + bne .Lmemcpy_fdestul /* oh unaligned destination addr */ + ands r12, r1, #3 + bne .Lmemcpy_fsrcul /* oh unaligned source addr */ + +.Lmemcpy_ft8: + /* We have aligned source and destination */ + subs r2, r2, #8 + blt .Lmemcpy_fl12 /* less than 12 bytes (4 from above) */ + subs r2, r2, #0x14 + blt .Lmemcpy_fl32 /* less than 32 bytes (12 from above) */ + stmdb sp!, {r4} /* borrow r4 */ + + /* blat 32 bytes at a time */ + /* XXX for really big copies perhaps we should use more registers */ +.Lmemcpy_floop32: + ldmia r1!, {r3, r4, r12, lr} + stmia r0!, {r3, r4, r12, lr} + ldmia r1!, {r3, r4, r12, lr} + stmia r0!, {r3, r4, r12, lr} + subs r2, r2, #0x20 + bge .Lmemcpy_floop32 + + cmn r2, #0x10 + ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ + stmgeia r0!, {r3, r4, r12, lr} + subge r2, r2, #0x10 + ldmia sp!, {r4} /* return r4 */ + +.Lmemcpy_fl32: + adds r2, r2, #0x14 + + /* blat 12 bytes at a time */ +.Lmemcpy_floop12: + ldmgeia r1!, {r3, r12, lr} + stmgeia r0!, {r3, r12, lr} + subges r2, r2, #0x0c + bge .Lmemcpy_floop12 + +.Lmemcpy_fl12: + adds r2, r2, #8 + blt .Lmemcpy_fl4 + + subs r2, r2, #4 + ldrlt r3, [r1], #4 + strlt r3, [r0], #4 + ldmgeia r1!, {r3, r12} + stmgeia r0!, {r3, r12} + subge r2, r2, #4 + +.Lmemcpy_fl4: + /* less than 4 bytes to go */ + adds r2, r2, #4 + ldmeqia sp!, {r0, pc} /* done: restore dest into r0 and return */ + + /* copy the crud byte at a time */ + cmp r2, #2 + ldrb r3, [r1], #1 + strb r3, [r0], #1 + ldrgeb r3, [r1], #1 + strgeb r3, [r0], #1 + ldrgtb r3, [r1], #1 + strgtb r3, [r0], #1 + ldmia sp!, {r0, pc} + + /* erg - unaligned destination */ +.Lmemcpy_fdestul: + rsb r12, r12, #4 + cmp r12, #2 + + /* align
destination with byte copies */ + ldrb r3, [r1], #1 + strb r3, [r0], #1 + ldrgeb r3, [r1], #1 + strgeb r3, [r0], #1 + ldrgtb r3, [r1], #1 + strgtb r3, [r0], #1 + subs r2, r2, r12 + blt .Lmemcpy_fl4 /* less than 4 bytes */ + + ands r12, r1, #3 + beq .Lmemcpy_ft8 /* we have an aligned source */ + + /* erg - unaligned source */ + /* This is where it gets nasty ... r12 = src & 3 picks the variant; src is rounded down to a word boundary and each stored word is assembled from two adjacent source words with a shift/OR pair */ +.Lmemcpy_fsrcul: + bic r1, r1, #3 + ldr lr, [r1], #4 + cmp r12, #2 + bgt .Lmemcpy_fsrcul3 + beq .Lmemcpy_fsrcul2 + cmp r2, #0x0c + blt .Lmemcpy_fsrcul1loop4 + sub r2, r2, #0x0c + stmdb sp!, {r4, r5} + +.Lmemcpy_fsrcul1loop16: + mov r3, lr, lsr #8 + ldmia r1!, {r4, r5, r12, lr} + orr r3, r3, r4, lsl #24 + mov r4, r4, lsr #8 + orr r4, r4, r5, lsl #24 + mov r5, r5, lsr #8 + orr r5, r5, r12, lsl #24 + mov r12, r12, lsr #8 + orr r12, r12, lr, lsl #24 + stmia r0!, {r3-r5, r12} + subs r2, r2, #0x10 + bge .Lmemcpy_fsrcul1loop16 + ldmia sp!, {r4, r5} + adds r2, r2, #0x0c + blt .Lmemcpy_fsrcul1l4 + +.Lmemcpy_fsrcul1loop4: + mov r12, lr, lsr #8 + ldr lr, [r1], #4 + orr r12, r12, lr, lsl #24 + str r12, [r0], #4 + subs r2, r2, #4 + bge .Lmemcpy_fsrcul1loop4 + +.Lmemcpy_fsrcul1l4: + sub r1, r1, #3 + b .Lmemcpy_fl4 + +.Lmemcpy_fsrcul2: + cmp r2, #0x0c + blt .Lmemcpy_fsrcul2loop4 + sub r2, r2, #0x0c + stmdb sp!, {r4, r5} + +.Lmemcpy_fsrcul2loop16: + mov r3, lr, lsr #16 + ldmia r1!, {r4, r5, r12, lr} + orr r3, r3, r4, lsl #16 + mov r4, r4, lsr #16 + orr r4, r4, r5, lsl #16 + mov r5, r5, lsr #16 + orr r5, r5, r12, lsl #16 + mov r12, r12, lsr #16 + orr r12, r12, lr, lsl #16 + stmia r0!, {r3-r5, r12} + subs r2, r2, #0x10 + bge .Lmemcpy_fsrcul2loop16 + ldmia sp!, {r4, r5} + adds r2, r2, #0x0c + blt .Lmemcpy_fsrcul2l4 + +.Lmemcpy_fsrcul2loop4: + mov r12, lr, lsr #16 + ldr lr, [r1], #4 + orr r12, r12, lr, lsl #16 + str r12, [r0], #4 + subs r2, r2, #4 + bge .Lmemcpy_fsrcul2loop4 + +.Lmemcpy_fsrcul2l4: + sub r1, r1, #2 + b .Lmemcpy_fl4 + +.Lmemcpy_fsrcul3: + cmp r2, #0x0c + blt .Lmemcpy_fsrcul3loop4 + sub r2, r2, #0x0c + stmdb
sp!, {r4, r5} + +.Lmemcpy_fsrcul3loop16: + mov r3, lr, lsr #24 + ldmia r1!, {r4, r5, r12, lr} + orr r3, r3, r4, lsl #8 + mov r4, r4, lsr #24 + orr r4, r4, r5, lsl #8 + mov r5, r5, lsr #24 + orr r5, r5, r12, lsl #8 + mov r12, r12, lsr #24 + orr r12, r12, lr, lsl #8 + stmia r0!, {r3-r5, r12} + subs r2, r2, #0x10 + bge .Lmemcpy_fsrcul3loop16 + ldmia sp!, {r4, r5} + adds r2, r2, #0x0c + blt .Lmemcpy_fsrcul3l4 + +.Lmemcpy_fsrcul3loop4: + mov r12, lr, lsr #24 + ldr lr, [r1], #4 + orr r12, r12, lr, lsl #8 + str r12, [r0], #4 + subs r2, r2, #4 + bge .Lmemcpy_fsrcul3loop4 + +.Lmemcpy_fsrcul3l4: + sub r1, r1, #1 + b .Lmemcpy_fl4 + +.Lmemcpy_backwards: /* src < dest: point both registers past the end and copy downwards */ + add r1, r1, r2 + add r0, r0, r2 + subs r2, r2, #4 + blt .Lmemcpy_bl4 /* less than 4 bytes */ + ands r12, r0, #3 + bne .Lmemcpy_bdestul /* oh unaligned destination addr */ + ands r12, r1, #3 + bne .Lmemcpy_bsrcul /* oh unaligned source addr */ + +.Lmemcpy_bt8: + /* We have aligned source and destination */ + subs r2, r2, #8 + blt .Lmemcpy_bl12 /* less than 12 bytes (4 from above) */ + stmdb sp!, {r4, lr} + subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ + blt .Lmemcpy_bl32 + + /* blat 32 bytes at a time */ + /* XXX for really big copies perhaps we should use more registers */ +.Lmemcpy_bloop32: + ldmdb r1!, {r3, r4, r12, lr} + stmdb r0!, {r3, r4, r12, lr} + ldmdb r1!, {r3, r4, r12, lr} + stmdb r0!, {r3, r4, r12, lr} + subs r2, r2, #0x20 + bge .Lmemcpy_bloop32 + +.Lmemcpy_bl32: + cmn r2, #0x10 + ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */ + stmgedb r0!, {r3, r4, r12, lr} + subge r2, r2, #0x10 + adds r2, r2, #0x14 + ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */ + stmgedb r0!, {r3, r12, lr} + subge r2, r2, #0x0c + ldmia sp!, {r4, lr} + +.Lmemcpy_bl12: + adds r2, r2, #8 + blt .Lmemcpy_bl4 + subs r2, r2, #4 + ldrlt r3, [r1, #-4]! + strlt r3, [r0, #-4]!
+ ldmgedb r1!, {r3, r12} + stmgedb r0!, {r3, r12} + subge r2, r2, #4 + +.Lmemcpy_bl4: + /* less than 4 bytes to go */ + adds r2, r2, #4 + moveq pc, lr /* done */ + + /* copy the crud byte at a time */ + cmp r2, #2 + ldrb r3, [r1, #-1]! + strb r3, [r0, #-1]! + ldrgeb r3, [r1, #-1]! + strgeb r3, [r0, #-1]! + ldrgtb r3, [r1, #-1]! + strgtb r3, [r0, #-1]! + mov pc, lr + + /* erg - unaligned destination */ +.Lmemcpy_bdestul: + cmp r12, #2 + + /* align destination with byte copies (r12 = dest & 3 = bytes needed going downwards) */ + ldrb r3, [r1, #-1]! + strb r3, [r0, #-1]! + ldrgeb r3, [r1, #-1]! + strgeb r3, [r0, #-1]! + ldrgtb r3, [r1, #-1]! + strgtb r3, [r0, #-1]! + subs r2, r2, r12 + blt .Lmemcpy_bl4 /* less than 4 bytes to go */ + ands r12, r1, #3 + beq .Lmemcpy_bt8 /* we have an aligned source */ + + /* erg - unaligned source */ + /* This is where it gets nasty ... */ +.Lmemcpy_bsrcul: + bic r1, r1, #3 + ldr r3, [r1, #0] + cmp r12, #2 + blt .Lmemcpy_bsrcul1 + beq .Lmemcpy_bsrcul2 + cmp r2, #0x0c + blt .Lmemcpy_bsrcul3loop4 + sub r2, r2, #0x0c + stmdb sp!, {r4, r5, lr} + +.Lmemcpy_bsrcul3loop16: + mov lr, r3, lsl #8 + ldmdb r1!, {r3-r5, r12} + orr lr, lr, r12, lsr #24 + mov r12, r12, lsl #8 + orr r12, r12, r5, lsr #24 + mov r5, r5, lsl #8 + orr r5, r5, r4, lsr #24 + mov r4, r4, lsl #8 + orr r4, r4, r3, lsr #24 + stmdb r0!, {r4, r5, r12, lr} + subs r2, r2, #0x10 + bge .Lmemcpy_bsrcul3loop16 + ldmia sp!, {r4, r5, lr} + adds r2, r2, #0x0c + blt .Lmemcpy_bsrcul3l4 + +.Lmemcpy_bsrcul3loop4: + mov r12, r3, lsl #8 + ldr r3, [r1, #-4]! + orr r12, r12, r3, lsr #24 + str r12, [r0, #-4]!
+ subs r2, r2, #4 + bge .Lmemcpy_bsrcul3loop4 + +.Lmemcpy_bsrcul3l4: + add r1, r1, #3 + b .Lmemcpy_bl4 + +.Lmemcpy_bsrcul2: + cmp r2, #0x0c + blt .Lmemcpy_bsrcul2loop4 + sub r2, r2, #0x0c + stmdb sp!, {r4, r5, lr} + +.Lmemcpy_bsrcul2loop16: + mov lr, r3, lsl #16 + ldmdb r1!, {r3-r5, r12} + orr lr, lr, r12, lsr #16 + mov r12, r12, lsl #16 + orr r12, r12, r5, lsr #16 + mov r5, r5, lsl #16 + orr r5, r5, r4, lsr #16 + mov r4, r4, lsl #16 + orr r4, r4, r3, lsr #16 + stmdb r0!, {r4, r5, r12, lr} + subs r2, r2, #0x10 + bge .Lmemcpy_bsrcul2loop16 + ldmia sp!, {r4, r5, lr} + adds r2, r2, #0x0c + blt .Lmemcpy_bsrcul2l4 + +.Lmemcpy_bsrcul2loop4: + mov r12, r3, lsl #16 + ldr r3, [r1, #-4]! + orr r12, r12, r3, lsr #16 + str r12, [r0, #-4]! + subs r2, r2, #4 + bge .Lmemcpy_bsrcul2loop4 + +.Lmemcpy_bsrcul2l4: + add r1, r1, #2 + b .Lmemcpy_bl4 + +.Lmemcpy_bsrcul1: + cmp r2, #0x0c + blt .Lmemcpy_bsrcul1loop4 + sub r2, r2, #0x0c + stmdb sp!, {r4, r5, lr} + +.Lmemcpy_bsrcul1loop32: /* despite the "32" in the name, each pass moves 16 bytes */ + mov lr, r3, lsl #24 + ldmdb r1!, {r3-r5, r12} + orr lr, lr, r12, lsr #8 + mov r12, r12, lsl #24 + orr r12, r12, r5, lsr #8 + mov r5, r5, lsl #24 + orr r5, r5, r4, lsr #8 + mov r4, r4, lsl #24 + orr r4, r4, r3, lsr #8 + stmdb r0!, {r4, r5, r12, lr} + subs r2, r2, #0x10 + bge .Lmemcpy_bsrcul1loop32 + ldmia sp!, {r4, r5, lr} + adds r2, r2, #0x0c + blt .Lmemcpy_bsrcul1l4 + +.Lmemcpy_bsrcul1loop4: + mov r12, r3, lsl #24 + ldr r3, [r1, #-4]! + orr r12, r12, r3, lsr #8 + str r12, [r0, #-4]! + subs r2, r2, #4 + bge .Lmemcpy_bsrcul1loop4 + +.Lmemcpy_bsrcul1l4: + add r1, r1, #1 + b .Lmemcpy_bl4 diff --git a/lib/libc/arch/arm/string/bcopy.S b/lib/libc/arch/arm/string/bcopy.S new file mode 100644 index 00000000000..3df71718371 --- /dev/null +++ b/lib/libc/arch/arm/string/bcopy.S @@ -0,0 +1,48 @@ +/* $NetBSD: bcopy.S,v 1.2 2001/07/16 05:50:06 matt Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved.
+ * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <machine/asm.h> + +/* bcopy = memcpy/memmove with arguments reversed. 
*/ + +ENTRY(bcopy) + /* switch the source and destination registers: three-step XOR swap of r0 and r1, no scratch register needed; r2 (length) is left untouched */ + eor r0, r1, r0 + eor r1, r0, r1 + eor r0, r1, r0 + b PIC_SYM(_C_LABEL(_memcpy), PLT) /* plain branch (lr untouched): _memcpy returns straight to our caller */ diff --git a/lib/libc/arch/arm/string/bzero.S b/lib/libc/arch/arm/string/bzero.S new file mode 100644 index 00000000000..ea197c62c3a --- /dev/null +++ b/lib/libc/arch/arm/string/bzero.S @@ -0,0 +1,44 @@ +/* $NetBSD: bzero.S,v 1.2 2001/07/16 05:50:06 matt Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <machine/asm.h> + +ENTRY(bzero) + mov r2, r1 /* second argument (length) becomes memset's byte count in r2 */ + mov r1, #0 /* fill value is zero */ + b PIC_SYM(_C_LABEL(memset), PLT) /* plain branch (lr untouched): memset returns to our caller */ diff --git a/lib/libc/arch/arm/string/ffs.S b/lib/libc/arch/arm/string/ffs.S new file mode 100644 index 00000000000..5dfcc7b5fd3 --- /dev/null +++ b/lib/libc/arch/arm/string/ffs.S @@ -0,0 +1,76 @@ +/* $NetBSD: ffs.S,v 1.5 2003/04/05 23:08:52 bjh21 Exp $ */ +/* + * Copyright (c) 2001 Christopher Gilbert + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company nor the name of the author may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <machine/asm.h> + +RCSID("$NetBSD: ffs.S,v 1.5 2003/04/05 23:08:52 bjh21 Exp $") + +/* + * ffs - find first set bit, this algorithm isolates the first set + * bit, then multiplies the number by 0x0450fbaf which leaves the top + * 6 bits as an index into the table. This algorithm should be a win + * over the checking each bit in turn as per the C compiled version. + * + * under ARMv5 there's an instruction called CLZ (count leading Zero's) that + * could be used + * + * This is the ffs algorithm devised by d.seal and posted to comp.sys.arm on + * 16 Feb 1994. 
+ */ + +ENTRY(ffs) + /* Standard trick to isolate bottom bit in r0 or 0 if r0 = 0 on entry: r0 & -r0 */ + rsb r1, r0, #0 + ands r0, r0, r1 + /* + * now r0 has at most one set bit, call this X + * if X = 0, the conditional (ne) instructions below are skipped and 0 is returned + */ + adrne r2, .L_ffs_table + orrne r0, r0, r0, lsl #4 /* r0 = X * 0x11 */ + orrne r0, r0, r0, lsl #6 /* r0 = X * 0x451 */ + rsbne r0, r0, r0, lsl #16 /* r0 = X * 0x0450fbaf */ + + /* now lookup in table indexed on top 6 bits of r0; the table holds the 1-based bit position */ + ldrneb r0, [ r2, r0, lsr #26 ] + + mov pc, lr +.text; +.type .L_ffs_table, _ASM_TYPE_OBJECT; +.L_ffs_table: +/* 0 1 2 3 4 5 6 7 */ + .byte 0, 1, 2, 13, 3, 7, 0, 14 /* 0- 7 */ + .byte 4, 0, 8, 0, 0, 0, 0, 15 /* 8-15 */ + .byte 11, 5, 0, 0, 9, 0, 0, 26 /* 16-23 */ + .byte 0, 0, 0, 0, 0, 22, 28, 16 /* 24-31 */ + .byte 32, 12, 6, 0, 0, 0, 0, 0 /* 32-39 */ + .byte 10, 0, 0, 25, 0, 0, 21, 27 /* 40-47 */ + .byte 31, 0, 0, 0, 0, 24, 0, 20 /* 48-55 */ + .byte 30, 0, 23, 19, 29, 18, 17, 0 /* 56-63 */ diff --git a/lib/libc/arch/arm/string/memcmp.S b/lib/libc/arch/arm/string/memcmp.S new file mode 100644 index 00000000000..9ade675baa9 --- /dev/null +++ b/lib/libc/arch/arm/string/memcmp.S @@ -0,0 +1,50 @@ +/* $NetBSD: memcmp.S,v 1.2 2003/04/05 23:08:52 bjh21 Exp $ */ + +/* + * Copyright (c) 2002 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the company may not be used to endorse or promote + * products derived from this software without specific prior written + * permission.
+ * + * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <machine/asm.h> + +RCSID("$NetBSD: memcmp.S,v 1.2 2003/04/05 23:08:52 bjh21 Exp $") + +ENTRY(memcmp) +/* if ((len - 1) < 0) return 0; NOTE(review): the test is signed (movmi), so lengths above 0x80000000 also return 0 */ + subs r2, r2, #1 + movmi r0, #0 + movmi pc, lr + +/* ip == last src address to compare */ + add ip, r0, r2 +1: + ldrb r2, [r0], #1 + ldrb r3, [r1], #1 + cmp ip, r0 /* C set while at least one more byte remains (r0 already points at the next byte) */ + cmpcs r2, r3 /* only then compare the pair just loaded; otherwise Z stays clear and the loop ends */ + beq 1b + sub r0, r2, r3 /* result = difference of the last pair of bytes loaded */ + mov pc, lr diff --git a/lib/libc/arch/arm/string/memcpy.S b/lib/libc/arch/arm/string/memcpy.S new file mode 100644 index 00000000000..2a00942d840 --- /dev/null +++ b/lib/libc/arch/arm/string/memcpy.S @@ -0,0 +1,44 @@ +/* $NetBSD: memcpy.S,v 1.3 2003/04/05 23:08:52 bjh21 Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <machine/asm.h> + +ENTRY(memcpy) + stmfd sp!, {r0, lr} /* save dest (our return value) and the return address */ + bl PIC_SYM(_C_LABEL(_memcpy), PLT) /* the shared copy routine does the work */ + ldmfd sp!, {r0, pc} /* r0 = original dest; loading pc returns to the caller */ diff --git a/lib/libc/arch/arm/string/memmove.S b/lib/libc/arch/arm/string/memmove.S new file mode 100644 index 00000000000..ddaad8b12b1 --- /dev/null +++ b/lib/libc/arch/arm/string/memmove.S @@ -0,0 +1,44 @@ +/* $NetBSD: memmove.S,v 1.3 2003/04/05 23:08:52 bjh21 Exp $ */ + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved.
+ * + * This code is derived from software contributed to The NetBSD Foundation + * by Neil A. Carson and Mark Brinicombe + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <machine/asm.h> + +ENTRY(memmove) + stmfd sp!, {r0, lr} /* save dest (our return value) and the return address */ + bl PIC_SYM(_C_LABEL(_memcpy), PLT) /* the shared copy routine does the work */ + ldmfd sp!, {r0, pc} /* r0 = original dest; loading pc returns to the caller */ diff --git a/lib/libc/arch/arm/string/memset.S b/lib/libc/arch/arm/string/memset.S new file mode 100644 index 00000000000..564c5d9e219 --- /dev/null +++ b/lib/libc/arch/arm/string/memset.S @@ -0,0 +1,126 @@ +/* $NetBSD: memset.S,v 1.3 2003/04/05 23:08:52 bjh21 Exp $ */ + +/* + * Copyright (c) 1995 Mark Brinicombe. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Mark Brinicombe. + * 4. The name of the company nor the name of the author may be used to + * endorse or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <machine/asm.h> + +/* + * Sets a block of memory to the specified value + * + * On entry: + * r0 - dest address + * r1 - byte to write + * r2 - number of bytes to write + * + * On exit: + * r0 - dest address + */ + +ENTRY(memset) + stmfd sp!, {r0} /* Remember address for return value */ + and r1, r1, #0x000000ff /* Keep only the low 8 bits: we write bytes */ + + cmp r2, #0x00000004 /* Do we have less than 4 bytes */ + blt .Lmemset_lessthanfour + + /* Ok first we will word align the address */ + + ands r3, r0, #0x00000003 /* Get the bottom two bits */ + beq .Lmemset_addraligned /* The address is word aligned */ + + rsb r3, r3, #0x00000004 /* r3 = bytes needed to reach the next word boundary (1-3) */ + sub r2, r2, r3 /* take them off the remaining count */ + cmp r3, #0x00000002 + strb r1, [r0], #0x0001 /* Set 1 byte */ + strgeb r1, [r0], #0x0001 /* Set another byte */ + strgtb r1, [r0], #0x0001 /* and a third */ + + cmp r2, #0x00000004 + blt .Lmemset_lessthanfour + + /* Now we must be word aligned */ + +.Lmemset_addraligned: + + orr r3, r1, r1, lsl #8 /* Repeat the byte into a word */ + orr r3, r3, r3, lsl #16 + + /* We know we have at least 4 bytes ...
*/ + + cmp r2, #0x00000020 /* If less than 32 then use words */ + blt .Lmemset_lessthan32 + + /* We have at least 32 so lets use quad words */ + + stmfd sp!, {r4-r6} /* Store registers */ + mov r4, r3 /* Duplicate data */ + mov r5, r3 + mov r6, r3 + +.Lmemset_loop16: + stmia r0!, {r3-r6} /* Store 16 bytes */ + sub r2, r2, #0x00000010 /* Adjust count */ + cmp r2, #0x00000010 /* More than 16 bytes left ? (bgt: exactly 16 falls through to the word loop) */ + bgt .Lmemset_loop16 + + ldmfd sp!, {r4-r6} /* Restore registers */ + + /* Do we need to set some words as well ? */ + + cmp r2, #0x00000004 + blt .Lmemset_lessthanfour + + /* Have either at most 16 (after the loop above) or less than 32 (direct branch) depending on route taken */ + +.Lmemset_lessthan32: + + /* We have at least 4 bytes so copy as words */ + +.Lmemset_loop4: + str r3, [r0], #0x0004 + sub r2, r2, #0x0004 + cmp r2, #0x00000004 + bge .Lmemset_loop4 + +.Lmemset_lessthanfour: + cmp r2, #0x00000000 + ldmeqfd sp!, {r0} + moveq pc, lr /* Nothing left to write so exit */ + + cmp r2, #0x00000002 + strb r1, [r0], #0x0001 /* Set 1 byte */ + strgeb r1, [r0], #0x0001 /* Set another byte */ + strgtb r1, [r0], #0x0001 /* and a third */ + + ldmfd sp!, {r0} + mov pc, lr /* Exit */ diff --git a/lib/libc/arch/arm/string/strcmp.S b/lib/libc/arch/arm/string/strcmp.S new file mode 100644 index 00000000000..9914c65b860 --- /dev/null +++ b/lib/libc/arch/arm/string/strcmp.S @@ -0,0 +1,43 @@ +/* $NetBSD: strcmp.S,v 1.3 2003/04/05 23:08:52 bjh21 Exp $ */ + +/* + * Copyright (c) 2002 ARM Ltd + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: strcmp.S,v 1.3 2003/04/05 23:08:52 bjh21 Exp $")
+/* In: r0, r1 = the two byte strings.  Out: r0 = difference of the bytes where the scan stopped. */
+ENTRY(strcmp)
+1:
+	ldrb	r2, [r0], #1		/* r2 = next byte of the first string, r0++ */
+	ldrb	r3, [r1], #1		/* r3 = next byte of the second string, r1++ */
+	cmp	r2, #1			/* C set unless r2 == 0 (end of first string) */
+	cmpcs	r2, r3			/* only if r2 != 0: Z set when the bytes match */
+	beq	1b			/* matching, non-NUL bytes: keep scanning */
+	sub	r0, r2, r3		/* 0 if both ended together, else the byte difference */
+	mov	pc, lr
diff --git a/lib/libc/arch/arm/string/strncmp.S b/lib/libc/arch/arm/string/strncmp.S
new file mode 100644
index 00000000000..78403fbfea5
--- /dev/null
+++ b/lib/libc/arch/arm/string/strncmp.S
@@ -0,0 +1,51 @@
+/* $NetBSD: strncmp.S,v 1.2 2003/04/05 23:08:52 bjh21 Exp $ */
+
+/*
+ * Copyright (c) 2002 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+RCSID("$NetBSD: strncmp.S,v 1.2 2003/04/05 23:08:52 bjh21 Exp $")
+
+ENTRY(strncmp)
+/* In: r0, r1 = strings, r2 = len (unsigned).  if (len == 0) return 0; else r2 = len - 1 */
+	subs	r2, r2, #1		/* C clear <=> borrow <=> len was 0 */
+	movcc	r0, #0			/* (testing N here would reject every len > 2^31) */
+	movcc	pc, lr
+
+	adds	ip, r0, r2		/* ip == last src address to compare */
+	mvncs	ip, #0			/* sum wrapped (huge len): clamp to 0xffffffff */
+1:
+	ldrb	r2, [r0], #1		/* r2 = byte from first string, r0++ */
+	ldrb	r3, [r1], #1		/* r3 = byte from second string, r1++ */
+	cmp	ip, r0			/* C clear once the byte at ip has been read */
+	cmpcs	r2, #1			/* still in range: C clear if r2 is NUL */
+	cmpcs	r2, r3			/* non-NUL: Z set when the bytes match */
+	beq	1b			/* loop only while in range, non-NUL and equal */
+	sub	r0, r2, r3		/* difference of the last pair of bytes read */
+	mov	pc, lr
diff --git a/lib/libc/arch/arm/sys/Ovfork.S b/lib/libc/arch/arm/sys/Ovfork.S
new file mode 100644
index 00000000000..53383c5de42
--- /dev/null
+++ b/lib/libc/arch/arm/sys/Ovfork.S
@@ -0,0 +1,55 @@
+/* $NetBSD: Ovfork.S,v 1.6 2003/08/07 16:42:03 agc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)Ovfork.s 5.1 (Berkeley) 4/23/90 + */ + +#include "SYS.h" + +WARN_REFERENCES(vfork, \ + "warning: reference to compatibility vfork(); include <unistd.h> for correct reference") + +/* + * pid = vfork(); + * + * On return from the SWI: + * r1 == 0 in parent process, r1 == 1 in child process. + * r0 == pid of child in parent, r0 == pid of parent in child. 
+ */
+	.text
+	.align	0
+
+SYSENTRY(vfork)
+	mov	r2, r14			/* keep the return address in r2 across the trap */
+	SYSTRAP(vfork)
+	bcs	PIC_SYM(CERROR, PLT)	/* carry set after the trap: go to the error routine */
+	sub	r1, r1, #1	/* r1 == 0xffffffff if parent, 0 if child */
+	and	r0, r0, r1	/* r0 == 0 if child, else unchanged */
+	mov	r15, r2			/* return through the address saved in r2 */
diff --git a/lib/libc/arch/arm/sys/brk.S b/lib/libc/arch/arm/sys/brk.S
new file mode 100644
index 00000000000..c6b50ecd41f
--- /dev/null
+++ b/lib/libc/arch/arm/sys/brk.S
@@ -0,0 +1,98 @@
+/* $NetBSD: brk.S,v 1.6 2003/08/07 16:42:04 agc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)brk.s 5.2 (Berkeley) 12/17/90
+ */
+
+#include "SYS.h"
+
+	.globl	_C_LABEL(end)
+	.globl	CURBRK
+
+#ifdef WEAK_ALIAS
+WEAK_ALIAS(brk, _brk)
+#endif
+
+	.data
+	.align	0
+	.globl	_C_LABEL(__minbrk)
+	.type	_C_LABEL(__minbrk),#object
+_C_LABEL(__minbrk):
+	.word	_C_LABEL(end)		/* lowest break allowed; starts at `end' */
+
+/*
+ * Change the data segment size.  In: r0 = requested break.  Out: r0 = 0.
+ */
+SYSENTRY(_brk)
+#ifdef PIC
+	/* Setup the GOT */
+	ldr	r3, .Lgot
+	add	r3, pc, r3		/* pc reads as .L1+4 here, matching the bias in .Lgot */
+.L1:
+	ldr	r1, .Lminbrk
+	ldr	r1, [r3, r1]
+#else
+	ldr	r1, .Lminbrk
+#endif
+	/* Get the minimum allowable brk address */
+	ldr	r1, [r1]
+
+	/*
+	 * Validate the address specified and set to the minimum
+	 * if the address is below minbrk.
+	 */
+	cmp	r0, r1
+	movlo	r0, r1			/* addresses are unsigned: lo, not lt */
+	mov	r2, r0			/* remember the value passed to the trap */
+	SYSTRAP(break)
+	bcs	PIC_SYM(CERROR, PLT)	/* carry set after the trap: go to the error routine */
+
+#ifdef PIC
+	ldr	r1, .Lcurbrk
+	ldr	r1, [r3, r1]		/* r3 (GOT) and r2 are still relied on after the trap */
+#else
+	ldr	r1, .Lcurbrk
+#endif
+	/* Store the new address in curbrk */
+	str	r2, [r1]
+
+	/* Return 0 for success */
+	mov	r0, #0x00000000
+	mov	r15, r14
+
+	.align	2
+#ifdef PIC
+.Lgot:
+	.word	_C_LABEL(_GLOBAL_OFFSET_TABLE_) + (. - (.L1+4))
+#endif
+.Lminbrk:
+	.word	PIC_SYM(_C_LABEL(__minbrk), GOT)
+.Lcurbrk:
+	.word	PIC_SYM(CURBRK, GOT)
diff --git a/lib/libc/arch/arm/sys/cerror.S b/lib/libc/arch/arm/sys/cerror.S
new file mode 100644
index 00000000000..2a06fca2f3e
--- /dev/null
+++ b/lib/libc/arch/arm/sys/cerror.S
@@ -0,0 +1,71 @@
+/* $NetBSD: cerror.S,v 1.5 2003/08/07 16:42:04 agc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)cerror.s 5.1 (Berkeley) 4/23/90
+ */
+
+#include "SYS.h"
+/* Error path the syscall stubs branch to: stores r0 into errno, returns -1 in r0 and r1. */
+ASENTRY(CERROR)
+#ifdef _REENTRANT
+	stmfd	sp!, {r4, lr}
+	mov	r4, r0			/* keep the error value across the call */
+	bl	PIC_SYM(_C_LABEL(__errno), PLT)
+	str	r4, [r0]		/* store it through the pointer __errno returned */
+	mvn	r0, #0x00000000		/* r0 = 0xffffffff (-1) */
+	mvn	r1, #0x00000000		/* r1 = 0xffffffff (-1) */
+	ldmfd	sp!, {r4, pc}		/* restore r4 and return */
+#else
+#ifdef PIC
+	/* Setup the GOT */
+	ldr	r3, .Lgot
+	add	r3, pc, r3		/* pc reads as .L1+4 here, matching the bias in .Lgot */
+.L1:
+	ldr	r1, .Lerrno
+	ldr	r1, [r3, r1]		/* r1 = address of errno, fetched from the GOT */
+#else
+	ldr	r1, .Lerrno		/* r1 = address of errno */
+#endif /* PIC */
+	str	r0, [r1]		/* errno = error value passed in r0 */
+	mvn	r0, #0x00000000		/* r0 = 0xffffffff (-1) */
+	mvn	r1, #0x00000000		/* r1 = 0xffffffff (-1) */
+	mov	pc, lr
+
+#ifdef PIC
+	.align	0
+.Lgot:
+	.word	_C_LABEL(_GLOBAL_OFFSET_TABLE_) + (. - (.L1+4))
+#endif /* PIC */
+
+	.globl	_C_LABEL(errno)
+
+.Lerrno:
+	.word	PIC_SYM(_C_LABEL(errno), GOT)
+#endif /* _REENTRANT */
diff --git a/lib/libc/arch/arm/sys/exect.S b/lib/libc/arch/arm/sys/exect.S
new file mode 100644
index 00000000000..07d9c5d863d
--- /dev/null
+++ b/lib/libc/arch/arm/sys/exect.S
@@ -0,0 +1,36 @@
+/* $NetBSD: exect.S,v 1.4 2003/08/07 16:42:04 agc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)exect.s 5.1 (Berkeley) 4/23/90
+ */
+
+#include "SYS.h"
+/* NOTE(review): presumably defines exect as a stub for the execve system call; PSEUDO is not visible here -- confirm. */
+PSEUDO(exect,execve)
diff --git a/lib/libc/arch/arm/sys/fork.S b/lib/libc/arch/arm/sys/fork.S
new file mode 100644
index 00000000000..726e2414a6b
--- /dev/null
+++ b/lib/libc/arch/arm/sys/fork.S
@@ -0,0 +1,47 @@
+/* $NetBSD: fork.S,v 1.5 2003/08/07 16:42:04 agc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)fork.s 5.1 (Berkeley) 4/23/90
+ */
+
+#include "SYS.h"
+
+/*
+ * pid = fork();
+ * (the code after the trap turns r0 into 0 in the child, so fork() returns 0 there)
+ * On return from the SWI:
+ * r1 == 0 in parent process, r1 == 1 in child process.
+ * r0 == pid of child in parent, r0 == pid of parent in child.
+ */
+
+_SYSCALL(fork,fork)
+	sub	r1, r1, #1	/* r1 == 0xffffffff if parent, 0 if child */
+	and	r0, r0, r1	/* r0 == 0 if child, else unchanged */
+	mov	r15, r14	/* return to the caller */
diff --git a/lib/libc/arch/arm/sys/rfork.S b/lib/libc/arch/arm/sys/rfork.S
new file mode 100644
index 00000000000..75e011b76a7
--- /dev/null
+++ b/lib/libc/arch/arm/sys/rfork.S
@@ -0,0 +1,32 @@
+/* $OpenBSD: rfork.S,v 1.1 2004/02/01 05:30:41 drahn Exp $ */
+
+/*
+ * Copyright (c) 2004 Dale Rahn
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2.
Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* pid = rfork() */
+
+#include "SYS.h"
+/* The whole stub comes from the RSYSCALL macro (presumably defined in SYS.h -- confirm). */
+RSYSCALL(rfork)
diff --git a/lib/libc/arch/arm/sys/sbrk.S b/lib/libc/arch/arm/sys/sbrk.S
new file mode 100644
index 00000000000..46f4559e063
--- /dev/null
+++ b/lib/libc/arch/arm/sys/sbrk.S
@@ -0,0 +1,86 @@
+/* $NetBSD: sbrk.S,v 1.7 2003/08/07 16:42:05 agc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3.
Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)sbrk.s 5.1 (Berkeley) 4/23/90
+ */
+
+#include "SYS.h"
+
+	.globl	_C_LABEL(end)
+
+#ifdef WEAK_ALIAS
+WEAK_ALIAS(sbrk, _sbrk)
+#endif
+
+	.data
+	.align	0
+	.globl	CURBRK
+	.type	CURBRK,#object
+CURBRK:
+	.word	_C_LABEL(end)		/* current break; starts at `end' */
+
+/*
+ * Change the data segment size.  In: r0 = increment.  Out: r0 = previous break.
+ */
+SYSENTRY(_sbrk)
+#ifdef PIC
+	/* Setup the GOT */
+	ldr	r3, .Lgot
+	add	r3, pc, r3		/* pc reads as .L1+4 here, matching the bias in .Lgot */
+.L1:
+	ldr	r2, .Lcurbrk
+	ldr	r2, [r3, r2]		/* r2 = address of CURBRK, fetched from the GOT */
+#else
+	ldr	r2, .Lcurbrk		/* r2 = address of CURBRK */
+#endif
+	/* Get the current brk address */
+	ldr	r1, [r2]
+
+	/* Calculate new value */
+	mov	r3, r0			/* r3 = increment (GOT pointer no longer needed) */
+	add	r0, r0, r1		/* r0 = current break + increment */
+	SYSTRAP(break)
+	bcs	PIC_SYM(CERROR, PLT)	/* carry set after the trap: go to the error routine */
+
+	/* Store new curbrk value */
+	ldr	r0, [r2]		/* r0 = old break; r2/r3 are relied on after the trap */
+	add	r1, r0, r3
+	str	r1, [r2]
+
+	/* Return old curbrk value */
+	mov	r15, r14
+
+	.align	0
+#ifdef PIC
+.Lgot:
+	.word	_C_LABEL(_GLOBAL_OFFSET_TABLE_) + (. - (.L1+4))
+#endif
+.Lcurbrk:
+	.word	PIC_SYM(CURBRK, GOT)
diff --git a/lib/libc/arch/arm/sys/sigpending.S b/lib/libc/arch/arm/sys/sigpending.S
new file mode 100644
index 00000000000..71f73b99f38
--- /dev/null
+++ b/lib/libc/arch/arm/sys/sigpending.S
@@ -0,0 +1,38 @@
+/* $NetBSD: sigpending.S,v 1.5 2003/08/07 16:42:05 agc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)sigpending.s 5.1 (Berkeley) 7/1/90
+ */
+
+#include "SYS.h"
+
+/* The whole stub comes from the RSYSCALL macro (presumably defined in SYS.h -- confirm). */
+
+RSYSCALL(sigpending)
diff --git a/lib/libc/arch/arm/sys/sigprocmask.S b/lib/libc/arch/arm/sys/sigprocmask.S
new file mode 100644
index 00000000000..9c317d8067e
--- /dev/null
+++ b/lib/libc/arch/arm/sys/sigprocmask.S
@@ -0,0 +1,46 @@
+/* $NetBSD: sigprocmask.S,v 1.5 2003/08/07 16:42:05 agc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)sigprocmask.s 5.2 (Berkeley) 12/17/90
+ */
+
+#include "SYS.h"
+/* In: r0 = how, r1 = pointer to new mask or 0, r2 = pointer for old mask or 0.  Out: r0 = 0. */
+SYSENTRY(sigprocmask)
+	teq	r1, #0x00000000		/* is the mask pointer in r1 null? */
+	moveq	r0, #0x00000001		/* yes: force r0 to 1 (presumably SIG_BLOCK -- confirm) */
+	moveq	r1, #0x00000000		/* ... and pass an all-zero mask */
+	ldrne	r1, [r1]		/* no: pass the word it points to, not the pointer */
+	SYSTRAP(sigprocmask)
+	bcs	PIC_SYM(CERROR, PLT)	/* carry set after the trap: go to the error routine */
+	teq	r2, #0x00000000		/* is the pointer in r2 null? */
+	strne	r0, [r2]		/* no: store the value the trap left in r0 there */
+	mov	r0, #0x00000000		/* return 0 */
+	mov	r15, r14
diff --git a/lib/libc/arch/arm/sys/sigreturn.S b/lib/libc/arch/arm/sys/sigreturn.S
new file mode 100644
index 00000000000..62b2aa41836
--- /dev/null
+++ b/lib/libc/arch/arm/sys/sigreturn.S
@@ -0,0 +1,40 @@
+/* $NetBSD: sigreturn.S,v 1.2 2003/08/07 16:42:05 agc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)sigreturn.s 5.2 (Berkeley) 12/17/90
+ */
+
+#include "SYS.h"
+
+/*
+ * We must preserve the state of the registers as the user has set them up.
+ */
+
+RSYSCALL(sigreturn)
diff --git a/lib/libc/arch/arm/sys/sigsuspend.S b/lib/libc/arch/arm/sys/sigsuspend.S
new file mode 100644
index 00000000000..c39fba966af
--- /dev/null
+++ b/lib/libc/arch/arm/sys/sigsuspend.S
@@ -0,0 +1,44 @@
+/* $NetBSD: sigsuspend.S,v 1.6 2003/08/07 16:42:05 agc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)sigsuspend.s 5.2 (Berkeley) 12/17/90
+ */
+
+#include "SYS.h"
+
+WARN_REFERENCES(sigsuspend, \
+ "warning: reference to compatibility sigsuspend(); include <signal.h> for correct reference")
+/* In: r0 = pointer to the mask word.  Returns 0 unless the trap comes back with carry set. */
+SYSENTRY(sigsuspend)
+	ldr	r0, [r0]		/* pass the word r0 points to, not the pointer */
+	SYSTRAP(sigsuspend)
+	bcs	PIC_SYM(CERROR, PLT)	/* carry set after the trap: go to the error routine */
+	mov	r0, #0x00000000		/* return 0 */
+	mov	r15, r14
diff --git a/lib/libc/arch/arm/sys/syscall.S b/lib/libc/arch/arm/sys/syscall.S
new file mode 100644
index 00000000000..43c826145e0
--- /dev/null
+++ b/lib/libc/arch/arm/sys/syscall.S
@@ -0,0 +1,36 @@
+/* $NetBSD: syscall.S,v 1.4 2003/08/07 16:42:05 agc Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)syscall.s 5.1 (Berkeley) 4/23/90
+ */
+
+#include "SYS.h"
+/* The whole stub comes from the RSYSCALL macro (presumably defined in SYS.h -- confirm). */
+RSYSCALL(syscall) |