diff options
author | Miod Vallat <miod@cvs.openbsd.org> | 2010-09-21 20:29:18 +0000 |
---|---|---|
committer | Miod Vallat <miod@cvs.openbsd.org> | 2010-09-21 20:29:18 +0000 |
commit | 39eff95ee263d1a682cb8667f5e7ea2307be5a0c (patch) | |
tree | 4dbffcedda9b781e4a328757263429adf2fdc640 /sys/arch | |
parent | 37d466cb419fc3bce08d762cf8bf4cad7f3c5ae5 (diff) |
Replace the old floating point completion code with a C interface to the
MI softfloat code, implementing all MIPS IV specified floating point
operations.
Tested on R5000, R10000, R14000 and Loongson2F.
Diffstat (limited to 'sys/arch')
-rw-r--r-- | sys/arch/mips64/conf/files.mips64 | 6 | ||||
-rw-r--r-- | sys/arch/mips64/include/cpu.h | 40 | ||||
-rw-r--r-- | sys/arch/mips64/include/ieeefp.h | 23 | ||||
-rw-r--r-- | sys/arch/mips64/mips64/fp.S | 3127 | ||||
-rw-r--r-- | sys/arch/mips64/mips64/fp_emulate.c | 1310 | ||||
-rw-r--r-- | sys/arch/mips64/mips64/lcore_float.S | 133 | ||||
-rw-r--r-- | sys/arch/mips64/mips64/process_machdep.c | 6 | ||||
-rw-r--r-- | sys/arch/mips64/mips64/trap.c | 24 |
8 files changed, 1350 insertions, 3319 deletions
diff --git a/sys/arch/mips64/conf/files.mips64 b/sys/arch/mips64/conf/files.mips64 index 4cbaa6bb8b4..4e558136edd 100644 --- a/sys/arch/mips64/conf/files.mips64 +++ b/sys/arch/mips64/conf/files.mips64 @@ -1,4 +1,4 @@ -# $OpenBSD: files.mips64,v 1.15 2010/09/20 12:10:26 syuu Exp $ +# $OpenBSD: files.mips64,v 1.16 2010/09/21 20:29:13 miod Exp $ file arch/mips64/mips64/arcbios.c arcbios file arch/mips64/mips64/clock.c @@ -20,7 +20,7 @@ file arch/mips64/mips64/cache_octeon.c cpu_octeon file arch/mips64/mips64/context.S file arch/mips64/mips64/cp0access.S file arch/mips64/mips64/exception.S -file arch/mips64/mips64/fp.S +file arch/mips64/mips64/fp_emulate.c file arch/mips64/mips64/lcore_access.S file arch/mips64/mips64/lcore_float.S file arch/mips64/mips64/tlbhandler.S @@ -33,3 +33,5 @@ file arch/mips64/mips64/ipifuncs.c multiprocessor file netinet/in_cksum.c inet file netinet/in4_cksum.c inet + +file lib/libkern/softfloat.c diff --git a/sys/arch/mips64/include/cpu.h b/sys/arch/mips64/include/cpu.h index 429bd17d05f..6913ad4a4f9 100644 --- a/sys/arch/mips64/include/cpu.h +++ b/sys/arch/mips64/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.64 2010/09/20 12:10:26 syuu Exp $ */ +/* $OpenBSD: cpu.h,v 1.65 2010/09/21 20:29:17 miod Exp $ */ /*- * Copyright (c) 1992, 1993 @@ -284,43 +284,6 @@ extern vaddr_t uncached_base; #define FPC_CSR $31 /* - * The floating point coprocessor status register bits. 
- */ -#define FPC_ROUNDING_BITS 0x00000003 -#define FPC_ROUND_RN 0x00000000 -#define FPC_ROUND_RZ 0x00000001 -#define FPC_ROUND_RP 0x00000002 -#define FPC_ROUND_RM 0x00000003 -#define FPC_STICKY_BITS 0x0000007c -#define FPC_STICKY_INEXACT 0x00000004 -#define FPC_STICKY_UNDERFLOW 0x00000008 -#define FPC_STICKY_OVERFLOW 0x00000010 -#define FPC_STICKY_DIV0 0x00000020 -#define FPC_STICKY_INVALID 0x00000040 -#define FPC_ENABLE_BITS 0x00000f80 -#define FPC_ENABLE_INEXACT 0x00000080 -#define FPC_ENABLE_UNDERFLOW 0x00000100 -#define FPC_ENABLE_OVERFLOW 0x00000200 -#define FPC_ENABLE_DIV0 0x00000400 -#define FPC_ENABLE_INVALID 0x00000800 -#define FPC_EXCEPTION_BITS 0x0003f000 -#define FPC_EXCEPTION_INEXACT 0x00001000 -#define FPC_EXCEPTION_UNDERFLOW 0x00002000 -#define FPC_EXCEPTION_OVERFLOW 0x00004000 -#define FPC_EXCEPTION_DIV0 0x00008000 -#define FPC_EXCEPTION_INVALID 0x00010000 -#define FPC_EXCEPTION_UNIMPL 0x00020000 -#define FPC_COND_BIT 0x00800000 -#define FPC_FLUSH_BIT 0x01000000 -#define FPC_MBZ_BITS 0xfe7c0000 - -/* - * Constants to determine if have a floating point instruction. - */ -#define OPCODE_SHIFT 26 -#define OPCODE_C1 0x11 - -/* * The low part of the TLB entry. */ #define VMTLB_PF_NUM 0x3fffffc0 @@ -636,6 +599,7 @@ void save_fpu(void); int guarded_read_4(paddr_t, uint32_t *); int guarded_write_4(paddr_t, uint32_t); +void MipsFPTrap(struct trap_frame *); register_t MipsEmulateBranch(struct trap_frame *, vaddr_t, uint32_t, uint32_t); /* diff --git a/sys/arch/mips64/include/ieeefp.h b/sys/arch/mips64/include/ieeefp.h index b833c549bd3..0c2f18909b5 100644 --- a/sys/arch/mips64/include/ieeefp.h +++ b/sys/arch/mips64/include/ieeefp.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ieeefp.h,v 1.2 2004/08/10 20:28:13 deraadt Exp $ */ +/* $OpenBSD: ieeefp.h,v 1.3 2010/09/21 20:29:17 miod Exp $ */ /* * Written by J.T. 
Conklin, Apr 11, 1995 @@ -22,4 +22,25 @@ typedef enum { FP_RM=3 /* round toward negative infinity */ } fp_rnd; +#ifdef _KERNEL + +/* + * Defines for the floating-point completion/emulation code. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <machine/fpu.h> + +#define float_raise(bits) \ + do { curproc->p_md.md_regs->fsr |= (bits) << FPCSR_C_SHIFT; } while (0) +#define float_set_inexact() float_raise(FP_X_IMP) +#define float_set_invalid() float_raise(FP_X_INV) + +#define float_get_round(csr) (csr & FPCSR_RM_MASK) +#define fpgetround() float_get_round(curproc->p_md.md_regs->fsr) + +#endif + #endif /* !_MIPS_IEEEFP_H_ */ diff --git a/sys/arch/mips64/mips64/fp.S b/sys/arch/mips64/mips64/fp.S deleted file mode 100644 index 5578b6f576f..00000000000 --- a/sys/arch/mips64/mips64/fp.S +++ /dev/null @@ -1,3127 +0,0 @@ -/* $OpenBSD: fp.S,v 1.9 2010/02/08 19:26:46 miod Exp $ */ -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Ralph Campbell. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. 
Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: @(#)fp.s 8.1 (Berkeley) 6/10/93 - * $Id: fp.S,v 1.9 2010/02/08 19:26:46 miod Exp $ - */ - -/* - * Standard header stuff. 
- */ - -#include <machine/regdef.h> -#include <machine/asm.h> -#include <machine/regnum.h> -#include <machine/cpu.h> - -#include "assym.h" - -#define SEXP_INF 0xff -#define DEXP_INF 0x7ff -#define SEXP_BIAS 127 -#define DEXP_BIAS 1023 -#define SEXP_MIN -126 -#define DEXP_MIN -1022 -#define SEXP_MAX 127 -#define DEXP_MAX 1023 -#define WEXP_MAX 30 /* maximum unbiased exponent for int */ -#define WEXP_MIN -1 /* minimum unbiased exponent for int */ -#define LEXP_MAX 62 /* maximum unbiased exponent for long */ -#define LEXP_MIN -1 /* minimum unbiased exponent for long */ -#define SFRAC_BITS 23 -#define DFRAC_BITS 52 -#define SIMPL_ONE 0x00800000 -#define DIMPL_ONE 0x0010000000000000 -#define SLEAD_ZEROS 63 - 55 -#define DLEAD_ZEROS 63 - 52 -#define STICKYBIT 1 -#define GUARDBIT 0x0000000080000000 -#define DGUARDBIT 0x8000000000000000 - -#define SSIGNAL_NAN 0x00400000 -#define DSIGNAL_NAN 0x00080000 -#define SQUIET_NAN 0x003fffff -#define DQUIET_NAN 0x0007ffffffffffff -#define INT_MIN 0x80000000 -#define INT_MAX 0x7fffffff -#define LONG_MIN 0x8000000000000000 -#define LONG_MAX 0x7fffffffffffffff - -#define COND_UNORDERED 0x1 -#define COND_EQUAL 0x2 -#define COND_LESS 0x4 -#define COND_SIGNAL 0x8 - -/*---------------------------------------------------------------------------- - * - * MipsEmulateFP -- - * - * Emulate unimplemented floating point operations. - * This routine should only be called by MipsFPInterrupt() - * and only if this is a COP1 instruction. - * - * MipsEmulateFP(instr) - * unsigned instr; - * - * Results: - * None. - * - * Side effects: - * Floating point registers are modified according to instruction. 
- * - *---------------------------------------------------------------------------- - */ -NON_LEAF(MipsEmulateFP, FRAMESZ(CF_SZ), ra) - PTR_SUB sp, sp, FRAMESZ(CF_SZ) - PTR_S ra, CF_RA_OFFS(sp) - - srl v0, a0, 21 # get FMT field - and v0, v0, 0x1f # mask FMT field - dla a3, func_s - beq v0, 0x10, 1f - dla a3, func_d - beq v0, 0x11, 1f - dla a3, func_w - beq v0, 0x14, 1f - dla a3, func_l - beq v0, 0x15, 1f - b ill # illegal format - -1: - and v1, a0, 0x3f # mask FUNC field - sll v1, v1, 3 # align for table lookup - daddu v1, a3 - cfc1 a1, FPC_CSR # get exception register - ld a3, (v1) # switch on FUNC & FMT - and a1, a1, ~FPC_EXCEPTION_UNIMPL # clear exception - ctc1 a1, FPC_CSR - j a3 - - .rdata -func_s: - .dword add_s # 0 - .dword sub_s # 1 - .dword mul_s # 2 - .dword div_s # 3 - .dword ill # 4 (sqrt) - .dword abs_s # 5 - .dword mov_s # 6 - .dword neg_s # 7 - .dword round_l_s # 8 - .dword trunc_l_s # 9 - .dword ceil_l_s # 10 - .dword floor_l_s # 11 - .dword round_w_s # 12 - .dword trunc_w_s # 13 - .dword ceil_w_s # 14 - .dword floor_w_s # 15 - .dword ill # 16 - .dword ill # 17 - .dword ill # 18 - .dword ill # 19 - .dword ill # 20 - .dword ill # 21 - .dword ill # 22 - .dword ill # 23 - .dword ill # 24 - .dword ill # 25 - .dword ill # 26 - .dword ill # 27 - .dword ill # 28 - .dword ill # 29 - .dword ill # 30 - .dword ill # 31 - .dword ill # 32 - .dword cvt_d_s # 33 - .dword ill # 34 - .dword ill # 35 - .dword cvt_w_s # 36 - .dword cvt_l_s # 37 - .dword ill # 38 - .dword ill # 39 - .dword ill # 40 - .dword ill # 41 - .dword ill # 42 - .dword ill # 43 - .dword ill # 44 - .dword ill # 45 - .dword ill # 46 - .dword ill # 47 - .dword cmp_s # 48 - .dword cmp_s # 49 - .dword cmp_s # 50 - .dword cmp_s # 51 - .dword cmp_s # 52 - .dword cmp_s # 53 - .dword cmp_s # 54 - .dword cmp_s # 55 - .dword cmp_s # 56 - .dword cmp_s # 57 - .dword cmp_s # 58 - .dword cmp_s # 59 - .dword cmp_s # 60 - .dword cmp_s # 61 - .dword cmp_s # 62 - .dword cmp_s # 63 - -func_d: - .dword add_d # 0 - 
.dword sub_d # 1 - .dword mul_d # 2 - .dword div_d # 3 - .dword ill # 4 (sqrt) - .dword abs_d # 5 - .dword mov_d # 6 - .dword neg_d # 7 - .dword round_l_d # 8 - .dword trunc_l_d # 9 - .dword ceil_l_d # 10 - .dword floor_l_d # 11 - .dword round_w_d # 12 - .dword trunc_w_d # 13 - .dword ceil_w_d # 14 - .dword floor_w_d # 15 - .dword ill # 16 - .dword ill # 17 - .dword ill # 18 - .dword ill # 19 - .dword ill # 20 - .dword ill # 21 - .dword ill # 22 - .dword ill # 23 - .dword ill # 24 - .dword ill # 25 - .dword ill # 26 - .dword ill # 27 - .dword ill # 28 - .dword ill # 29 - .dword ill # 30 - .dword ill # 31 - .dword cvt_s_d # 32 - .dword ill # 33 - .dword ill # 34 - .dword ill # 35 - .dword cvt_w_d # 36 - .dword cvt_l_d # 37 - .dword ill # 38 - .dword ill # 39 - .dword ill # 40 - .dword ill # 41 - .dword ill # 42 - .dword ill # 43 - .dword ill # 44 - .dword ill # 45 - .dword ill # 46 - .dword ill # 47 - .dword cmp_d # 48 - .dword cmp_d # 49 - .dword cmp_d # 50 - .dword cmp_d # 51 - .dword cmp_d # 52 - .dword cmp_d # 53 - .dword cmp_d # 54 - .dword cmp_d # 55 - .dword cmp_d # 56 - .dword cmp_d # 57 - .dword cmp_d # 58 - .dword cmp_d # 59 - .dword cmp_d # 60 - .dword cmp_d # 61 - .dword cmp_d # 62 - .dword cmp_d # 63 - -func_w: - .dword ill # 0 - .dword ill # 1 - .dword ill # 2 - .dword ill # 3 - .dword ill # 4 - .dword ill # 5 - .dword ill # 6 - .dword ill # 7 - .dword ill # 8 - .dword ill # 9 - .dword ill # 10 - .dword ill # 11 - .dword ill # 12 - .dword ill # 13 - .dword ill # 14 - .dword ill # 15 - .dword ill # 16 - .dword ill # 17 - .dword ill # 18 - .dword ill # 19 - .dword ill # 20 - .dword ill # 21 - .dword ill # 22 - .dword ill # 23 - .dword ill # 24 - .dword ill # 25 - .dword ill # 26 - .dword ill # 27 - .dword ill # 28 - .dword ill # 29 - .dword ill # 30 - .dword ill # 31 - .dword cvt_s_w # 32 - .dword cvt_d_w # 33 - .dword ill # 34 - .dword ill # 35 - .dword ill # 36 - .dword ill # 37 - .dword ill # 38 - .dword ill # 39 - .dword ill # 40 - .dword ill # 41 - 
.dword ill # 42 - .dword ill # 43 - .dword ill # 44 - .dword ill # 45 - .dword ill # 46 - .dword ill # 47 - .dword ill # 48 - .dword ill # 49 - .dword ill # 50 - .dword ill # 51 - .dword ill # 52 - .dword ill # 53 - .dword ill # 54 - .dword ill # 55 - .dword ill # 56 - .dword ill # 57 - .dword ill # 58 - .dword ill # 59 - .dword ill # 60 - .dword ill # 61 - .dword ill # 62 - .dword ill # 63 - -func_l: - .dword ill # 0 - .dword ill # 1 - .dword ill # 2 - .dword ill # 3 - .dword ill # 4 - .dword ill # 5 - .dword ill # 6 - .dword ill # 7 - .dword ill # 8 - .dword ill # 9 - .dword ill # 10 - .dword ill # 11 - .dword ill # 12 - .dword ill # 13 - .dword ill # 14 - .dword ill # 15 - .dword ill # 16 - .dword ill # 17 - .dword ill # 18 - .dword ill # 19 - .dword ill # 20 - .dword ill # 21 - .dword ill # 22 - .dword ill # 23 - .dword ill # 24 - .dword ill # 25 - .dword ill # 26 - .dword ill # 27 - .dword ill # 28 - .dword ill # 29 - .dword ill # 30 - .dword ill # 31 - .dword cvt_s_l # 32 - .dword cvt_d_l # 33 - .dword ill # 34 - .dword ill # 35 - .dword ill # 36 - .dword ill # 37 - .dword ill # 38 - .dword ill # 39 - .dword ill # 40 - .dword ill # 41 - .dword ill # 42 - .dword ill # 43 - .dword ill # 44 - .dword ill # 45 - .dword ill # 46 - .dword ill # 47 - .dword ill # 48 - .dword ill # 49 - .dword ill # 50 - .dword ill # 51 - .dword ill # 52 - .dword ill # 53 - .dword ill # 54 - .dword ill # 55 - .dword ill # 56 - .dword ill # 57 - .dword ill # 58 - .dword ill # 59 - .dword ill # 60 - .dword ill # 61 - .dword ill # 62 - .dword ill # 63 - - .text - -/* - * Single precision subtract. - */ -sub_s: - jal get_ft_fs_s - xor ta0, 1 # negate FT sign bit - b add_sub_s -/* - * Single precision add. - */ -add_s: - jal get_ft_fs_s -add_sub_s: - bne t1, SEXP_INF, 1f # is FS an infinity? 
- bne ta1, SEXP_INF, result_fs_s # if FT is not inf, result=FS - bne t2, zero, result_fs_s # if FS is NAN, result is FS - bne ta2, zero, result_ft_s # if FT is NAN, result is FT - bne t0, ta0, invalid_s # both infinities same sign? - b result_fs_s # result is in FS -1: - beq ta1, SEXP_INF, result_ft_s # if FT is inf, result=FT - bne t1, zero, 4f # is FS a denormalized num? - beq t2, zero, 3f # is FS zero? - bne ta1, zero, 2f # is FT a denormalized num? - beq ta2, zero, result_fs_s # FT is zero, result=FS - jal renorm_fs_s - jal renorm_ft_s - b 5f -2: - jal renorm_fs_s - subu ta1, ta1, SEXP_BIAS # unbias FT exponent - or ta2, ta2, SIMPL_ONE # set implied one bit - b 5f -3: - bne ta1, zero, result_ft_s # if FT != 0, result=FT - bne ta2, zero, result_ft_s - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - bne v0, FPC_ROUND_RM, 1f # round to -infinity? - or t0, t0, ta0 # compute result sign - b result_fs_s -1: - and t0, ta0 # compute result sign - b result_fs_s -4: - bne ta1, zero, 2f # is FT a denormalized num? - beq ta2, zero, result_fs_s # FT is zero, result=FS - subu t1, SEXP_BIAS # unbias FS exponent - or t2, SIMPL_ONE # set implied one bit - jal renorm_ft_s - b 5f -2: - subu t1, SEXP_BIAS # unbias FS exponent - or t2, SIMPL_ONE # set implied one bit - subu ta1, SEXP_BIAS # unbias FT exponent - or ta2, SIMPL_ONE # set implied one bit -/* - * Perform the addition. - */ -5: - move t8, zero # no shifted bits (sticky reg) - beq t1, ta1, 4f # exp equal, no shift needed - subu v0, t1, ta1 # v0 = difference of exponents - move v1, v0 # v1 = abs(difference) - bge v0, zero, 1f - negu v1 -1: - ble v1, SFRAC_BITS+2, 2f # is difference too great? 
- li t8, STICKYBIT # set the sticky bit - bge v0, zero, 1f # check which exp is larger - move t1, ta1 # result exp is FTs - move t2, zero # FSs fraction shifted is zero - b 4f -1: - move ta2, zero # FTs fraction shifted is zero - b 4f -2: - li t9, 32 # compute 32 - abs(exp diff) - subu t9, t9, v1 - bgt v0, zero, 3f # if FS > FT, shift FTs frac - move t1, ta1 # FT > FS, result exp is FTs - sll t8, t2, t9 # save bits shifted out - srl t2, t2, v1 # shift FSs fraction - b 4f -3: - sll t8, ta2, t9 # save bits shifted out - srl ta2, ta2, v1 # shift FTs fraction -4: - bne t0, ta0, 1f # if signs differ, subtract - addu t2, t2, ta2 # add fractions - b norm_s -1: - blt t2, ta2, 3f # subtract larger from smaller - bne t2, ta2, 2f # if same, result=0 - move t1, zero # result=0 - move t2, zero - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - bne v0, FPC_ROUND_RM, 1f # round to -infinity? - or t0, t0, ta0 # compute result sign - b result_fs_s -1: - and t0, t0, ta0 # compute result sign - b result_fs_s -2: - sltu t9, zero, t8 # compute t2:zero - ta2:t8 - subu t8, zero, t8 - subu t2, t2, ta2 # subtract fractions - subu t2, t2, t9 # subtract barrow - b norm_s -3: - move t0, ta0 # sign of result = FTs - sltu t9, zero, t8 # compute ta2:zero - t2:t8 - subu t8, zero, t8 - subu t2, ta2, t2 # subtract fractions - subu t2, t2, t9 # subtract barrow - b norm_s - -/* - * Double precision subtract. - */ -sub_d: - jal get_ft_fs_d - xor ta0, ta0, 1 # negate sign bit - b add_sub_d -/* - * Double precision add. - */ -add_d: - jal get_ft_fs_d -add_sub_d: - bne t1, DEXP_INF, 1f # is FS an infinity? - bne ta1, DEXP_INF, result_fs_d # if FT is not inf, result=FS - bne t2, zero, result_fs_d # if FS is NAN, result is FS - bne ta2, zero, result_ft_d # if FT is NAN, result is FT - bne t0, ta0, invalid_d # both infinities same sign? - b result_fs_d # result is in FS -1: - beq ta1, DEXP_INF, result_ft_d # if FT is inf, result=FT - bne t1, zero, 4f # is FS a denormalized num? 
- beq t2, zero, 3f # is FS zero? - bne ta1, zero, 2f # is FT a denormalized num? - beq ta2, zero, result_fs_d # FT is zero, result=FS - jal renorm_fs_d - jal renorm_ft_d - b 5f -2: - jal renorm_fs_d - subu ta1, ta1, DEXP_BIAS # unbias FT exponent - or ta2, ta2, DIMPL_ONE # set implied one bit - b 5f -3: - bne ta1, zero, result_ft_d # if FT != 0, result=FT - bne ta2, zero, result_ft_d - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - bne v0, FPC_ROUND_RM, 1f # round to -infinity? - or t0, t0, ta0 # compute result sign - b result_fs_d -1: - and t0, t0, ta0 # compute result sign - b result_fs_d -4: - bne ta1, zero, 2f # is FT a denormalized num? - beq ta2, zero, result_fs_d # FT is zero, result=FS - subu t1, t1, DEXP_BIAS # unbias FS exponent - or t2, t2, DIMPL_ONE # set implied one bit - jal renorm_ft_d - b 5f -2: - subu t1, t1, DEXP_BIAS # unbias FS exponent - or t2, t2, DIMPL_ONE # set implied one bit - subu ta1, ta1, DEXP_BIAS # unbias FT exponent - or ta2, ta2, DIMPL_ONE # set implied one bit -/* - * Perform the addition. - */ -5: - move t8, zero # no shifted bits (sticky reg) - beq t1, ta1, 4f # no shift needed - subu v0, t1, ta1 # v0 = difference of exponents - move v1, v0 # v1 = abs(difference) - bge v0, zero, 1f - negu v1 -1: - ble v1, DFRAC_BITS+2, 2f # is difference too great? 
- li t8, STICKYBIT # set the sticky bit - bge v0, zero, 1f # check which exp is larger - move t1, ta1 # result exp is FTs - move t2, zero # FSs fraction shifted is zero - b 4f -1: - move ta2, zero # FTs fraction shifted is zero - b 4f -2: - li t9, 64 - subu t9, t9, v1 - bge v0, zero, 3f # if FS > FT, shift FTs frac - move t1, ta1 # FT > FS, result exp is FTs - dsll t8, t2, t9 # save bits shifted out - dsrl t2, t2, v1 - b 4f -3: - dsll t8, ta2, t9 # save bits shifted out - dsrl ta2, ta2, v1 -4: - bne t0, ta0, 1f # if signs differ, subtract - daddu t2, ta2 # add fractions - b norm_d -1: - blt t2, ta2, 3f # subtract larger from smaller - bne t2, ta2, 2f - move t1, zero # result=0 - move t2, zero - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - bne v0, FPC_ROUND_RM, 1f # round to -infinity? - or t0, t0, ta0 # compute result sign - b result_fs_d -1: - and t0, t0, ta0 # compute result sign - b result_fs_d -2: - sltu t9, zero, t8 # compute t2:zero - ta2:t8 - dsubu t8, zero, t8 - dsubu t2, t2, ta2 # subtract fractions - dsubu t2, t2, t9 # subtract barrow - b norm_d -3: - move t0, ta0 # sign of result = FTs - sltu t9, zero, t8 - dsubu t2, ta2, t2 # subtract fractions - dsubu t2, t2, t9 # subtract barrow - b norm_d - -/* - * Single precision multiply. - */ -mul_s: - jal get_ft_fs_s - xor t0, t0, ta0 # compute sign of result - move ta0, t0 - bne t1, SEXP_INF, 2f # is FS an infinity? - bne t2, zero, result_fs_s # if FS is a NAN, result=FS - bne ta1, SEXP_INF, 1f # FS is inf, is FT an infinity? - bne ta2, zero, result_ft_s # if FT is a NAN, result=FT - b result_fs_s # result is infinity -1: - bne ta1, zero, result_fs_s # inf * zero? if no, result=FS - bne ta2, zero, result_fs_s - b invalid_s # infinity * zero is invalid -2: - bne ta1, SEXP_INF, 1f # FS != inf, is FT an infinity? - bne t1, zero, result_ft_s # zero * inf? 
if no, result=FT - bne t2, zero, result_ft_s - bne ta2, zero, result_ft_s # if FT is a NAN, result=FT - b invalid_s # zero * infinity is invalid -1: - bne t1, zero, 1f # is FS zero? - beq t2, zero, result_fs_s # result is zero - jal renorm_fs_s - b 2f -1: - subu t1, t1, SEXP_BIAS # unbias FS exponent - or t2, t2, SIMPL_ONE # set implied one bit -2: - bne ta1, zero, 1f # is FT zero? - beq ta2, zero, result_ft_s # result is zero - jal renorm_ft_s - b 2f -1: - subu ta1, ta1, SEXP_BIAS # unbias FT exponent - or ta2, ta2, SIMPL_ONE # set implied one bit -2: - addu t1, t1, ta1 # compute result exponent - addu t1, t1, 9 # account for binary point - multu t2, ta2 # multiply fractions - mflo t8 - mfhi t2 - b norm_s - -/* - * Double precision multiply. - */ -mul_d: - jal get_ft_fs_d - xor t0, t0, ta0 # compute sign of result - move ta0, t0 - bne t1, DEXP_INF, 2f # is FS an infinity? - bne t2, zero, result_fs_d # if FS is a NAN, result=FS - bne ta1, DEXP_INF, 1f # FS is inf, is FT an infinity? - bne ta2, zero, result_ft_d # if FT is a NAN, result=FT - b result_fs_d # result is infinity -1: - bne ta1, zero, result_fs_d # inf * zero? if no, result=FS - bne ta2, zero, result_fs_d - b invalid_d # infinity * zero is invalid -2: - bne ta1, DEXP_INF, 1f # FS != inf, is FT an infinity? - bne t1, zero, result_ft_d # zero * inf? if no, result=FT - bne t2, zero, result_ft_d # if FS is a NAN, result=FS - bne ta2, zero, result_ft_d # if FT is a NAN, result=FT - b invalid_d # zero * infinity is invalid -1: - bne t1, zero, 2f # is FS zero? - beq t2, zero, result_fs_d # result is zero - jal renorm_fs_d - b 3f -2: - subu t1, t1, DEXP_BIAS # unbias FS exponent - or t2, t2, DIMPL_ONE # set implied one bit -3: - bne ta1, zero, 2f # is FT zero? - beq ta2, zero, result_ft_d # result is zero - jal renorm_ft_d - b 3f -2: - subu ta1, ta1, DEXP_BIAS # unbias FT exponent - or ta2, ta2, DIMPL_ONE # set implied one bit -3: - addu t1, t1, ta1 # compute result exponent - addu t1, t1, 12 # ??? 
- dmultu t2, ta2 # multiply fractions - mflo t8 - mfhi t2 - b norm_d - -/* - * Single precision divide. - */ -div_s: - jal get_ft_fs_s - xor t0, t0, ta0 # compute sign of result - move ta0, t0 - bne t1, SEXP_INF, 1f # is FS an infinity? - bne t2, zero, result_fs_s # if FS is NAN, result is FS - bne ta1, SEXP_INF, result_fs_s # is FT an infinity? - bne ta2, zero, result_ft_s # if FT is NAN, result is FT - b invalid_s # infinity/infinity is invalid -1: - bne ta1, SEXP_INF, 1f # is FT an infinity? - bne ta2, zero, result_ft_s # if FT is NAN, result is FT - move t1, zero # x / infinity is zero - move t2, zero - b result_fs_s -1: - bne t1, zero, 2f # is FS zero? - bne t2, zero, 1f - bne ta1, zero, result_fs_s # FS=zero, is FT zero? - beq ta2, zero, invalid_s # 0 / 0 - b result_fs_s # result = zero -1: - jal renorm_fs_s - b 3f -2: - subu t1, t1, SEXP_BIAS # unbias FS exponent - or t2, t2, SIMPL_ONE # set implied one bit -3: - bne ta1, zero, 2f # is FT zero? - bne ta2, zero, 1f - or a1, a1, FPC_EXCEPTION_DIV0 | FPC_STICKY_DIV0 - and v0, a1, FPC_ENABLE_DIV0 # trap enabled? - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # save exceptions - li t1, SEXP_INF # result is infinity - move t2, zero - b result_fs_s -1: - jal renorm_ft_s - b 3f -2: - subu ta1, ta1, SEXP_BIAS # unbias FT exponent - or ta2, ta2, SIMPL_ONE # set implied one bit -3: - subu t1, t1, ta1 # compute exponent - subu t1, t1, 3 # compensate for result position - li v0, SFRAC_BITS+3 # number of bits to divide - move t8, t2 # init dividend - move t2, zero # init result -1: - bltu t8, ta2, 3f # is dividend >= divisor? -2: - subu t8, t8, ta2 # subtract divisor from dividend - or t2, t2, 1 # remember that we did - bne t8, zero, 3f # if not done, continue - sll t2, t2, v0 # shift result to final position - b norm_s -3: - sll t8, t8, 1 # shift dividend - sll t2, t2, 1 # shift result - subu v0, v0, 1 # are we done? - bne v0, zero, 1b # no, continue - b norm_s - -/* - * Double precision divide. 
- */ -div_d: - jal get_ft_fs_d - xor t0, t0, ta0 # compute sign of result - move ta0, t0 - bne t1, DEXP_INF, 1f # is FS an infinity? - bne t2, zero, result_fs_d # if FS is NAN, result is FS - bne ta1, DEXP_INF, result_fs_d # is FT an infinity? - bne ta2, zero, result_ft_d # if FT is NAN, result is FT - b invalid_d # infinity/infinity is invalid -1: - bne ta1, DEXP_INF, 1f # is FT an infinity? - bne ta2, zero, result_ft_d # if FT is NAN, result is FT - move t1, zero # x / infinity is zero - move t2, zero - b result_fs_d -1: - bne t1, zero, 2f # is FS zero? - bne t2, zero, 1f - bne ta1, zero, result_fs_d # FS=zero, is FT zero? - beq ta2, zero, invalid_d # 0 / 0 - b result_fs_d # result = zero -1: - jal renorm_fs_d - b 3f -2: - subu t1, t1, DEXP_BIAS # unbias FS exponent - or t2, t2, DIMPL_ONE # set implied one bit -3: - bne ta1, zero, 2f # is FT zero? - bne ta2, zero, 1f - or a1, a1, FPC_EXCEPTION_DIV0 | FPC_STICKY_DIV0 - and v0, a1, FPC_ENABLE_DIV0 # trap enabled? - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # Save exceptions - li t1, DEXP_INF # result is infinity - move t2, zero - b result_fs_d -1: - jal renorm_ft_d - b 3f -2: - subu ta1, ta1, DEXP_BIAS # unbias FT exponent - or ta2, ta2, DIMPL_ONE # set implied one bit -3: - subu t1, t1, ta1 # compute exponent - subu t1, t1, 3 # compensate for result position - li v0, DFRAC_BITS+3 # number of bits to divide - move t8, t2 # init dividend - move t2, zero # init result -1: - bltu t8, ta2, 3f # is dividend >= divisor? -2: - dsubu t8, t8, ta2 # subtract divisor from dividend - or t2, t2, 1 # remember that we did - bne t8, zero, 3f # if not done, continue - dsll t2, t2, v0 # shift upper part - b norm_d -3: - dsll t8, t8, 1 # shift dividend - dsll t2, t2, 1 # shift result - subu v0, v0, 1 # are we done? - bne v0, zero, 1b # no, continue - b norm_d - -/* - * Single precision absolute value. - */ -abs_s: - jal get_fs_s - move t0, zero # set sign positive - b result_fs_s - -/* - * Double precision absolute value. 
- */ -abs_d: - jal get_fs_d - move t0, zero # set sign positive - b result_fs_d - -/* - * Single precision move. - */ -mov_s: - jal get_fs_s - b result_fs_s - -/* - * Double precision move. - */ -mov_d: - jal get_fs_d - b result_fs_d - -/* - * Single precision negate. - */ -neg_s: - jal get_fs_s - xor t0, t0, 1 # reverse sign - b result_fs_s - -/* - * Double precision negate. - */ -neg_d: - jal get_fs_d - xor t0, t0, 1 # reverse sign - b result_fs_d - -/* - * Convert double to single. - */ -cvt_s_d: - jal get_fs_d - bne t1, DEXP_INF, 1f # is FS an infinity? - li t1, SEXP_INF # convert to single - dsll t2, t2, 3 # convert D fraction to S - b result_fs_s -1: - bne t1, zero, 2f # is FS zero? - beq t2, zero, result_fs_s # result=0 - jal renorm_fs_d - subu t1, t1, 3 # correct exp for shift below - b 3f -2: - subu t1, t1, DEXP_BIAS # unbias exponent - or t2, t2, DIMPL_ONE # add implied one bit -3: - dsll t2, t2, 3 # convert D fraction to S - b norm_noshift_s - -/* - * Convert long integer to single. - */ -cvt_s_l: - jal get_fs_long - b cvt_s_int -/* - * Convert integer to single. - */ -cvt_s_w: - jal get_fs_int -cvt_s_int: - bne t2, zero, 1f # check for zero - move t1, zero - b result_fs_s -/* - * Find out how many leading zero bits are in t2 and put in t9. - */ -1: - move v0, t2 - move t9, zero - dsrl v1, v0, 32 - bne v1, zero, 1f - addu t9, 32 - dsll v0, 32 -1: - dsrl v1, v0, 16 - bne v1, zero, 1f - addu t9, 16 - dsll v0, 16 -1: - dsrl v1, v0, 24 - bne v1, zero, 1f - addu t9, 8 - dsll v0, 8 -1: - dsrl v1, v0, 28 - bne v1, zero, 1f - addu t9, 4 - dsll v0, 4 -1: - dsrl v1, v0, 30 - bne v1, zero, 1f - addu t9, 2 - dsll v0, 2 -1: - dsrl v1, v0, 31 - bne v1, zero, 1f - addu t9, 1 -/* - * Now shift t2 the correct number of bits. 
- */ -1: - subu t9, SLEAD_ZEROS # dont count leading zeros - li t1, 23+32 # init exponent - subu t1, t1, t9 # compute exponent - beq t9, zero, 1f - li v0, 32 - blt t9, zero, 2f # if shift < 0, shift right - subu v0, v0, t9 - sll t2, t2, t9 # shift left -1: - add t1, t1, SEXP_BIAS # bias exponent - and t2, t2, ~SIMPL_ONE # clear implied one bit - b result_fs_s -2: - negu t9 # shift right by t9 - subu v0, v0, t9 - sll t8, t2, v0 # save bits shifted out - srl t2, t2, t9 - b norm_noshift_s - -/* - * Convert single to double. - */ -cvt_d_s: - jal get_fs_s - dsll t2, 32 - bne t1, SEXP_INF, 1f # is FS an infinity? - li t1, DEXP_INF # convert to double - b result_fs_d -1: - bne t1, zero, 2f # is FS denormalized or zero? - beq t2, zero, result_fs_d # is FS zero? - jal renorm_fs_s - move t8, zero - b norm_d -2: - addu t1, t1, DEXP_BIAS - SEXP_BIAS # bias exponent correctly - dsrl t2, t2, 3 - b result_fs_d - -/* - * Convert long integer to double. - */ -cvt_d_l: - jal get_fs_long - b cvt_d_int -/* - * Convert integer to double. - */ -cvt_d_w: - jal get_fs_int -cvt_d_int: - bne t2, zero, 1f # check for zero - move t1, zero # result=0 - b result_fs_d -/* - * Find out how many leading zero bits are in t2 and put in t9. - */ -1: - move v0, t2 - move t9, zero - dsrl v1, v0, 32 - bne v1, zero, 1f - addu t9, 32 - dsll v0, 32 -1: - dsrl v1, v0, 16 - bne v1, zero, 1f - addu t9, 16 - dsll v0, 16 -1: - dsrl v1, v0, 24 - bne v1, zero, 1f - addu t9, 8 - dsll v0, 8 -1: - dsrl v1, v0, 28 - bne v1, zero, 1f - addu t9, 4 - dsll v0, 4 -1: - dsrl v1, v0, 30 - bne v1, zero, 1f - addu t9, 2 - dsll v0, 2 -1: - dsrl v1, v0, 31 - bne v1, zero, 1f - addu t9, 1 -/* - * Now shift t2 the correct number of bits. 
- */ -1: - subu t9, t9, DLEAD_ZEROS # dont count leading zeros - li t1, DEXP_BIAS + 20 # init exponent - subu t1, t1, t9 # compute exponent - beq t9, zero, 1f - li v0, 64 - blt t9, zero, 2f # if shift < 0, shift right - subu v0, v0, t9 - dsll t2, t2, t9 # shift left -1: - and t2, t2, ~DIMPL_ONE # clear implied one bit - b result_fs_d -2: - negu t9 # shift right by t9 - subu v0, v0, t9 - dsrl t2, t2, t9 - and t2, t2, ~DIMPL_ONE # clear implied one bit - b result_fs_d - -/* - * Convert single to integer with specific rounding. - */ -round_w_s: - li t3, FPC_ROUND_RN - b do_cvt_w_s -trunc_w_s: - li t3, FPC_ROUND_RZ - b do_cvt_w_s -ceil_w_s: - li t3, FPC_ROUND_RP - b do_cvt_w_s -floor_w_s: - li t3, FPC_ROUND_RM - b do_cvt_w_s - -/* - * Convert single to integer. - */ -cvt_w_s: - and t3, a1, FPC_ROUNDING_BITS # get rounding mode -do_cvt_w_s: - jal get_fs_s - bne t1, SEXP_INF, 1f # is FS an infinity? - bne t2, zero, invalid_w # invalid conversion -1: - bne t1, zero, 1f # is FS zero? - beq t2, zero, result_fs_w # result is zero - move t2, zero # result is an inexact zero - b inexact_w -1: - subu t1, t1, SEXP_BIAS # unbias exponent - or t2, t2, SIMPL_ONE # add implied one bit - dsll t2, t2, DFRAC_BITS - SFRAC_BITS # convert S fraction to D - b cvt_w - -/* - * Convert double to integer with specific rounding. - */ -round_w_d: - li t3, FPC_ROUND_RN - b do_cvt_w_d -trunc_w_d: - li t3, FPC_ROUND_RZ - b do_cvt_w_d -ceil_w_d: - li t3, FPC_ROUND_RP - b do_cvt_w_d -floor_w_d: - li t3, FPC_ROUND_RM - b do_cvt_w_d - -/* - * Convert double to integer. - */ -cvt_w_d: - and t3, a1, FPC_ROUNDING_BITS # get rounding mode -do_cvt_w_d: - jal get_fs_d - bne t1, DEXP_INF, 1f # is FS an infinity? - bne t2, zero, invalid_w # invalid conversion -1: - bne t1, zero, 2f # is FS zero? 
- beq t2, zero, result_fs_w # result is zero - move t2, zero # result is an inexact zero - b inexact_w -2: - subu t1, t1, DEXP_BIAS # unbias exponent - or t2, t2, DIMPL_ONE # add implied one bit -cvt_w: - blt t1, WEXP_MIN, underflow_w # is exponent too small? - li v0, WEXP_MAX+1 - bgt t1, v0, overflow_w # is exponent too large? - bne t1, v0, 1f # special check for INT_MIN - beq t0, zero, overflow_w # if positive, overflow - bne t2, DIMPL_ONE, overflow_w - li t2, INT_MIN # result is INT_MIN - b result_fs_w -1: - subu v0, t1, 20 # compute amount to shift - beq v0, zero, 2f # is shift needed? - li v1, 64 - blt v0, zero, 1f # if shift < 0, shift right - subu v1, v1, v0 # shift left - dsll t2, t2, v0 - b 2f -1: - negu v0 # shift right by v0 - subu v1, v1, v0 - dsll t8, t2, v1 # save bits shifted out - sltu t8, zero, t8 # dont lose any ones - dsrl t2, t2, v0 -/* - * round (t0 is sign, t2:63-32 is integer part, t2:31-0 is fractional part). - */ -2: - beq t3, FPC_ROUND_RN, 3f # round to nearest - beq t3, FPC_ROUND_RZ, 5f # round to zero (truncate) - beq t3, FPC_ROUND_RP, 1f # round to +infinity - beq t0, zero, 5f # if sign is positive, truncate - b 2f -1: - bne t0, zero, 5f # if sign is negative, truncate -2: - daddu t2, t2, GUARDBIT # add in fractional - blt t2, zero, overflow_w # overflow? - b 5f -3: - daddu t2, t2, GUARDBIT # add in fractional - blt t2, zero, overflow_w # overflow? -4: - bne v0, zero, 5f # if rounded remainder is zero - and t2, 0xfffffffe00000000 # clear LSB (round to nearest) -5: - beq t0, zero, 1f # result positive? - negu t2 # convert to negative integer -1: - dsll v0, 32 # save fraction - dsrl t2, 32 # shift out fractional part - beq v0, zero, result_fs_w # is result exact? -/* - * Handle inexact exception. 
- */ -inexact_w: - or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT - and v0, a1, FPC_ENABLE_INEXACT - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # save exceptions - b result_fs_w - -/* - * Conversions to integer which overflow will trap (if enabled), - * or generate an inexact trap (if enabled), - * or generate an invalid exception. - */ -overflow_w: - or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW - and v0, a1, FPC_ENABLE_OVERFLOW - bne v0, zero, fpe_trap - and v0, a1, FPC_ENABLE_INEXACT - bne v0, zero, inexact_w # inexact traps enabled? - b invalid_w - -/* - * Conversions to integer which underflow will trap (if enabled), - * or generate an inexact trap (if enabled), - * or generate an invalid exception. - */ -underflow_w: - or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW - and v0, a1, FPC_ENABLE_UNDERFLOW - bne v0, zero, fpe_trap - and v0, a1, FPC_ENABLE_INEXACT - bne v0, zero, inexact_w # inexact traps enabled? - b invalid_w - -/* - * Convert single to long integer with specific rounding. - */ -round_l_s: - li t3, FPC_ROUND_RN - b do_cvt_l_s -trunc_l_s: - li t3, FPC_ROUND_RZ - b do_cvt_l_s -ceil_l_s: - li t3, FPC_ROUND_RP - b do_cvt_l_s -floor_l_s: - li t3, FPC_ROUND_RM - b do_cvt_l_s - -/* - * Convert single to long integer. - */ -cvt_l_s: - and t3, a1, FPC_ROUNDING_BITS # get rounding mode -do_cvt_l_s: - jal get_fs_s - bne t1, SEXP_INF, 1f # is FS an infinity? - bne t2, zero, invalid_l # invalid conversion -1: - bne t1, zero, 1f # is FS zero? - beq t2, zero, result_fs_l # result is zero - move t2, zero # result is an inexact zero - b inexact_l -1: - subu t1, t1, SEXP_BIAS # unbias exponent - or t2, t2, SIMPL_ONE # add implied one bit - dsll t2, t2, DFRAC_BITS - SFRAC_BITS # convert S fraction to D - b cvt_l - -/* - * Convert double to long integer with specific rounding. 
- */ -round_l_d: - li t3, FPC_ROUND_RN - b do_cvt_l_d -trunc_l_d: - li t3, FPC_ROUND_RZ - b do_cvt_l_d -ceil_l_d: - li t3, FPC_ROUND_RP - b do_cvt_l_d -floor_l_d: - li t3, FPC_ROUND_RM - b do_cvt_l_d - -/* - * Convert double to long integer. - */ -cvt_l_d: - and t3, a1, FPC_ROUNDING_BITS # get rounding mode -do_cvt_l_d: - jal get_fs_d - bne t1, DEXP_INF, 1f # is FS an infinity? - bne t2, zero, invalid_l # invalid conversion -1: - bne t1, zero, 2f # is FS zero? - beq t2, zero, result_fs_l # result is zero - move t2, zero # result is an inexact zero - b inexact_l -2: - subu t1, t1, DEXP_BIAS # unbias exponent - or t2, t2, DIMPL_ONE # add implied one bit -cvt_l: - blt t1, LEXP_MIN, underflow_l # is exponent too small? - li v0, LEXP_MAX+1 - bgt t1, v0, overflow_l # is exponent too large? - bne t1, v0, 1f # special check for LONG_MIN - beq t0, zero, overflow_l # if positive, overflow - bne t2, DIMPL_ONE, overflow_l - dli t2, LONG_MIN # result is LONG_MIN - b result_fs_l -1: - subu v0, t1, DFRAC_BITS # compute amount to shift - beq v0, zero, 2f # is shift needed? - li v1, 64 - blt v0, zero, 1f # if shift < 0, shift right - subu v1, v1, v0 # shift left - dsll t2, t2, v0 - b 2f -1: - negu v0 # shift right by v0 - subu v1, v1, v0 - dsll t8, t2, v1 # save bits shifted out - sltu t8, zero, t8 # dont lose any ones - dsrl t2, t2, v0 -/* - * round (t0 is sign, t2 is integer part). - */ -2: - beq t3, FPC_ROUND_RN, 3f # round to nearest - beq t3, FPC_ROUND_RZ, 5f # round to zero (truncate) - beq t3, FPC_ROUND_RP, 1f # round to +infinity - beq t0, zero, 5f # if sign is positive, truncate - b 2f -1: - bne t0, zero, 5f # if sign is negative, truncate -2: - daddu t2, t2, DGUARDBIT # add in fractional - blt t2, zero, overflow_l # overflow? - b 5f -3: - daddu t2, t2, DGUARDBIT # add in fractional - blt t2, zero, overflow_l # overflow? 
-4: - bne v0, zero, 5f # if rounded remainder is zero - and t2, 0xe000000000000000 # clear LSB (round to nearest) -5: - beq t0, zero, 1f # result positive? - negu t2 # convert to negative integer -1: - b result_fs_l - nop -/* - * Handle inexact exception. - */ -inexact_l: - or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT - and v0, a1, FPC_ENABLE_INEXACT - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # save exceptions - b result_fs_l - -/* - * Conversions to integer which overflow will trap (if enabled), - * or generate an inexact trap (if enabled), - * or generate an invalid exception. - */ -overflow_l: - or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW - and v0, a1, FPC_ENABLE_OVERFLOW - bne v0, zero, fpe_trap - and v0, a1, FPC_ENABLE_INEXACT - bne v0, zero, inexact_l # inexact traps enabled? - b invalid_l - -/* - * Conversions to integer which underflow will trap (if enabled), - * or generate an inexact trap (if enabled), - * or generate an invalid exception. - */ -underflow_l: - or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW - and v0, a1, FPC_ENABLE_UNDERFLOW - bne v0, zero, fpe_trap - and v0, a1, FPC_ENABLE_INEXACT - bne v0, zero, inexact_l # inexact traps enabled? - b invalid_l - -/* - * Compare single. - */ -cmp_s: - jal get_cmp_s - bne t1, SEXP_INF, 1f # is FS an infinity? - bne t2, zero, unordered # FS is a NAN -1: - bne ta1, SEXP_INF, 2f # is FT an infinity? - bne ta2, zero, unordered # FT is a NAN -2: - sll t1, t1, SFRAC_BITS # reassemble exp & frac - or t1, t1, t2 - sll ta1, ta1, SFRAC_BITS # reassemble exp & frac - or ta1, ta1, ta2 - beq t0, zero, 1f # is FS positive? - negu t1 -1: - beq ta0, zero, 1f # is FT positive? - negu ta1 -1: - li v0, COND_LESS - blt t1, ta1, test_cond # is FS < FT? - li v0, COND_EQUAL - beq t1, ta1, test_cond # is FS == FT? - move v0, zero # FS > FT - b test_cond - -/* - * Compare double. - */ -cmp_d: - jal get_cmp_d - bne t1, DEXP_INF, 1f # is FS an infinity? 
- bne t2, zero, unordered # FS is a NAN -1: - bne ta1, DEXP_INF, 2f # is FT an infinity? - bne ta2, zero, unordered # FT is a NAN -2: - dsll t1, t1, DFRAC_BITS # reassemble exp & frac - or t1, t1, t2 - dsll ta1, ta1, DFRAC_BITS # reassemble exp & frac - or ta1, ta1, ta2 - beq t0, zero, 1f # is FS positive? - dnegu t1 # negate t1 -1: - beq ta0, zero, 1f # is FT positive? - dnegu ta1 -1: - li v0, COND_LESS - blt t1, ta1, test_cond # is FS(MSW) < FT(MSW)? - li v0, COND_EQUAL - beq t1, ta1, test_cond # is FS(LSW) == FT(LSW)? - move v0, zero # FS > FT -test_cond: - and v0, v0, a0 # condition match instruction? -set_cond: - bne v0, zero, 1f - and a1, a1, ~FPC_COND_BIT # clear condition bit - b 2f -1: - or a1, a1, FPC_COND_BIT # set condition bit -2: - ctc1 a1, FPC_CSR # save condition bit - b done - -unordered: - and v0, a0, COND_UNORDERED # this cmp match unordered? - bne v0, zero, 1f - and a1, a1, ~FPC_COND_BIT # clear condition bit - b 2f -1: - or a1, a1, FPC_COND_BIT # set condition bit -2: - and v0, a0, COND_SIGNAL - beq v0, zero, 1f # is this a signaling cmp? - or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID - and v0, a1, FPC_ENABLE_INVALID - bne v0, zero, fpe_trap -1: - ctc1 a1, FPC_CSR # save condition bit - b done - -/* - * Determine the amount to shift the fraction in order to restore the - * normalized position. After that, round and handle exceptions. - */ -norm_s: - move v0, t2 - move t9, zero # t9 = num of leading zeros - bne t2, zero, 1f - move v0, t8 - addu t9, 32 -1: - srl v1, v0, 16 - bne v1, zero, 1f - addu t9, 16 - sll v0, 16 -1: - srl v1, v0, 24 - bne v1, zero, 1f - addu t9, 8 - sll v0, 8 -1: - srl v1, v0, 28 - bne v1, zero, 1f - addu t9, 4 - sll v0, 4 -1: - srl v1, v0, 30 - bne v1, zero, 1f - addu t9, 2 - sll v0, 2 -1: - srl v1, v0, 31 - bne v1, zero, 1f - addu t9, 1 -/* - * Now shift t2,t8 the correct number of bits. 
- */ -1: - subu t9, t9, SLEAD_ZEROS # dont count leading zeros - subu t1, t1, t9 # adjust the exponent - beq t9, zero, norm_noshift_s - li v1, 32 - blt t9, zero, 1f # if shift < 0, shift right - subu v1, v1, t9 - sll t2, t2, t9 # shift t2,t8 left - srl v0, t8, v1 # save bits shifted out - or t2, t2, v0 - sll t8, t8, t9 - b norm_noshift_s -1: - negu t9 # shift t2,t8 right by t9 - subu v1, v1, t9 - sll v0, t8, v1 # save bits shifted out - sltu v0, zero, v0 # be sure to save any one bits - srl t8, t8, t9 - or t8, t8, v0 - sll v0, t2, v1 # save bits shifted out - or t8, t8, v0 - srl t2, t2, t9 -norm_noshift_s: - move ta1, t1 # save unrounded exponent - move ta2, t2 # save unrounded fraction - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - beq v0, FPC_ROUND_RN, 3f # round to nearest - beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate) - beq v0, FPC_ROUND_RP, 1f # round to +infinity - beq t0, zero, 5f # if sign is positive, truncate - b 2f -1: - bne t0, zero, 5f # if sign is negative, truncate -2: - beq t8, zero, 5f # if exact, continue - addu t2, t2, 1 # add rounding bit - bne t2, SIMPL_ONE<<1, 5f # need to adjust exponent? - addu t1, t1, 1 # adjust exponent - srl t2, t2, 1 # renormalize fraction - b 5f -3: - li v0, GUARDBIT # load guard bit for rounding - addu v0, v0, t8 # add remainder - sltu v1, v0, t8 # compute carry out - beq v1, zero, 4f # if no carry, continue - addu t2, t2, 1 # add carry to result - bne t2, SIMPL_ONE<<1, 4f # need to adjust exponent? - addu t1, t1, 1 # adjust exponent - srl t2, t2, 1 # renormalize fraction -4: - bne v0, zero, 5f # if rounded remainder is zero - and t2, t2, ~1 # clear LSB (round to nearest) -5: - bgt t1, SEXP_MAX, overflow_s # overflow? - blt t1, SEXP_MIN, underflow_s # underflow? - bne t8, zero, inexact_s # is result inexact? - addu t1, t1, SEXP_BIAS # bias exponent - and t2, t2, ~SIMPL_ONE # clear implied one bit - b result_fs_s - -/* - * Handle inexact exception. 
- */ -inexact_s: - addu t1, t1, SEXP_BIAS # bias exponent - and t2, t2, ~SIMPL_ONE # clear implied one bit -inexact_nobias_s: - jal set_fd_s # save result - or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT - and v0, a1, FPC_ENABLE_INEXACT - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # save exceptions - b done - -/* - * Overflow will trap (if enabled), - * or generate an inexact trap (if enabled), - * or generate an infinity. - */ -overflow_s: - or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW - and v0, a1, FPC_ENABLE_OVERFLOW - beq v0, zero, 1f - subu t1, t1, 192 # bias exponent - and t2, t2, ~SIMPL_ONE # clear implied one bit - jal set_fd_s # save result - b fpe_trap -1: - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - beq v0, FPC_ROUND_RN, 3f # round to nearest - beq v0, FPC_ROUND_RZ, 1f # round to zero (truncate) - beq v0, FPC_ROUND_RP, 2f # round to +infinity - bne t0, zero, 3f -1: - li t1, SEXP_MAX # result is max finite - li t2, 0x007fffff - b inexact_s -2: - bne t0, zero, 1b -3: - li t1, SEXP_MAX + 1 # result is infinity - move t2, zero - b inexact_s - -/* - * In this implementation, "tininess" is detected "after rounding" and - * "loss of accuracy" is detected as "an inexact result". - */ -underflow_s: - and v0, a1, FPC_ENABLE_UNDERFLOW - beq v0, zero, 1f -/* - * Underflow is enabled so compute the result and trap. - */ - addu t1, t1, 192 # bias exponent - and t2, t2, ~SIMPL_ONE # clear implied one bit - jal set_fd_s # save result - or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW - b fpe_trap -/* - * Underflow is not enabled so compute the result, - * signal inexact result (if it is) and trap (if enabled). - */ -1: - move t1, ta1 # get unrounded exponent - move t2, ta2 # get unrounded fraction - li t9, SEXP_MIN # compute shift amount - subu t9, t9, t1 # shift t2,t8 right by t9 - blt t9, SFRAC_BITS+2, 3f # shift all the bits out? 
- move t1, zero # result is inexact zero - move t2, zero - or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW -/* - * Now round the zero result. - * Only need to worry about rounding to +- infinity when the sign matches. - */ - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - beq v0, FPC_ROUND_RN, inexact_nobias_s # round to nearest - beq v0, FPC_ROUND_RZ, inexact_nobias_s # round to zero - beq v0, FPC_ROUND_RP, 1f # round to +infinity - beq t0, zero, inexact_nobias_s # if sign is positive, truncate - b 2f -1: - bne t0, zero, inexact_nobias_s # if sign is negative, truncate -2: - addu t2, t2, 1 # add rounding bit - b inexact_nobias_s -3: - li v1, 32 - subu v1, v1, t9 - sltu v0, zero, t8 # be sure to save any one bits - sll t8, t2, v1 # save bits shifted out - or t8, t8, v0 # include sticky bits - srl t2, t2, t9 -/* - * Now round the denormalized result. - */ - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - beq v0, FPC_ROUND_RN, 3f # round to nearest - beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate) - beq v0, FPC_ROUND_RP, 1f # round to +infinity - beq t0, zero, 5f # if sign is positive, truncate - b 2f -1: - bne t0, zero, 5f # if sign is negative, truncate -2: - beq t8, zero, 5f # if exact, continue - addu t2, t2, 1 # add rounding bit - b 5f -3: - li v0, GUARDBIT # load guard bit for rounding - addu v0, v0, t8 # add remainder - sltu v1, v0, t8 # compute carry out - beq v1, zero, 4f # if no carry, continue - addu t2, t2, 1 # add carry to result -4: - bne v0, zero, 5f # if rounded remainder is zero - and t2, t2, ~1 # clear LSB (round to nearest) -5: - move t1, zero # denorm or zero exponent - jal set_fd_s # save result - beq t8, zero, done # check for exact result - or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW - or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT - and v0, a1, FPC_ENABLE_INEXACT - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # save exceptions - b done - -/* - * Determine the amount to shift the fraction in order to 
restore the - * normalized position. After that, round and handle exceptions. - */ -norm_d: - move v0, t2 - move t9, zero # t9 = num of leading zeros - dsrl v1, v0, 32 - bne v1, zero, 1f - addu t9, 32 - dsll v0, 32 -1: - dsrl v1, v0, 16 - bne v1, zero, 1f - addu t9, 16 - dsll v0, 16 -1: - dsrl v1, v0, 24 - bne v1, zero, 1f - addu t9, 8 - dsll v0, 8 -1: - dsrl v1, v0, 28 - bne v1, zero, 1f - addu t9, 4 - dsll v0, 4 -1: - dsrl v1, v0, 30 - bne v1, zero, 1f - addu t9, 2 - dsll v0, 2 -1: - dsrl v1, v0, 31 - bne v1, zero, 1f - addu t9, 1 -/* - * Now shift t2,t8 the correct number of bits. - */ -1: - subu t9, t9, DLEAD_ZEROS # dont count leading zeros - subu t1, t1, t9 # adjust the exponent - beq t9, zero, norm_noshift_d - li v1, 64 - blt t9, zero, 2f # if shift < 0, shift right - subu v1, v1, t9 - dsll t2, t2, t9 # shift left by t9 - dsrl v0, t8, v1 # save bits shifted out - or t2, t2, v0 - dsll t8, t8, t9 - b norm_noshift_d -2: - negu t9 # shift right by t9 - subu v1, v1, t9 # (known to be < 32 bits) - dsll v0, t8, v1 # save bits shifted out - sltu v0, zero, v0 # be sure to save any one bits - dsrl t8, t8, t9 - or t8, t8, v0 - dsll v0, t2, v1 # save bits shifted out - or t8, t8, v0 - dsrl t2, t2, t9 -norm_noshift_d: - move ta1, t1 # save unrounded exponent - move ta2, t2 # save unrounded fraction (MS) - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - beq v0, FPC_ROUND_RN, 3f # round to nearest - beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate) - beq v0, FPC_ROUND_RP, 1f # round to +infinity - beq t0, zero, 5f # if sign is positive, truncate - b 2f -1: - bne t0, zero, 5f # if sign is negative, truncate -2: - beq t8, zero, 5f # if exact, continue - daddu t2, t2, 1 # add rounding bit - bne t2, DIMPL_ONE<<1, 5f # need to adjust exponent? 
- addu t1, t1, 1 # adjust exponent - dsrl t2, t2, 1 # renormalize fraction - b 5f -3: - dli v0, DGUARDBIT # load guard bit for rounding - addu v0, v0, t8 # add remainder - sltu v1, v0, t8 # compute carry out - beq v1, zero, 4f # branch if no carry - daddu t2, t2, 1 # add carry to result - bne t2, DIMPL_ONE<<1, 4f # need to adjust exponent? - addu t1, t1, 1 # adjust exponent - srl t2, t2, 1 # renormalize fraction -4: - bne v0, zero, 5f # if rounded remainder is zero - and t2, t2, ~1 # clear LSB (round to nearest) -5: - bgt t1, DEXP_MAX, overflow_d # overflow? - blt t1, DEXP_MIN, underflow_d # underflow? - bne t8, zero, inexact_d # is result inexact? - addu t1, t1, DEXP_BIAS # bias exponent - and t2, t2, ~DIMPL_ONE # clear implied one bit - b result_fs_d - -/* - * Handle inexact exception. - */ -inexact_d: - addu t1, t1, DEXP_BIAS # bias exponent - and t2, t2, ~DIMPL_ONE # clear implied one bit -inexact_nobias_d: - jal set_fd_d # save result - or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT - and v0, a1, FPC_ENABLE_INEXACT - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # save exceptions - b done - -/* - * Overflow will trap (if enabled), - * or generate an inexact trap (if enabled), - * or generate an infinity. 
- */ -overflow_d: - or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW - and v0, a1, FPC_ENABLE_OVERFLOW - beq v0, zero, 1f - subu t1, t1, 1536 # bias exponent - and t2, t2, ~DIMPL_ONE # clear implied one bit - jal set_fd_d # save result - b fpe_trap -1: - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - beq v0, FPC_ROUND_RN, 3f # round to nearest - beq v0, FPC_ROUND_RZ, 1f # round to zero (truncate) - beq v0, FPC_ROUND_RP, 2f # round to +infinity - bne t0, zero, 3f -1: - li t1, DEXP_MAX # result is max finite - dli t2, 0x000fffffffffffff - b inexact_d -2: - bne t0, zero, 1b -3: - li t1, DEXP_MAX + 1 # result is infinity - move t2, zero - b inexact_d - -/* - * In this implementation, "tininess" is detected "after rounding" and - * "loss of accuracy" is detected as "an inexact result". - */ -underflow_d: - and v0, a1, FPC_ENABLE_UNDERFLOW - beq v0, zero, 1f -/* - * Underflow is enabled so compute the result and trap. - */ - addu t1, t1, 1536 # bias exponent - and t2, t2, ~DIMPL_ONE # clear implied one bit - jal set_fd_d # save result - or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW - b fpe_trap -/* - * Underflow is not enabled so compute the result, - * signal inexact result (if it is) and trap (if enabled). - */ -1: - move t1, ta1 # get unrounded exponent - move t2, ta2 # get unrounded fraction (MS) - li t9, DEXP_MIN # compute shift amount - subu t9, t9, t1 # shift t2,t8 right by t9 - blt t9, DFRAC_BITS+2, 3f # shift all the bits out? - move t1, zero # result is inexact zero - move t2, zero - or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW -/* - * Now round the zero result. - * Only need to worry about rounding to +- infinity when the sign matches. 
- */ - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - beq v0, FPC_ROUND_RN, inexact_nobias_d # round to nearest - beq v0, FPC_ROUND_RZ, inexact_nobias_d # round to zero - beq v0, FPC_ROUND_RP, 1f # round to +infinity - beq t0, zero, inexact_nobias_d # if sign is positive, truncate - b 2f -1: - bne t0, zero, inexact_nobias_d # if sign is negative, truncate -2: - daddu t2, t2, 1 # add rounding bit - b inexact_nobias_d -3: - li v1, 64 - subu v1, v1, t9 - sltu v0, zero, t8 # be sure to save any one bits - dsll t8, t2, v1 # save bits shifted out - or t8, t8, v0 # include sticky bits - dsrl t2, t2, t9 -/* - * Now round the denormalized result. - */ - and v0, a1, FPC_ROUNDING_BITS # get rounding mode - beq v0, FPC_ROUND_RN, 3f # round to nearest - beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate) - beq v0, FPC_ROUND_RP, 1f # round to +infinity - beq t0, zero, 5f # if sign is positive, truncate - b 2f -1: - bne t0, zero, 5f # if sign is negative, truncate -2: - beq t8, zero, 5f # if exact, continue - daddu t2, t2, 1 # add rounding bit - b 5f -3: - dli v0, DGUARDBIT # load guard bit for rounding - daddu v0, v0, t8 # add remainder - sltu v1, v0, t8 # compute carry out - beq v1, zero, 4f # if no carry, continue - daddu t2, t2, 1 # add carry -4: - bne v0, zero, 5f # if rounded remainder is zero - and t2, t2, ~1 # clear LSB (round to nearest) -5: - move t1, zero # denorm or zero exponent - jal set_fd_d # save result - beq t8, zero, done # check for exact result - or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW - or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT - and v0, a1, FPC_ENABLE_INEXACT - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # save exceptions - b done - -/* - * Signal an invalid operation if the trap is enabled; otherwise, - * the result is a quiet NAN. 
- */ -invalid_s: # trap invalid operation - or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID - and v0, a1, FPC_ENABLE_INVALID - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # save exceptions - move t0, zero # result is a quiet NAN - li t1, SEXP_INF - li t2, SQUIET_NAN - jal set_fd_s # save result (in t0,t1,t2) - b done - -/* - * Signal an invalid operation if the trap is enabled; otherwise, - * the result is a quiet NAN. - */ -invalid_d: # trap invalid operation - or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID - and v0, a1, FPC_ENABLE_INVALID - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # save exceptions - move t0, zero # result is a quiet NAN - li t1, DEXP_INF - dli t2, DQUIET_NAN - jal set_fd_d # save result (in t0,t1,t2) - b done - -/* - * Signal an invalid operation if the trap is enabled; otherwise, - * the result is INT_MAX or INT_MIN. - */ -invalid_w: # trap invalid operation - or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID - and v0, a1, FPC_ENABLE_INVALID - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # save exceptions - bne t0, zero, 1f - li t2, INT_MAX # result is INT_MAX - b result_fs_w -1: - li t2, INT_MIN # result is INT_MIN - b result_fs_w - -/* - * Signal an invalid operation if the trap is enabled; otherwise, - * the result is LONG_MAX or LONG_MIN. - */ -invalid_l: # trap invalid operation - or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID - and v0, a1, FPC_ENABLE_INVALID - bne v0, zero, fpe_trap - ctc1 a1, FPC_CSR # save exceptions - bne t0, zero, 1f - dli t2, LONG_MAX # result is INT_MAX - b result_fs_l -1: - dli t2, LONG_MIN # result is INT_MIN - b result_fs_l - -/* - * Trap if the hardware should have handled this case. - */ -fpe_trap: - move a2, a1 # code = FP CSR - ctc1 a1, FPC_CSR # save exceptions - li v0, 1 - b done_err - -/* - * Send an illegal instruction signal to the current process. 
- */ -ill: - ctc1 a1, FPC_CSR # save exceptions - move a2, a0 # code = FP instruction - li v0, 1 - b done_err - -result_ft_s: - move t0, ta0 # result is FT - move t1, ta1 - move t2, ta2 -result_fs_s: # result is FS - jal set_fd_s # save result (in t0,t1,t2) - b done - -result_fs_w: - jal set_fd_word # save result (in t2) - b done - -result_fs_l: - move t0, t2 - jal set_fd_dword # save result (in t0) - b done - -result_ft_d: - move t0, ta0 # result is FT - move t1, ta1 - move t2, ta2 -result_fs_d: # result is FS - jal set_fd_d # save result (in t0,t1,t2) - -done: - li v0, 0 -done_err: - PTR_L ra, CF_RA_OFFS(sp) - PTR_ADD sp, sp, FRAMESZ(CF_SZ) - j ra -END(MipsEmulateFP) - -/*---------------------------------------------------------------------------- - * get_fs_int -- - * - * Read (integer) the FS register (bits 15-11). - * This is an internal routine used by MipsEmulateFP only. - * - * Results: - * t0 contains the sign - * t2 contains the fraction - * - *---------------------------------------------------------------------------- - */ -#define GET_FS_INT(n) \ - .rdata; \ - .dword get_fs_int_ ## n; \ - .text; \ -get_fs_int_ ## n: \ - mfc1 t2, $ ## n; \ - b get_fs_int_done - -LEAF(get_fs_int, 0) - srl a3, a0, 11 - 3 # get FS field - and a3, a3, 0x1f << 3 # mask FS field - ld a3, get_fs_int_tbl(a3) # switch on register number - j a3 - - .rdata -get_fs_int_tbl: - .text - - GET_FS_INT(f0) - GET_FS_INT(f1) - GET_FS_INT(f2) - GET_FS_INT(f3) - GET_FS_INT(f4) - GET_FS_INT(f5) - GET_FS_INT(f6) - GET_FS_INT(f7) - GET_FS_INT(f8) - GET_FS_INT(f9) - GET_FS_INT(f10) - GET_FS_INT(f11) - GET_FS_INT(f12) - GET_FS_INT(f13) - GET_FS_INT(f14) - GET_FS_INT(f15) - GET_FS_INT(f16) - GET_FS_INT(f17) - GET_FS_INT(f18) - GET_FS_INT(f19) - GET_FS_INT(f20) - GET_FS_INT(f21) - GET_FS_INT(f22) - GET_FS_INT(f23) - GET_FS_INT(f24) - GET_FS_INT(f25) - GET_FS_INT(f26) - GET_FS_INT(f27) - GET_FS_INT(f28) - GET_FS_INT(f29) - GET_FS_INT(f30) - GET_FS_INT(f31) - -get_fs_int_done: - srl t0, t2, 31 # init 
the sign bit - bge t2, zero, 1f - negu t2 - dsll t2, 33 - dsrl t2, 33 -1: - j ra -END(get_fs_int) - -/*---------------------------------------------------------------------------- - * get_fs_long -- - * - * Read (long integer) the FS register (bits 15-11). - * This is an internal routine used by MipsEmulateFP only. - * - * Results: - * t0 contains the sign - * t2 contains the fraction - * - *---------------------------------------------------------------------------- - */ -#define GET_FS_LONG(n) \ - .rdata; \ - .dword get_fs_long_ ## n; \ - .text; \ -get_fs_long_ ## n: \ - dmfc1 t2, $ ## n; \ - b get_fs_long_done - -LEAF(get_fs_long, 0) - srl a3, a0, 11 - 3 # get FS field - and a3, a3, 0x1f << 3 # mask FS field - ld a3, get_fs_long_tbl(a3) # switch on register number - j a3 - - .rdata -get_fs_long_tbl: - .text - - GET_FS_LONG(f0) - GET_FS_LONG(f1) - GET_FS_LONG(f2) - GET_FS_LONG(f3) - GET_FS_LONG(f4) - GET_FS_LONG(f5) - GET_FS_LONG(f6) - GET_FS_LONG(f7) - GET_FS_LONG(f8) - GET_FS_LONG(f9) - GET_FS_LONG(f10) - GET_FS_LONG(f11) - GET_FS_LONG(f12) - GET_FS_LONG(f13) - GET_FS_LONG(f14) - GET_FS_LONG(f15) - GET_FS_LONG(f16) - GET_FS_LONG(f17) - GET_FS_LONG(f18) - GET_FS_LONG(f19) - GET_FS_LONG(f20) - GET_FS_LONG(f21) - GET_FS_LONG(f22) - GET_FS_LONG(f23) - GET_FS_LONG(f24) - GET_FS_LONG(f25) - GET_FS_LONG(f26) - GET_FS_LONG(f27) - GET_FS_LONG(f28) - GET_FS_LONG(f29) - GET_FS_LONG(f30) - GET_FS_LONG(f31) - -get_fs_long_done: - dsrl t0, t2, 63 # init the sign bit - bge t2, zero, 1f - dnegu t2 -1: - j ra -END(get_fs_long) - -/*---------------------------------------------------------------------------- - * get_ft_fs_s -- - * - * Read (single precision) the FT register (bits 20-16) and - * the FS register (bits 15-11) and break up into fields. - * This is an internal routine used by MipsEmulateFP only. 
- * - * Results: - * t0 contains the FS sign - * t1 contains the FS (biased) exponent - * t2 contains the FS fraction - * ta0 contains the FT sign - * ta1 contains the FT (biased) exponent - * ta2 contains the FT fraction - * - *---------------------------------------------------------------------------- - */ -#define GET_FT_S(n) \ - .rdata; \ - .dword get_ft_s_ ## n; \ - .text; \ -get_ft_s_ ## n: \ - mfc1 ta0, $ ## n; \ - b get_ft_s_done - -LEAF(get_ft_fs_s, 0) - srl a3, a0, 16 - 3 # get FT field - and a3, a3, 0x1f << 3 # mask FT field - ld a3, get_ft_s_tbl(a3) # switch on register number - j a3 - - .rdata -get_ft_s_tbl: - .text - - GET_FT_S(f0) - GET_FT_S(f1) - GET_FT_S(f2) - GET_FT_S(f3) - GET_FT_S(f4) - GET_FT_S(f5) - GET_FT_S(f6) - GET_FT_S(f7) - GET_FT_S(f8) - GET_FT_S(f9) - GET_FT_S(f10) - GET_FT_S(f11) - GET_FT_S(f12) - GET_FT_S(f13) - GET_FT_S(f14) - GET_FT_S(f15) - GET_FT_S(f16) - GET_FT_S(f17) - GET_FT_S(f18) - GET_FT_S(f19) - GET_FT_S(f20) - GET_FT_S(f21) - GET_FT_S(f22) - GET_FT_S(f23) - GET_FT_S(f24) - GET_FT_S(f25) - GET_FT_S(f26) - GET_FT_S(f27) - GET_FT_S(f28) - GET_FT_S(f29) - GET_FT_S(f30) - GET_FT_S(f31) - -get_ft_s_done: - srl ta1, ta0, SFRAC_BITS # get exponent - and ta1, ta1, 0xFF - and ta2, ta0, 0x7FFFFF # get fraction - srl ta0, ta0, 31 # get sign - bne ta1, SEXP_INF, 1f # is it a signaling NAN? - and v0, ta2, SSIGNAL_NAN - bne v0, zero, invalid_s -1: - /* fall through to get FS */ - -/*---------------------------------------------------------------------------- - * get_fs_s -- - * - * Read (single precision) the FS register (bits 15-11) and - * break up into fields. - * This is an internal routine used by MipsEmulateFP only. 
- * - * Results: - * t0 contains the sign - * t1 contains the (biased) exponent - * t2 contains the fraction - * - *---------------------------------------------------------------------------- - */ -#define GET_FS_S(n) \ - .rdata; \ - .dword get_fs_s_ ## n; \ - .text; \ -get_fs_s_ ## n: \ - mfc1 t0, $ ## n; \ - b get_fs_s_done - -ALEAF(get_fs_s) - srl a3, a0, 11 - 3 # get FS field - and a3, a3, 0x1f << 3 # mask FS field - ld a3, get_fs_s_tbl(a3) # switch on register number - j a3 - - .rdata -get_fs_s_tbl: - .text - - GET_FS_S(f0) - GET_FS_S(f1) - GET_FS_S(f2) - GET_FS_S(f3) - GET_FS_S(f4) - GET_FS_S(f5) - GET_FS_S(f6) - GET_FS_S(f7) - GET_FS_S(f8) - GET_FS_S(f9) - GET_FS_S(f10) - GET_FS_S(f11) - GET_FS_S(f12) - GET_FS_S(f13) - GET_FS_S(f14) - GET_FS_S(f15) - GET_FS_S(f16) - GET_FS_S(f17) - GET_FS_S(f18) - GET_FS_S(f19) - GET_FS_S(f20) - GET_FS_S(f21) - GET_FS_S(f22) - GET_FS_S(f23) - GET_FS_S(f24) - GET_FS_S(f25) - GET_FS_S(f26) - GET_FS_S(f27) - GET_FS_S(f28) - GET_FS_S(f29) - GET_FS_S(f30) - GET_FS_S(f31) - -get_fs_s_done: - srl t1, t0, SFRAC_BITS # get exponent - and t1, t1, 0xFF - and t2, t0, 0x7FFFFF # get fraction - srl t0, t0, 31 # get sign - bne t1, SEXP_INF, 1f # is it a signaling NAN? - and v0, t2, SSIGNAL_NAN - bne v0, zero, invalid_s -1: - j ra -END(get_ft_fs_s) - -/*---------------------------------------------------------------------------- - * get_ft_fs_d -- - * - * Read (double precision) the FT register (bits 20-16) and - * the FS register (bits 15-11) and break up into fields. - * This is an internal routine used by MipsEmulateFP only. 
- * - * Results: - * t0 contains the FS sign - * t1 contains the FS (biased) exponent - * t2 contains the FS fraction - * ta0 contains the FT sign - * ta1 contains the FT (biased) exponent - * ta2 contains the FT fraction - * - *---------------------------------------------------------------------------- - */ -#define GET_FT_FS_D(n) \ - .rdata; \ - .dword get_ft_fs_d_ ## n; \ - .text; \ -get_ft_fs_d_ ## n: \ - dmfc1 ta2, $ ## n; \ - b get_ft_d_done - -LEAF(get_ft_fs_d, 0) - srl a3, a0, 16 - 3 # get FT field - and a3, a3, 0x1f << 3 # mask FT field - ld a3, get_ft_d_tbl(a3) # switch on register number - j a3 - - .rdata -get_ft_d_tbl: - .text - - GET_FT_FS_D(f0) - GET_FT_FS_D(f1) - GET_FT_FS_D(f2) - GET_FT_FS_D(f3) - GET_FT_FS_D(f4) - GET_FT_FS_D(f5) - GET_FT_FS_D(f6) - GET_FT_FS_D(f7) - GET_FT_FS_D(f8) - GET_FT_FS_D(f9) - GET_FT_FS_D(f10) - GET_FT_FS_D(f11) - GET_FT_FS_D(f12) - GET_FT_FS_D(f13) - GET_FT_FS_D(f14) - GET_FT_FS_D(f15) - GET_FT_FS_D(f16) - GET_FT_FS_D(f17) - GET_FT_FS_D(f18) - GET_FT_FS_D(f19) - GET_FT_FS_D(f20) - GET_FT_FS_D(f21) - GET_FT_FS_D(f22) - GET_FT_FS_D(f23) - GET_FT_FS_D(f24) - GET_FT_FS_D(f25) - GET_FT_FS_D(f26) - GET_FT_FS_D(f27) - GET_FT_FS_D(f28) - GET_FT_FS_D(f29) - GET_FT_FS_D(f30) - GET_FT_FS_D(f31) - -get_ft_d_done: - dsrl ta0, ta2, 63 # get sign - dsrl ta1, ta2, DFRAC_BITS # get exponent - and ta1, ta1, 0x7FF - dsll ta2, 12 - dsrl ta2, 12 # get fraction - bne ta1, DEXP_INF, 1f # is it a signaling NAN? - and v0, ta2, DSIGNAL_NAN - bne v0, zero, invalid_d -1: - /* fall through to get FS */ - -/*---------------------------------------------------------------------------- - * get_fs_d -- - * - * Read (double precision) the FS register (bits 15-11) and - * break up into fields. - * This is an internal routine used by MipsEmulateFP only. 
- * - * Results: - * t0 contains the sign - * t1 contains the (biased) exponent - * t2 contains the fraction - * - *---------------------------------------------------------------------------- - */ -#define GET_FS_D(n) \ - .rdata; \ - .dword get_fs_d_ ## n; \ - .text; \ -get_fs_d_ ## n: \ - dmfc1 t2, $ ## n; \ - b get_fs_d_done - -ALEAF(get_fs_d) - srl a3, a0, 11 - 3 # get FS field - and a3, a3, 0x1f << 3 # mask FS field - ld a3, get_fs_d_tbl(a3) # switch on register number - j a3 - - .rdata -get_fs_d_tbl: - .text - - GET_FS_D(f0) - GET_FS_D(f1) - GET_FS_D(f2) - GET_FS_D(f3) - GET_FS_D(f4) - GET_FS_D(f5) - GET_FS_D(f6) - GET_FS_D(f7) - GET_FS_D(f8) - GET_FS_D(f9) - GET_FS_D(f10) - GET_FS_D(f11) - GET_FS_D(f12) - GET_FS_D(f13) - GET_FS_D(f14) - GET_FS_D(f15) - GET_FS_D(f16) - GET_FS_D(f17) - GET_FS_D(f18) - GET_FS_D(f19) - GET_FS_D(f20) - GET_FS_D(f21) - GET_FS_D(f22) - GET_FS_D(f23) - GET_FS_D(f24) - GET_FS_D(f25) - GET_FS_D(f26) - GET_FS_D(f27) - GET_FS_D(f28) - GET_FS_D(f29) - GET_FS_D(f30) - GET_FS_D(f31) - -get_fs_d_done: - dsrl t0, t2, 63 # get sign - dsrl t1, t2, DFRAC_BITS # get exponent - and t1, t1, 0x7FF - dsll t2, 12 - dsrl t2, 12 # get fraction - bne t1, DEXP_INF, 1f # is it a signaling NAN? - and v0, t2, DSIGNAL_NAN - bne v0, zero, invalid_d -1: - j ra -END(get_ft_fs_d) - -/*---------------------------------------------------------------------------- - * get_cmp_s -- - * - * Read (single precision) the FS register (bits 15-11) and - * the FT register (bits 20-16) and break up into fields. - * This is an internal routine used by MipsEmulateFP only. 
- * - * Results: - * t0 contains the sign - * t1 contains the (biased) exponent - * t2 contains the fraction - * ta0 contains the sign - * ta1 contains the (biased) exponent - * ta2 contains the fraction - * - *---------------------------------------------------------------------------- - */ -#define CMP_FS_S(n) \ - .rdata; \ - .dword cmp_fs_s_ ## n; \ - .text; \ -cmp_fs_s_ ## n: \ - mfc1 t0, $ ## n; \ - b cmp_fs_s_done - -LEAF(get_cmp_s, 0) - srl a3, a0, 11 - 3 # get FS field - and a3, a3, 0x1f << 3 # mask FS field - ld a3, cmp_fs_s_tbl(a3) # switch on register number - j a3 - - .rdata -cmp_fs_s_tbl: - .text - - CMP_FS_S(f0) - CMP_FS_S(f1) - CMP_FS_S(f2) - CMP_FS_S(f3) - CMP_FS_S(f4) - CMP_FS_S(f5) - CMP_FS_S(f6) - CMP_FS_S(f7) - CMP_FS_S(f8) - CMP_FS_S(f9) - CMP_FS_S(f10) - CMP_FS_S(f11) - CMP_FS_S(f12) - CMP_FS_S(f13) - CMP_FS_S(f14) - CMP_FS_S(f15) - CMP_FS_S(f16) - CMP_FS_S(f17) - CMP_FS_S(f18) - CMP_FS_S(f19) - CMP_FS_S(f20) - CMP_FS_S(f21) - CMP_FS_S(f22) - CMP_FS_S(f23) - CMP_FS_S(f24) - CMP_FS_S(f25) - CMP_FS_S(f26) - CMP_FS_S(f27) - CMP_FS_S(f28) - CMP_FS_S(f29) - CMP_FS_S(f30) - CMP_FS_S(f31) - -cmp_fs_s_done: - srl t1, t0, SFRAC_BITS # get exponent - and t1, t1, 0xFF - and t2, t0, 0x7FFFFF # get fraction - srl t0, t0, 31 # get sign - -#define CMP_FT_S(n) \ - .rdata; \ - .dword cmp_ft_s_ ## n; \ - .text; \ -cmp_ft_s_ ## n: \ - mfc1 ta0, $ ## n; \ - b cmp_ft_s_done - - srl a3, a0, 16 - 3 # get FT field - and a3, a3, 0x1f << 3 # mask FT field - ld a3, cmp_ft_s_tbl(a3) # switch on register number - j a3 - - .rdata -cmp_ft_s_tbl: - .text - - CMP_FT_S(f0) - CMP_FT_S(f1) - CMP_FT_S(f2) - CMP_FT_S(f3) - CMP_FT_S(f4) - CMP_FT_S(f5) - CMP_FT_S(f6) - CMP_FT_S(f7) - CMP_FT_S(f8) - CMP_FT_S(f9) - CMP_FT_S(f10) - CMP_FT_S(f11) - CMP_FT_S(f12) - CMP_FT_S(f13) - CMP_FT_S(f14) - CMP_FT_S(f15) - CMP_FT_S(f16) - CMP_FT_S(f17) - CMP_FT_S(f18) - CMP_FT_S(f19) - CMP_FT_S(f20) - CMP_FT_S(f21) - CMP_FT_S(f22) - CMP_FT_S(f23) - CMP_FT_S(f24) - CMP_FT_S(f25) - CMP_FT_S(f26) - 
CMP_FT_S(f27) - CMP_FT_S(f28) - CMP_FT_S(f29) - CMP_FT_S(f30) - -cmp_ft_s_done: - srl ta1, ta0, SFRAC_BITS # get exponent - and ta1, ta1, 0xFF - and ta2, ta0, 0x7FFFFF # get fraction - srl ta0, ta0, 31 # get sign - j ra -END(get_cmp_s) - -/*---------------------------------------------------------------------------- - * get_cmp_d -- - * - * Read (double precision) the FS register (bits 15-11) and - * the FT register (bits 20-16) and break up into fields. - * This is an internal routine used by MipsEmulateFP only. - * - * Results: - * t0 contains the sign - * t1 contains the (biased) exponent - * t2 contains the fraction - * ta0 contains the sign - * ta1 contains the (biased) exponent - * ta2 contains the fraction - * - *---------------------------------------------------------------------------- - */ -#define CMP_FS_D(n) \ - .rdata; \ - .dword cmp_fs_d_ ## n; \ - .text; \ -cmp_fs_d_ ## n: \ - dmfc1 t2, $ ## n; \ - b cmp_fs_d_done - -LEAF(get_cmp_d, 0) - srl a3, a0, 11 - 3 # get FS field - and a3, a3, 0x1f << 3 # mask FS field - ld a3, cmp_fs_d_tbl(a3) # switch on register number - j a3 - - .rdata -cmp_fs_d_tbl: - .text - - CMP_FS_D(f0) - CMP_FS_D(f1) - CMP_FS_D(f2) - CMP_FS_D(f3) - CMP_FS_D(f4) - CMP_FS_D(f5) - CMP_FS_D(f6) - CMP_FS_D(f7) - CMP_FS_D(f8) - CMP_FS_D(f9) - CMP_FS_D(f10) - CMP_FS_D(f11) - CMP_FS_D(f12) - CMP_FS_D(f13) - CMP_FS_D(f14) - CMP_FS_D(f15) - CMP_FS_D(f16) - CMP_FS_D(f17) - CMP_FS_D(f18) - CMP_FS_D(f19) - CMP_FS_D(f20) - CMP_FS_D(f21) - CMP_FS_D(f22) - CMP_FS_D(f23) - CMP_FS_D(f24) - CMP_FS_D(f25) - CMP_FS_D(f26) - CMP_FS_D(f27) - CMP_FS_D(f28) - CMP_FS_D(f29) - CMP_FS_D(f30) - CMP_FS_D(f31) - -cmp_fs_d_done: - dsrl t0, t2, 63 # get sign - dsrl t1, t2, DFRAC_BITS # get exponent - and t1, t1, 0x7FF - dsll t2, 12 - dsrl t2, 12 # get fraction - -#define CMP_FT_D(n) \ - .rdata; \ - .dword cmp_ft_d_ ## n; \ - .text; \ -cmp_ft_d_ ## n: \ - dmfc1 ta2, $ ## n; \ - b cmp_ft_d_done - - srl a3, a0, 16 - 3 # get FT field - and a3, a3, 0x1f << 3 # mask FT 
field - ld a3, cmp_ft_d_tbl(a3) # switch on register number - j a3 - - .rdata -cmp_ft_d_tbl: - .text - - CMP_FT_D(f0) - CMP_FT_D(f1) - CMP_FT_D(f2) - CMP_FT_D(f3) - CMP_FT_D(f4) - CMP_FT_D(f5) - CMP_FT_D(f6) - CMP_FT_D(f7) - CMP_FT_D(f8) - CMP_FT_D(f9) - CMP_FT_D(f10) - CMP_FT_D(f11) - CMP_FT_D(f12) - CMP_FT_D(f13) - CMP_FT_D(f14) - CMP_FT_D(f15) - CMP_FT_D(f16) - CMP_FT_D(f17) - CMP_FT_D(f18) - CMP_FT_D(f19) - CMP_FT_D(f20) - CMP_FT_D(f21) - CMP_FT_D(f22) - CMP_FT_D(f23) - CMP_FT_D(f24) - CMP_FT_D(f25) - CMP_FT_D(f26) - CMP_FT_D(f27) - CMP_FT_D(f28) - CMP_FT_D(f29) - CMP_FT_D(f30) - CMP_FT_D(f31) - -cmp_ft_d_done: - dsrl ta0, ta2, 63 # get sign - dsrl ta1, ta2, DFRAC_BITS # get exponent - and ta1, ta1, 0x7FF - dsll ta2, 12 - dsrl ta2, 12 # get fraction - j ra -END(get_cmp_d) - -/*---------------------------------------------------------------------------- - * set_fd_s -- - * - * Write (single precision) the FD register (bits 10-6). - * This is an internal routine used by MipsEmulateFP only. - * - * Arguments: - * a0 contains the FP instruction - * t0 contains the sign - * t1 contains the (biased) exponent - * t2 contains the fraction - * - * set_fd_word -- - * - * Write (integer) the FD register (bits 10-6). - * This is an internal routine used by MipsEmulateFP only. 
- * - * Arguments: - * a0 contains the FP instruction - * t2 contains the integer - * - *---------------------------------------------------------------------------- - */ -#define SET_FD_S(n) \ - .rdata; \ - .dword set_fd_s_ ## n; \ - .text; \ -set_fd_s_ ## n: \ - mtc1 t2, $ ## n; \ - j ra - -LEAF(set_fd_s, 0) - sll t0, t0, 31 # position sign - sll t1, t1, SFRAC_BITS # position exponent - or t2, t2, t0 - or t2, t2, t1 -ALEAF(set_fd_word) - srl a3, a0, 6 - 3 # get FD field - and a3, a3, 0x1f << 3 # mask FT field - ld a3, set_fd_s_tbl(a3) # switch on register number - j a3 - - .rdata -set_fd_s_tbl: - .text - - SET_FD_S(f0) - SET_FD_S(f1) - SET_FD_S(f2) - SET_FD_S(f3) - SET_FD_S(f4) - SET_FD_S(f5) - SET_FD_S(f6) - SET_FD_S(f7) - SET_FD_S(f8) - SET_FD_S(f9) - SET_FD_S(f10) - SET_FD_S(f11) - SET_FD_S(f12) - SET_FD_S(f13) - SET_FD_S(f14) - SET_FD_S(f15) - SET_FD_S(f16) - SET_FD_S(f17) - SET_FD_S(f18) - SET_FD_S(f19) - SET_FD_S(f20) - SET_FD_S(f21) - SET_FD_S(f22) - SET_FD_S(f23) - SET_FD_S(f24) - SET_FD_S(f25) - SET_FD_S(f26) - SET_FD_S(f27) - SET_FD_S(f28) - SET_FD_S(f29) - SET_FD_S(f30) - SET_FD_S(f31) - -END(set_fd_s) - -/*---------------------------------------------------------------------------- - * set_fd_d -- - * - * Write (double precision) the FT register (bits 10-6). - * This is an internal routine used by MipsEmulateFP only. 
- * - * Arguments: - * a0 contains the FP instruction - * t0 contains the sign - * t1 contains the (biased) exponent - * t2 contains the fraction - * - *---------------------------------------------------------------------------- - */ -#define SET_FD_D(n) \ - .rdata; \ - .dword set_fd_d_ ## n; \ - .text; \ -set_fd_d_ ## n: \ - dmtc1 t0, $ ## n; \ - j ra - -LEAF(set_fd_d, 0) - dsll t0, 63 # set sign - dsll t1, t1, DFRAC_BITS # set exponent - or t0, t0, t1 - or t0, t0, t2 # set fraction -ALEAF(set_fd_dword) - srl a3, a0, 6 - 3 # get FD field - and a3, a3, 0x1f << 3 # mask FD field - ld a3, set_fd_d_tbl(a3) # switch on register number - j a3 - - .rdata -set_fd_d_tbl: - .text - - SET_FD_D(f0) - SET_FD_D(f1) - SET_FD_D(f2) - SET_FD_D(f3) - SET_FD_D(f4) - SET_FD_D(f5) - SET_FD_D(f6) - SET_FD_D(f7) - SET_FD_D(f8) - SET_FD_D(f9) - SET_FD_D(f10) - SET_FD_D(f11) - SET_FD_D(f12) - SET_FD_D(f13) - SET_FD_D(f14) - SET_FD_D(f15) - SET_FD_D(f16) - SET_FD_D(f17) - SET_FD_D(f18) - SET_FD_D(f19) - SET_FD_D(f20) - SET_FD_D(f21) - SET_FD_D(f22) - SET_FD_D(f23) - SET_FD_D(f24) - SET_FD_D(f25) - SET_FD_D(f26) - SET_FD_D(f27) - SET_FD_D(f28) - SET_FD_D(f29) - SET_FD_D(f30) - SET_FD_D(f31) - -END(set_fd_d) - -/*---------------------------------------------------------------------------- - * renorm_fs_s -- - * - * Results: - * t1 unbiased exponent - * t2 normalized fraction - * - *---------------------------------------------------------------------------- - */ -LEAF(renorm_fs_s, 0) -/* - * Find out how many leading zero bits are in t2 and put in t9. - */ - move v0, t2 - move t9, zero - srl v1, v0, 16 - bne v1, zero, 1f - addu t9, 16 - sll v0, 16 -1: - srl v1, v0, 24 - bne v1, zero, 1f - addu t9, 8 - sll v0, 8 -1: - srl v1, v0, 28 - bne v1, zero, 1f - addu t9, 4 - sll v0, 4 -1: - srl v1, v0, 30 - bne v1, zero, 1f - addu t9, 2 - sll v0, 2 -1: - srl v1, v0, 31 - bne v1, zero, 1f - addu t9, 1 -/* - * Now shift t2 the correct number of bits. 
- */ -1: - subu t9, t9, SLEAD_ZEROS # dont count normal leading zeros - li t1, SEXP_MIN - subu t1, t1, t9 # adjust exponent - sll t2, t2, t9 - j ra -END(renorm_fs_s) - -/*---------------------------------------------------------------------------- - * renorm_fs_d -- - * - * Results: - * t1 unbiased exponent - * t2 normalized fraction - * - *---------------------------------------------------------------------------- - */ -LEAF(renorm_fs_d, 0) -/* - * Find out how many leading zero bits are in t2 and put in t9. - */ - move v0, t2 - move t9, zero - dsrl v1, v0, 32 - bne v1, zero, 1f - addu t9, 32 - dsll v0, 32 -1: - dsrl v1, v0, 16 - bne v1, zero, 1f - addu t9, 16 - dsll v0, 16 -1: - dsrl v1, v0, 24 - bne v1, zero, 1f - addu t9, 8 - dsll v0, 8 -1: - dsrl v1, v0, 28 - bne v1, zero, 1f - addu t9, 4 - dsll v0, 4 -1: - dsrl v1, v0, 30 - bne v1, zero, 1f - addu t9, 2 - dsll v0, 2 -1: - dsrl v1, v0, 31 - bne v1, zero, 1f - addu t9, 1 -/* - * Now shift t2 the correct number of bits. - */ -1: - subu t9, t9, DLEAD_ZEROS # dont count normal leading zeros - li t1, DEXP_MIN - subu t1, t9 # adjust exponent - dsll t2, t9 - j ra -END(renorm_fs_d) - -/*---------------------------------------------------------------------------- - * renorm_ft_s -- - * - * Results: - * ta1 unbiased exponent - * ta2 normalized fraction - * - *---------------------------------------------------------------------------- - */ -LEAF(renorm_ft_s, 0) -/* - * Find out how many leading zero bits are in ta2 and put in t9. - */ - move v0, ta2 - move t9, zero - srl v1, v0, 16 - bne v1, zero, 1f - addu t9, 16 - sll v0, 16 -1: - srl v1, v0, 24 - bne v1, zero, 1f - addu t9, 8 - sll v0, 8 -1: - srl v1, v0, 28 - bne v1, zero, 1f - addu t9, 4 - sll v0, 4 -1: - srl v1, v0, 30 - bne v1, zero, 1f - addu t9, 2 - sll v0, 2 -1: - srl v1, v0, 31 - bne v1, zero, 1f - addu t9, 1 -/* - * Now shift ta2 the correct number of bits. 
- */ -1: - subu t9, t9, SLEAD_ZEROS # dont count normal leading zeros - li ta1, SEXP_MIN - subu ta1, t9 # adjust exponent - sll ta2, t9 - j ra -END(renorm_ft_s) - -/*---------------------------------------------------------------------------- - * renorm_ft_d -- - * - * Results: - * ta1 unbiased exponent - * ta2 normalized fraction - * - *---------------------------------------------------------------------------- - */ -LEAF(renorm_ft_d, 0) -/* - * Find out how many leading zero bits are in ta2 and put in t9. - */ - move v0, ta2 - move t9, zero - dsrl v1, v0, 32 - bne v1, zero, 1f - addu t9, 32 - dsll v0, 32 -1: - dsrl v1, v0, 16 - bne v1, zero, 1f - addu t9, 16 - dsll v0, 16 -1: - dsrl v1, v0, 24 - bne v1, zero, 1f - addu t9, 8 - dsll v0, 8 -1: - dsrl v1, v0, 28 - bne v1, zero, 1f - addu t9, 4 - dsll v0, 4 -1: - dsrl v1, v0, 30 - bne v1, zero, 1f - addu t9, 2 - dsll v0, 2 -1: - dsrl v1, v0, 31 - bne v1, zero, 1f - addu t9, 1 -/* - * Now shift ta2 the correct number of bits. - */ -1: - subu t9, t9, DLEAD_ZEROS # dont count normal leading zeros - li ta1, DEXP_MIN - subu ta1, t9 # adjust exponent - dsll ta2, t9 - j ra -END(renorm_ft_d) diff --git a/sys/arch/mips64/mips64/fp_emulate.c b/sys/arch/mips64/mips64/fp_emulate.c new file mode 100644 index 00000000000..d392b8d1564 --- /dev/null +++ b/sys/arch/mips64/mips64/fp_emulate.c @@ -0,0 +1,1310 @@ +/* $OpenBSD: fp_emulate.c,v 1.1 2010/09/21 20:29:17 miod Exp $ */ + +/* + * Copyright (c) 2010 Miodrag Vallat. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * Floating Point completion code (MI softfloat code control engine). + * + * Supports all MIPS IV COP1 and COP1X floating-point instructions. + * Floating-point load and store instructions, as well as branch instructions, + * are not handled, as they should not require completion code. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/signalvar.h> + +#include <machine/cpu.h> +#include <machine/fpu.h> +#include <machine/frame.h> +#include <machine/ieee.h> +#include <machine/ieeefp.h> +#include <machine/mips_opcode.h> +#include <machine/regnum.h> + +#include <lib/libkern/softfloat.h> +#if defined(DEBUG) && defined(DDB) +#include <machine/db_machdep.h> +#endif + +int fpu_emulate(struct trap_frame *, uint32_t, union sigval *); +int fpu_emulate_cop1(struct trap_frame *, uint32_t); +int fpu_emulate_cop1x(struct trap_frame *, uint32_t); +uint64_t + fpu_load(struct trap_frame *, uint, uint); +void fpu_store(struct trap_frame *, uint, uint, uint64_t); + +typedef int (fpu_fn3)(struct trap_frame *, uint, uint, uint, uint); +typedef int (fpu_fn4)(struct trap_frame *, uint, uint, uint, uint, uint); +fpu_fn3 fpu_abs; +fpu_fn3 fpu_add; +int fpu_c(struct trap_frame *, uint, uint, uint, uint, uint); +fpu_fn3 fpu_ceil_l; +fpu_fn3 fpu_ceil_w; +fpu_fn3 fpu_cvt_d; +fpu_fn3 fpu_cvt_l; +fpu_fn3 fpu_cvt_s; +fpu_fn3 fpu_cvt_w; +fpu_fn3 fpu_div; +fpu_fn3 fpu_floor_l; +fpu_fn3 fpu_floor_w; +fpu_fn4 fpu_madd; +fpu_fn4 fpu_msub; +fpu_fn3 fpu_mov; +fpu_fn3 fpu_movcf; +fpu_fn3 fpu_movn; +fpu_fn3 fpu_movz; +fpu_fn3 fpu_mul; +fpu_fn3 fpu_neg; +fpu_fn4 fpu_nmadd; +fpu_fn4 fpu_nmsub; +fpu_fn3 fpu_recip; +fpu_fn3 
fpu_round_l; +fpu_fn3 fpu_round_w; +fpu_fn3 fpu_rsqrt; +fpu_fn3 fpu_sqrt; +fpu_fn3 fpu_sub; +fpu_fn3 fpu_trunc_l; +fpu_fn3 fpu_trunc_w; + +int fpu_int_l(struct trap_frame *, uint, uint, uint, uint, uint); +int fpu_int_w(struct trap_frame *, uint, uint, uint, uint, uint); + +/* + * Encoding of operand format within opcodes `fmt' and `fmt3' fields. + */ +#define FMT_S 0x00 +#define FMT_D 0x01 +#define FMT_W 0x04 +#define FMT_L 0x05 + +/* + * Inlines from softfloat-specialize.h which are not made public, needed + * for fpu_abs. + */ +#define float32_is_nan(a) \ + (0xff000000 < (a << 1)) +#define float32_is_signaling_nan(a) \ + ((((a >> 22) & 0x1ff) == 0x1fe) && (a & 0x003fffff)) + +/* + * Precomputed results of intXX_to_floatXX(1) + */ +#define ONE_F32 (float32)(SNG_EXP_BIAS << SNG_FRACBITS) +#define ONE_F64 (float64)((uint64_t)DBL_EXP_BIAS << DBL_FRACBITS) + +/* + * Handle a floating-point exception. + */ +void +MipsFPTrap(struct trap_frame *tf) +{ + struct cpu_info *ci = curcpu(); + struct proc *p = ci->ci_curproc; + union sigval sv; + vaddr_t pc; + uint32_t fsr, excbits; + uint32_t insn; + InstFmt inst; + int sig = 0; + int fault_type = SI_NOINFO; + int update_pcb = 0; + int emulate = 0; + uint32_t sr; + + KDASSERT(tf == p->p_md.md_regs); + + /* + * Enable FPU, and read its status register. + */ + + sr = getsr(); + setsr(sr | SR_COP_1_BIT); + + __asm__ __volatile__ ("cfc1 %0, $31" : "=r" (fsr)); + __asm__ __volatile__ ("cfc1 %0, $31" : "=r" (fsr)); + + /* + * If this is not an unimplemented operation, but a genuine + * FPU exception, signal the process. + */ + + if ((fsr & FPCSR_C_E) == 0) { + sig = SIGFPE; + goto deliver; + } + + /* + * Get the faulting instruction. This should not fail, and + * if it does, it's probably not your lucky day. 
+ */ + + pc = (vaddr_t)tf->pc; + if (tf->cause & CR_BR_DELAY) + pc += 4; + if (copyin((void *)pc, &insn, sizeof insn) != 0) { + sig = SIGBUS; + fault_type = BUS_OBJERR; + goto deliver; + } + inst = *(InstFmt *)&insn; + + /* + * Emulate the instruction. + */ + +#ifdef DEBUG +#ifdef DDB + printf("%s: unimplemented FPU completion, fsr 0x%08x\n%p: ", + p->p_comm, fsr, pc); + dbmd_print_insn(insn, pc, printf); +#else + printf("%s: unimplemented FPU completion, insn 0x%08x fsr 0x%08x\n", + p->p_comm, insn, fsr); +#endif +#endif + + switch (inst.FRType.op) { + default: + /* + * Not a FPU instruction. + */ + break; + case OP_COP1: + switch (inst.RType.rs) { + case OP_BC: + case OP_MF: + case OP_DMF: + case OP_CF: + case OP_MT: + case OP_DMT: + case OP_CT: + /* + * These instructions should not require emulation, + * unless there is no FPU. + */ + break; + default: + emulate = 1; + break; + } + break; + case OP_COP1X: + switch (inst.FQType.op4) { + default: + break; + case OP_MADD: + case OP_MSUB: + case OP_NMADD: + case OP_NMSUB: + emulate = 1; + break; + } + break; + } + + if (emulate) { + KASSERT(p == ci->ci_fpuproc); + save_fpu(); + update_pcb = 1; + + sig = fpu_emulate(tf, insn, &sv); + /* reload fsr, possibly modified by softfloat code */ + fsr = tf->fsr; + if (sig == 0) { + /* raise SIGFPE if necessary */ + excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT; + excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT; + if (excbits != 0) + sig = SIGFPE; + } + } else { + sig = SIGILL; + fault_type = ILL_ILLOPC; + } + +deliver: + switch (sig) { + case SIGFPE: + excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT; + excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT; + if (excbits & FP_X_INV) + fault_type = FPE_FLTINV; + else if (excbits & FP_X_DZ) + fault_type = FPE_INTDIV; + else if (excbits & FP_X_OFL) + fault_type = FPE_FLTUND; + else if (excbits & FP_X_UFL) + fault_type = FPE_FLTOVF; + else /* if (excbits & FP_X_IMP) */ + fault_type = FPE_FLTRES; + break; + } + + /* + * Skip the 
instruction, unless we are delivering SIGILL. + */ + + if (sig != SIGILL) { + if (tf->cause & CR_BR_DELAY) { + /* + * Note that it doesn't matter, at this point, + * that we pass the updated FSR value, as it is + * only used to decide whether to branch or not + * if the faulting instruction was BC1[FT]. + */ + tf->pc = MipsEmulateBranch(tf, tf->pc, fsr, 0); + } else + tf->pc += 4; + } + + /* + * Update the FPU status register. + * We need to make sure that this will not cause an exception + * in kernel mode. + */ + + /* propagate raised exceptions to the sticky bits */ + fsr &= ~FPCSR_C_E; + excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT; + fsr |= excbits << FPCSR_F_SHIFT; + /* clear all exception sources */ + fsr &= ~FPCSR_C_MASK; + if (update_pcb) + tf->fsr = fsr; + __asm__ __volatile__ ("ctc1 %0, $31" :: "r" (fsr)); + /* disable fpu before returning to trap() */ + setsr(sr); + + if (sig != 0) { + sv.sival_ptr = (void *)pc; + KERNEL_PROC_LOCK(p); + trapsignal(p, sig, 0, fault_type, sv); + KERNEL_PROC_UNLOCK(p); + } +} + +/* + * Emulate an FPU instruction. The FPU register set has been saved in the + * current PCB, and is pointed to by the trap frame. + */ +int +fpu_emulate(struct trap_frame *tf, uint32_t insn, union sigval *sv) +{ + InstFmt inst; + + tf->zero = 0; /* not written by trap code */ + + inst = *(InstFmt *)&insn; + switch (inst.FRType.op) { + default: + break; + case OP_COP1: + return fpu_emulate_cop1(tf, insn); + case OP_COP1X: + return fpu_emulate_cop1x(tf, insn); + } + + return SIGILL; +} + +/* + * Emulate a COP1 FPU instruction. 
+ */ +int +fpu_emulate_cop1(struct trap_frame *tf, uint32_t insn) +{ + InstFmt inst; + uint ft, fs, fd; + fpu_fn3 *fpu_op; + static fpu_fn3 *const fpu_ops1[1 << 6] = { + fpu_add, /* 0x00 */ + fpu_sub, + fpu_mul, + fpu_div, + fpu_sqrt, + fpu_abs, + fpu_mov, + fpu_neg, + fpu_round_l, /* 0x08 */ + fpu_trunc_l, + fpu_ceil_l, + fpu_floor_l, + fpu_round_w, + fpu_trunc_w, + fpu_ceil_w, + fpu_floor_w, + NULL, /* 0x10 */ + fpu_movcf, + fpu_movz, + fpu_movn, + NULL, + fpu_recip, + fpu_rsqrt, + NULL, + NULL, /* 0x18 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + fpu_cvt_s, /* 0x20 */ + fpu_cvt_d, + NULL, + NULL, + fpu_cvt_w, + fpu_cvt_l, + NULL, + NULL, + NULL, /* 0x28 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + (fpu_fn3 *)fpu_c, /* 0x30 */ + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, /* 0x38 */ + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c, + (fpu_fn3 *)fpu_c + }; + + inst = *(InstFmt *)&insn; + + /* + * Check for valid function code. + */ + + fpu_op = fpu_ops1[inst.FRType.func]; + if (fpu_op == NULL) + return SIGILL; + + /* + * Check for valid format. FRType assumes bit 25 is always set, + * so we need to check for it explicitely. + */ + + if ((insn & (1 << 25)) == 0) + return SIGILL; + switch (inst.FRType.fmt) { + default: + return SIGILL; + case FMT_S: + case FMT_D: + case FMT_W: + case FMT_L: + break; + } + + /* + * Check for valid register values. Only even-numbered registers + * can be used if the FR bit is clear in coprocessor 0 status + * register. + * + * Note that c.cond does not specify a register number in the fd + * field, but the fd field must have zero in its low two bits, so + * the test will not reject valid c.cond instructions. 
+ */ + + ft = inst.FRType.ft; + fs = inst.FRType.fs; + fd = inst.FRType.fd; + if ((tf->sr & SR_FR_32) == 0) { + if ((ft | fs | fd) & 1) + return SIGILL; + } + + /* + * Finally dispatch to the proper routine. + */ + + if (fpu_op == (fpu_fn3 *)&fpu_c) + return fpu_c(tf, inst.FRType.fmt, ft, fs, fd, inst.FRType.func); + else + return (*fpu_op)(tf, inst.FRType.fmt, ft, fs, fd); +} + +/* + * Emulate a COP1X FPU instruction. + */ +int +fpu_emulate_cop1x(struct trap_frame *tf, uint32_t insn) +{ + InstFmt inst; + uint fr, ft, fs, fd; + fpu_fn4 *fpu_op; + static fpu_fn4 *const fpu_ops1x[1 << 3] = { + NULL, + NULL, + NULL, + NULL, + fpu_madd, + fpu_msub, + fpu_nmadd, + fpu_nmsub + }; + + inst = *(InstFmt *)&insn; + + /* + * Check for valid function code. + */ + + fpu_op = fpu_ops1x[inst.FQType.op4]; + if (fpu_op == NULL) + return SIGILL; + + /* + * Check for valid format. + */ + + switch (inst.FQType.fmt3) { + default: + return SIGILL; + case FMT_S: + case FMT_D: + case FMT_W: + case FMT_L: + break; + } + + /* + * Check for valid register values. Only even-numbered registers + * can be used if the FR bit is clear in coprocessor 0 status + * register. + */ + + fr = inst.FQType.fr; + ft = inst.FQType.ft; + fs = inst.FQType.fs; + fd = inst.FQType.fd; + if ((tf->sr & SR_FR_32) == 0) { + if ((fr | ft | fs | fd) & 1) + return SIGILL; + } + + /* + * Finally dispatch to the proper routine. + */ + + return (*fpu_op)(tf, inst.FRType.fmt, fr, ft, fs, fd); +} + +/* + * Load a floating-point argument according to the specified format. 
+ */ +uint64_t +fpu_load(struct trap_frame *tf, uint fmt, uint regno) +{ + register_t *regs = (register_t *)tf; + uint64_t tmp, tmp2; + + tmp = (uint64_t)regs[FPBASE + regno]; + if (tf->sr & SR_FR_32) { + switch (fmt) { + case FMT_D: + case FMT_L: + break; + case FMT_S: + case FMT_W: + tmp &= 0xffffffff; + break; + } + } else { + tmp &= 0xffffffff; + switch (fmt) { + case FMT_D: + case FMT_L: + /* caller has enforced regno is even */ + tmp2 = (uint64_t)regs[FPBASE + regno + 1]; + tmp |= tmp2 << 32; + break; + case FMT_S: + case FMT_W: + break; + } + } + + return tmp; +} + +/* + * Store a floating-point result according to the specified format. + */ +void +fpu_store(struct trap_frame *tf, uint fmt, uint regno, uint64_t rslt) +{ + register_t *regs = (register_t *)tf; + + if (tf->sr & SR_FR_32) { + regs[FPBASE + regno] = rslt; + } else { + /* caller has enforced regno is even */ + regs[FPBASE + regno] = rslt & 0xffffffff; + regs[FPBASE + regno + 1] = (rslt >> 32) & 0xffffffff; + } +} + +/* + * Integer conversion + */ + +int +fpu_int_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint rm) +{ + uint64_t raw; + uint32_t oldrm; + + if (ft != 0) + return SIGILL; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw = fpu_load(tf, fmt, fs); + + /* round towards required mode */ + oldrm = tf->fsr & FPCSR_RM_MASK; + tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm; + if (fmt == FMT_S) + raw = float32_to_int64((float32)raw); + else + raw = float64_to_int64((float64)raw); + /* restore rounding mode */ + tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm; + + if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V)) + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +int +fpu_int_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint rm) +{ + uint64_t raw; + uint32_t oldrm; + + if (ft != 0) + return SIGILL; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw = fpu_load(tf, fmt, fs); + + /* round towards required mode */ + oldrm = tf->fsr & 
FPCSR_RM_MASK; + tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm; + if (fmt == FMT_S) + raw = float32_to_int32((float32)raw); + else + raw = float64_to_int32((float64)raw); + /* restore rounding mode */ + tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm; + + if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V)) + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +/* + * FPU Instruction emulation + */ + +int +fpu_abs(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw; + + if (ft != 0) + return SIGILL; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw = fpu_load(tf, fmt, fs); + /* clear sign bit unless NaN */ + if (fmt == FMT_S) { + float32 f32 = (float32)raw; + if (float32_is_nan(f32)) { + float_set_invalid(); + } else { + f32 &= ~(1L << 31); + raw = (uint64_t)f32; + } + } else { + float64 f64 = (float64)raw; + if (float64_is_nan(f64)) { + float_set_invalid(); + } else { + f64 &= ~(1L << 63); + raw = (uint64_t)f64; + } + } + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +int +fpu_add(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw1, raw2, rslt; + + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw1 = fpu_load(tf, fmt, fs); + raw2 = fpu_load(tf, fmt, ft); + if (fmt == FMT_S) { + float32 f32 = float32_add((float32)raw1, (float32)raw2); + rslt = (uint64_t)f32; + } else { + float64 f64 = float64_add((float64)raw1, (float64)raw2); + rslt = (uint64_t)f64; + } + fpu_store(tf, fmt, fd, rslt); + + return 0; +} + +int +fpu_c(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint op) +{ + uint64_t raw1, raw2; + uint cc, lt, eq, uo; + + if ((fd & 0x03) != 0) + return SIGILL; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + lt = eq = uo = 0; + cc = fd >> 2; + + raw1 = fpu_load(tf, fmt, ft); + raw2 = fpu_load(tf, fmt, fs); + + if (fmt == FMT_S) { + float32 f32a = (float32)raw1; + float32 f32b = (float32)raw2; + if (float32_is_nan(f32a)) { + uo = 1 << 0; + if 
(float32_is_signaling_nan(f32a)) + op |= 0x08; /* force invalid exception */ + } else if (float32_is_nan(f32b)) { + uo = 1 << 0; + if (float32_is_signaling_nan(f32b)) + op |= 0x08; /* force invalid exception */ + } else { + if (float32_eq(f32a, f32b)) + eq = 1 << 1; + else if (float32_lt(f32a, f32b)) + lt = 1 << 2; + } + } else { + float64 f64a = (float64)raw1; + float64 f64b = (float64)raw2; + if (float64_is_nan(f64a)) { + uo = 1 << 0; + if (float64_is_signaling_nan(f64a)) + op |= 0x08; /* force invalid exception */ + } else if (float64_is_nan(f64b)) { + uo = 1 << 0; + if (float64_is_signaling_nan(f64b)) + op |= 0x08; /* force invalid exception */ + } else { + if (float64_eq(f64a, f64b)) + eq = 1 << 1; + else if (float64_lt(f64a, f64b)) + lt = 1 << 2; + } + } + + if (uo && (op & 0x08)) { + float_set_invalid(); + if (tf->fsr & FPCSR_E_V) { + /* comparison result intentionaly not written */ + goto skip; + } + } else { + if ((uo | eq | lt) & op) + tf->fsr |= FPCSR_CONDVAL(cc); + else + tf->fsr &= ~FPCSR_CONDVAL(cc); + } +skip: + + return 0; +} + +int +fpu_ceil_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + /* round towards positive infinity */ + return fpu_int_l(tf, fmt, ft, fs, fd, FP_RP); +} + +int +fpu_ceil_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + /* round towards positive infinity */ + return fpu_int_w(tf, fmt, ft, fs, fd, FP_RP); +} + +int +fpu_cvt_d(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw; + + if (ft != 0) + return SIGILL; + if (fmt == FMT_D) + return SIGILL; + + raw = fpu_load(tf, fmt, fs); + switch (fmt) { + case FMT_L: + raw = int64_to_float64((int64_t)raw); + break; + case FMT_S: + raw = float32_to_float64((float32)raw); + break; + case FMT_W: + raw = int32_to_float64((int32_t)raw); + break; + } + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +int +fpu_cvt_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw; + uint32_t rm; + + if (ft != 0) + 
return SIGILL; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + rm = tf->fsr & FPCSR_RM_MASK; + raw = fpu_load(tf, fmt, fs); + if (fmt == FMT_D) { + if (rm == FP_RZ) + raw = float64_to_int64_round_to_zero((float64)raw); + else + raw = float64_to_int64((float64)raw); + } else { + if (rm == FP_RZ) + raw = float32_to_int64_round_to_zero((float32)raw); + else + raw = float32_to_int64((float32)raw); + } + if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V)) + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +int +fpu_cvt_s(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw; + + if (ft != 0) + return SIGILL; + if (fmt == FMT_S) + return SIGILL; + + raw = fpu_load(tf, fmt, fs); + switch (fmt) { + case FMT_D: + raw = float64_to_float32((float64)raw); + break; + case FMT_L: + raw = int64_to_float32((int64_t)raw); + break; + case FMT_W: + raw = int32_to_float32((int32_t)raw); + break; + } + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +int +fpu_cvt_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw; + uint32_t rm; + + if (ft != 0) + return SIGILL; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + rm = tf->fsr & FPCSR_RM_MASK; + raw = fpu_load(tf, fmt, fs); + if (fmt == FMT_D) { + if (rm == FP_RZ) + raw = float64_to_int32_round_to_zero((float64)raw); + else + raw = float64_to_int32((float64)raw); + } else { + if (rm == FP_RZ) + raw = float32_to_int32_round_to_zero((float32)raw); + else + raw = float32_to_int32((float32)raw); + } + if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V)) + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +int +fpu_div(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw1, raw2, rslt; + + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw1 = fpu_load(tf, fmt, fs); + raw2 = fpu_load(tf, fmt, ft); + if (fmt == FMT_S) { + float32 f32 = float32_div((float32)raw1, (float32)raw2); + rslt = (uint64_t)f32; + } else { + 
float64 f64 = float64_div((float64)raw1, (float64)raw2); + rslt = (uint64_t)f64; + } + fpu_store(tf, fmt, fd, rslt); + + return 0; +} + +int +fpu_floor_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + /* round towards negative infinity */ + return fpu_int_l(tf, fmt, ft, fs, fd, FP_RM); +} + +int +fpu_floor_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + /* round towards negative infinity */ + return fpu_int_w(tf, fmt, ft, fs, fd, FP_RM); +} + +int +fpu_madd(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd) +{ + uint64_t raw1, raw2, raw3, rslt; + + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw1 = fpu_load(tf, fmt, fs); + raw2 = fpu_load(tf, fmt, ft); + raw3 = fpu_load(tf, fmt, fr); + if (fmt == FMT_S) { + float32 f32 = float32_add( + float32_mul((float32)raw1, (float32)raw2), + (float32)raw3); + rslt = (uint64_t)f32; + } else { + float64 f64 = float64_add( + float64_mul((float64)raw1, (float64)raw2), + (float64)raw3); + rslt = (uint64_t)f64; + } + fpu_store(tf, fmt, fd, rslt); + + return 0; +} + +int +fpu_mov(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw; + + if (ft != 0) + return SIGILL; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw = fpu_load(tf, fmt, fs); + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +int +fpu_movcf(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw; + uint cc, istf; + int condition; + + if ((ft & 0x02) != 0) + return SIGILL; + cc = ft >> 2; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + condition = tf->fsr & FPCSR_CONDVAL(cc); + istf = ft & COPz_BC_TF_MASK; + if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) { + raw = fpu_load(tf, fmt, fs); + fpu_store(tf, fmt, fd, raw); + } + + return 0; +} + +int +fpu_movn(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + register_t *regs = (register_t *)tf; + uint64_t raw; + + if (fmt != FMT_S && fmt != FMT_D) + return 
SIGILL; + + if (ft != ZERO && regs[ft] != 0) { + raw = fpu_load(tf, fmt, fs); + fpu_store(tf, fmt, fd, raw); + } + + return 0; +} + +int +fpu_movz(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + register_t *regs = (register_t *)tf; + uint64_t raw; + + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + if (ft == ZERO || regs[ft] == 0) { + raw = fpu_load(tf, fmt, fs); + fpu_store(tf, fmt, fd, raw); + } + + return 0; +} + +int +fpu_msub(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd) +{ + uint64_t raw1, raw2, raw3, rslt; + + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw1 = fpu_load(tf, fmt, fs); + raw2 = fpu_load(tf, fmt, ft); + raw3 = fpu_load(tf, fmt, fr); + if (fmt == FMT_S) { + float32 f32 = float32_sub( + float32_mul((float32)raw1, (float32)raw2), + (float32)raw3); + rslt = (uint64_t)f32; + } else { + float64 f64 = float64_sub( + float64_mul((float64)raw1, (float64)raw2), + (float64)raw3); + rslt = (uint64_t)f64; + } + fpu_store(tf, fmt, fd, rslt); + + return 0; +} + +int +fpu_mul(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw1, raw2, rslt; + + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw1 = fpu_load(tf, fmt, fs); + raw2 = fpu_load(tf, fmt, ft); + if (fmt == FMT_S) { + float32 f32 = float32_mul((float32)raw1, (float32)raw2); + rslt = (uint64_t)f32; + } else { + float64 f64 = float64_mul((float64)raw1, (float64)raw2); + rslt = (uint64_t)f64; + } + fpu_store(tf, fmt, fd, rslt); + + return 0; +} + +int +fpu_neg(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw; + + if (ft != 0) + return SIGILL; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw = fpu_load(tf, fmt, fs); + /* flip sign bit unless NaN */ + if (fmt == FMT_S) { + float32 f32 = (float32)raw; + if (float32_is_nan(f32)) { + float_set_invalid(); + } else { + f32 ^= 1L << 31; + raw = (uint64_t)f32; + } + } else { + float64 f64 = (float64)raw; + if (float64_is_nan(f64)) { + 
float_set_invalid(); + } else { + f64 ^= 1L << 63; + raw = (uint64_t)f64; + } + } + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +int +fpu_nmadd(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd) +{ + uint64_t raw1, raw2, raw3, rslt; + + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw1 = fpu_load(tf, fmt, fs); + raw2 = fpu_load(tf, fmt, ft); + raw3 = fpu_load(tf, fmt, fr); + if (fmt == FMT_S) { + float32 f32 = float32_add( + float32_mul((float32)raw1, (float32)raw2), + (float32)raw3); + if (float32_is_nan(f32)) + float_set_invalid(); + else + f32 ^= 1L << 31; + rslt = (uint64_t)f32; + } else { + float64 f64 = float64_add( + float64_mul((float64)raw1, (float64)raw2), + (float64)raw3); + if (float64_is_nan(f64)) + float_set_invalid(); + else + f64 ^= 1L << 63; + rslt = (uint64_t)f64; + } + fpu_store(tf, fmt, fd, rslt); + + return 0; +} + +int +fpu_nmsub(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd) +{ + uint64_t raw1, raw2, raw3, rslt; + + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw1 = fpu_load(tf, fmt, fs); + raw2 = fpu_load(tf, fmt, ft); + raw3 = fpu_load(tf, fmt, fr); + if (fmt == FMT_S) { + float32 f32 = float32_sub( + float32_mul((float32)raw1, (float32)raw2), + (float32)raw3); + if (float32_is_nan(f32)) + float_set_invalid(); + else + f32 ^= 1L << 31; + rslt = (uint64_t)f32; + } else { + float64 f64 = float64_sub( + float64_mul((float64)raw1, (float64)raw2), + (float64)raw3); + if (float64_is_nan(f64)) + float_set_invalid(); + else + f64 ^= 1L << 63; + rslt = (uint64_t)f64; + } + fpu_store(tf, fmt, fd, rslt); + + return 0; +} + +int +fpu_recip(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw; + + if (ft != 0) + return SIGILL; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw = fpu_load(tf, fmt, fs); + if (fmt == FMT_S) { + float32 f32 = float32_div(ONE_F32, (float32)raw); + raw = (uint64_t)f32; + } else { + float64 f64 = float64_div(ONE_F64, 
(float64)raw); + raw = (uint64_t)f64; + } + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +int +fpu_round_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + /* round towards nearest */ + return fpu_int_l(tf, fmt, ft, fs, fd, FP_RN); +} + +int +fpu_round_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + /* round towards nearest */ + return fpu_int_w(tf, fmt, ft, fs, fd, FP_RN); +} + +int +fpu_rsqrt(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw; + + if (ft != 0) + return SIGILL; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw = fpu_load(tf, fmt, fs); + if (fmt == FMT_S) { + float32 f32 = float32_sqrt((float32)raw); + if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != + (FPCSR_C_V | FPCSR_E_V)) + f32 = float32_div(ONE_F32, f32); + raw = (uint64_t)f32; + } else { + float64 f64 = float64_sqrt((float64)raw); + if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != + (FPCSR_C_V | FPCSR_E_V)) + f64 = float64_div(ONE_F64, f64); + raw = (uint64_t)f64; + } + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +int +fpu_sqrt(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw; + + if (ft != 0) + return SIGILL; + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw = fpu_load(tf, fmt, fs); + if (fmt == FMT_S) { + float32 f32 = float32_sqrt((float32)raw); + raw = (uint64_t)f32; + } else { + float64 f64 = float64_sqrt((float64)raw); + raw = (uint64_t)f64; + } + fpu_store(tf, fmt, fd, raw); + + return 0; +} + +int +fpu_sub(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + uint64_t raw1, raw2, rslt; + + if (fmt != FMT_S && fmt != FMT_D) + return SIGILL; + + raw1 = fpu_load(tf, fmt, fs); + raw2 = fpu_load(tf, fmt, ft); + if (fmt == FMT_S) { + float32 f32 = float32_sub((float32)raw1, (float32)raw2); + rslt = (uint64_t)f32; + } else { + float64 f64 = float64_sub((float64)raw1, (float64)raw2); + rslt = (uint64_t)f64; + } + fpu_store(tf, fmt, fd, rslt); + + return 0; +} + +int 
+fpu_trunc_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + /* round towards zero */ + return fpu_int_l(tf, fmt, ft, fs, fd, FP_RZ); +} + +int +fpu_trunc_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd) +{ + /* round towards zero */ + return fpu_int_w(tf, fmt, ft, fs, fd, FP_RZ); +} diff --git a/sys/arch/mips64/mips64/lcore_float.S b/sys/arch/mips64/mips64/lcore_float.S index c15db784cc6..b89837fe2f3 100644 --- a/sys/arch/mips64/mips64/lcore_float.S +++ b/sys/arch/mips64/mips64/lcore_float.S @@ -1,4 +1,4 @@ -/* $OpenBSD: lcore_float.S,v 1.19 2010/01/08 01:35:52 syuu Exp $ */ +/* $OpenBSD: lcore_float.S,v 1.20 2010/09/21 20:29:17 miod Exp $ */ /* * Copyright (c) 2001-2003 Opsycon AB (www.opsycon.se / www.opsycon.com) @@ -152,7 +152,6 @@ LEAF(MipsSwitchFPState, 0) ldc1 $f30, PCB_FPREGS+(30 * REGSZ)(a1) ldc1 $f31, PCB_FPREGS+(31 * REGSZ)(a1) - and t0, t0, ~FPC_EXCEPTION_BITS ctc1 t0, FPC_CSR nop @@ -256,7 +255,6 @@ LEAF(MipsSwitchFPState16, 0) lwc1 $f30, PCB_FPREGS+(30 * REGSZ)(a1) lwc1 $f31, PCB_FPREGS+(31 * REGSZ)(a1) - and t0, t0, ~FPC_EXCEPTION_BITS ctc1 t0, FPC_CSR nop @@ -407,134 +405,6 @@ END(MipsSaveCurFPState16) /*---------------------------------------------------------------------------- * - * MipsFPTrap -- - * - * Handle a floating point Trap. - * - * MipsFPTrap(statusReg, causeReg, pc) - * unsigned statusReg; - * unsigned causeReg; - * unsigned pc; - * - * Results: - * None. - * - * Side effects: - * None. - * - *---------------------------------------------------------------------------- - */ -NON_LEAF(MipsFPTrap, FRAMESZ(CF_SZ), ra) - PTR_SUBU sp, sp, FRAMESZ(CF_SZ) - mfc0 t0, COP_0_STATUS_REG - PTR_S ra, CF_RA_OFFS(sp) - .mask 0x80000000, (CF_RA_OFFS - FRAMESZ(CF_SZ)) - - PTR_S a2, 2*REGSZ(sp) - PTR_S a3, 3*REGSZ(sp) - or t1, t0, SR_COP_1_BIT - mtc0 t1, COP_0_STATUS_REG - ITLBNOPFIX - cfc1 t1, FPC_CSR # stall til FP done - cfc1 t1, FPC_CSR # now get status - nop - sll t2, t1, (31-17) # unimplemented operation? 
- bgez t2, 3f # no, normal trap - nop -/* - * We got an unimplemented operation trap so fetch the instruction, - * compute the next PC and emulate the instruction. - */ - bgez a1, 1f # Check the branch delay bit. - nop -/* - * The instruction is in the branch delay slot so the branch will have to - * be emulated to get the resulting PC. - */ - GET_CPU_INFO(t2, t3) - PTR_L a0, CI_CURPROCPADDR(t2) # first arg is ptr to CPU regs - move a1, a2 # second arg is instruction PC - move a2, t1 # third arg is the FP CSR - jal MipsEmulateBranch # compute PC after branch - move a3, zero # fourth arg is FALSE -/* - * Now load the floating-point instruction in the branch delay slot - * to be emulated. - */ - PTR_L a2, 2*REGSZ(sp) # restore EXC pc - b 2f - lw a0, 4(a2) # a0 = coproc instruction -/* - * This is not in the branch delay slot so calculate the resulting - * PC (epc + 4) into v0 and continue to MipsEmulateFP(). - */ -1: - lw a0, 0(a2) # a0 = coproc instruction - PTR_ADDU v0, a2, 4 # v0 = next pc -2: - GET_CPU_INFO(t2, t3) - PTR_L a3, CI_CURPROCPADDR(t2) # first arg is ptr to CPU regs - PTR_S v0, PCB_REGS+(PC * REGSZ)(a3) # save new pc -/* - * Check to see if the instruction to be emulated is a floating-point - * instruction. - */ - srl a3, a0, OPCODE_SHIFT - beq a3, OPCODE_C1, 5f # this should never fail - nop -/* - * Send a floating point exception signal to the current process. 
- */ -3: - cfc1 a1, FPC_CSR # code = FP exceptions - GET_CPU_INFO(t2, t3) - PTR_L a0, CI_CURPROC(t2) # get current process - PTR_L a3, 3*REGSZ(sp) - and v0, a1, FPC_EXCEPTION_INEXACT - bnez v0, 4f - li a2, FPE_FLTRES - and v0, a1, FPC_EXCEPTION_UNDERFLOW - bnez v0, 4f - li a2, FPE_FLTUND - and v0, a1, FPC_EXCEPTION_OVERFLOW - bnez v0, 4f - li a2, FPE_FLTOVF - and v0, a1, FPC_EXCEPTION_DIV0 - bnez v0, 4f - li a2, FPE_FLTDIV - li a2, FPE_FLTINV -4: - ctc1 zero, FPC_CSR # Clear exceptions - jal fpu_trapsignal - nop - b FPReturn - nop - -/* - * Finally, we can call MipsEmulateFP() where a0 is the instruction to emulate. - */ -5: - jal MipsEmulateFP - nop - - bnez v0, 3b # Emulation failed. - nop - -/* - * Turn off the floating point coprocessor and return. - */ -FPReturn: - mfc0 t0, COP_0_STATUS_REG - PTR_L ra, CF_RA_OFFS(sp) - and t0, t0, ~SR_COP_1_BIT - mtc0 t0, COP_0_STATUS_REG - ITLBNOPFIX - j ra - PTR_ADDU sp, sp, FRAMESZ(CF_SZ) -END(MipsFPTrap) - -/*---------------------------------------------------------------------------- - * * cp1_get_prid * * Get the floating point co-processor id. 
@@ -562,4 +432,3 @@ LEAF(cp1_get_prid, 0) jr ra nop END(cp1_get_prid) - diff --git a/sys/arch/mips64/mips64/process_machdep.c b/sys/arch/mips64/mips64/process_machdep.c index bfe1b2948b3..748c3ca7af2 100644 --- a/sys/arch/mips64/mips64/process_machdep.c +++ b/sys/arch/mips64/mips64/process_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: process_machdep.c,v 1.14 2010/06/26 23:24:43 guenther Exp $ */ +/* $OpenBSD: process_machdep.c,v 1.15 2010/09/21 20:29:17 miod Exp $ */ /* * Copyright (c) 1994 Adam Glass @@ -40,7 +40,7 @@ * From: * Id: procfs_i386.c,v 4.1 1993/12/17 10:47:45 jsp Rel * - * $Id: process_machdep.c,v 1.14 2010/06/26 23:24:43 guenther Exp $ + * $Id: process_machdep.c,v 1.15 2010/09/21 20:29:17 miod Exp $ */ /* @@ -72,6 +72,7 @@ #include <sys/proc.h> #include <sys/vnode.h> #include <sys/ptrace.h> +#include <machine/fpu.h> #include <machine/frame.h> #include <machine/reg.h> @@ -111,6 +112,7 @@ process_write_regs(p, regs) ic = p->p_md.md_regs->ic; ipl = p->p_md.md_regs->ipl; bcopy(&regs->r_regs[AST], &p->p_md.md_regs->ast, REGSIZE); + p->p_md.md_regs->fsr &= ~FPCSR_C_MASK; p->p_md.md_regs->sr = sr; p->p_md.md_regs->ic = ic; p->p_md.md_regs->ipl = ipl; diff --git a/sys/arch/mips64/mips64/trap.c b/sys/arch/mips64/mips64/trap.c index c2a534f9fb8..378bd911409 100644 --- a/sys/arch/mips64/mips64/trap.c +++ b/sys/arch/mips64/mips64/trap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: trap.c,v 1.67 2010/09/17 00:36:32 miod Exp $ */ +/* $OpenBSD: trap.c,v 1.68 2010/09/21 20:29:17 miod Exp $ */ /* * Copyright (c) 1988 University of Utah. 
@@ -133,10 +133,7 @@ uint64_t kdbpeekd(vaddr_t); extern int kdb_trap(int, db_regs_t *); #endif -extern void MipsFPTrap(u_int, u_int, u_int, union sigval); - void ast(void); -void fpu_trapsignal(struct proc *, u_long, int, union sigval); void trap(struct trap_frame *); #ifdef PTRACE int cpu_singlestep(struct proc *); @@ -746,6 +743,11 @@ printf("SIG-BUSB @%p pc %p, ra %p\n", trapframe->badvaddr, trapframe->pc, trapfr break; case T_COP_UNUSABLE+T_USER: + /* + * Note MIPS IV COP1X instructions issued with FPU + * disabled correctly report coprocessor 1 as the + * unusable coprocessor number. + */ if ((trapframe->cause & CR_COP_ERR) != 0x10000000) { i = SIGILL; /* only FPU instructions allowed */ typ = ILL_ILLOPC; @@ -761,8 +763,7 @@ printf("SIG-BUSB @%p pc %p, ra %p\n", trapframe->badvaddr, trapframe->pc, trapfr goto err; case T_FPE+T_USER: - sv.sival_ptr = (void *)trapframe->pc; - MipsFPTrap(trapframe->sr, trapframe->cause, trapframe->pc, sv); + MipsFPTrap(trapframe); goto out; case T_OVFLOW+T_USER: @@ -835,17 +836,6 @@ child_return(arg) #endif } -/* - * Wrapper around trapsignal() for use by the floating point code. - */ -void -fpu_trapsignal(struct proc *p, u_long ucode, int typ, union sigval sv) -{ - KERNEL_PROC_LOCK(p); - trapsignal(p, SIGFPE, ucode, typ, sv); - KERNEL_PROC_UNLOCK(p); -} - #if defined(DDB) || defined(DEBUG) void trapDump(char *msg) |