summary refs log tree commit diff
path: root/sys/arch
diff options
context:
space:
mode:
author Miod Vallat <miod@cvs.openbsd.org> 2010-09-21 20:29:18 +0000
committer Miod Vallat <miod@cvs.openbsd.org> 2010-09-21 20:29:18 +0000
commit 39eff95ee263d1a682cb8667f5e7ea2307be5a0c (patch)
tree 4dbffcedda9b781e4a328757263429adf2fdc640 /sys/arch
parent 37d466cb419fc3bce08d762cf8bf4cad7f3c5ae5 (diff)
Replace the old floating point completion code with a C interface to the
MI softfloat code, implementing all MIPS IV specified floating point operations. Tested on R5000, R10000, R14000 and Loongson2F.
Diffstat (limited to 'sys/arch')
-rw-r--r-- sys/arch/mips64/conf/files.mips64 6
-rw-r--r-- sys/arch/mips64/include/cpu.h 40
-rw-r--r-- sys/arch/mips64/include/ieeefp.h 23
-rw-r--r-- sys/arch/mips64/mips64/fp.S 3127
-rw-r--r-- sys/arch/mips64/mips64/fp_emulate.c 1310
-rw-r--r-- sys/arch/mips64/mips64/lcore_float.S 133
-rw-r--r-- sys/arch/mips64/mips64/process_machdep.c 6
-rw-r--r-- sys/arch/mips64/mips64/trap.c 24
8 files changed, 1350 insertions, 3319 deletions
diff --git a/sys/arch/mips64/conf/files.mips64 b/sys/arch/mips64/conf/files.mips64
index 4cbaa6bb8b4..4e558136edd 100644
--- a/sys/arch/mips64/conf/files.mips64
+++ b/sys/arch/mips64/conf/files.mips64
@@ -1,4 +1,4 @@
-# $OpenBSD: files.mips64,v 1.15 2010/09/20 12:10:26 syuu Exp $
+# $OpenBSD: files.mips64,v 1.16 2010/09/21 20:29:13 miod Exp $
file arch/mips64/mips64/arcbios.c arcbios
file arch/mips64/mips64/clock.c
@@ -20,7 +20,7 @@ file arch/mips64/mips64/cache_octeon.c cpu_octeon
file arch/mips64/mips64/context.S
file arch/mips64/mips64/cp0access.S
file arch/mips64/mips64/exception.S
-file arch/mips64/mips64/fp.S
+file arch/mips64/mips64/fp_emulate.c
file arch/mips64/mips64/lcore_access.S
file arch/mips64/mips64/lcore_float.S
file arch/mips64/mips64/tlbhandler.S
@@ -33,3 +33,5 @@ file arch/mips64/mips64/ipifuncs.c multiprocessor
file netinet/in_cksum.c inet
file netinet/in4_cksum.c inet
+
+file lib/libkern/softfloat.c
diff --git a/sys/arch/mips64/include/cpu.h b/sys/arch/mips64/include/cpu.h
index 429bd17d05f..6913ad4a4f9 100644
--- a/sys/arch/mips64/include/cpu.h
+++ b/sys/arch/mips64/include/cpu.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.h,v 1.64 2010/09/20 12:10:26 syuu Exp $ */
+/* $OpenBSD: cpu.h,v 1.65 2010/09/21 20:29:17 miod Exp $ */
/*-
* Copyright (c) 1992, 1993
@@ -284,43 +284,6 @@ extern vaddr_t uncached_base;
#define FPC_CSR $31
/*
- * The floating point coprocessor status register bits.
- */
-#define FPC_ROUNDING_BITS 0x00000003
-#define FPC_ROUND_RN 0x00000000
-#define FPC_ROUND_RZ 0x00000001
-#define FPC_ROUND_RP 0x00000002
-#define FPC_ROUND_RM 0x00000003
-#define FPC_STICKY_BITS 0x0000007c
-#define FPC_STICKY_INEXACT 0x00000004
-#define FPC_STICKY_UNDERFLOW 0x00000008
-#define FPC_STICKY_OVERFLOW 0x00000010
-#define FPC_STICKY_DIV0 0x00000020
-#define FPC_STICKY_INVALID 0x00000040
-#define FPC_ENABLE_BITS 0x00000f80
-#define FPC_ENABLE_INEXACT 0x00000080
-#define FPC_ENABLE_UNDERFLOW 0x00000100
-#define FPC_ENABLE_OVERFLOW 0x00000200
-#define FPC_ENABLE_DIV0 0x00000400
-#define FPC_ENABLE_INVALID 0x00000800
-#define FPC_EXCEPTION_BITS 0x0003f000
-#define FPC_EXCEPTION_INEXACT 0x00001000
-#define FPC_EXCEPTION_UNDERFLOW 0x00002000
-#define FPC_EXCEPTION_OVERFLOW 0x00004000
-#define FPC_EXCEPTION_DIV0 0x00008000
-#define FPC_EXCEPTION_INVALID 0x00010000
-#define FPC_EXCEPTION_UNIMPL 0x00020000
-#define FPC_COND_BIT 0x00800000
-#define FPC_FLUSH_BIT 0x01000000
-#define FPC_MBZ_BITS 0xfe7c0000
-
-/*
- * Constants to determine if have a floating point instruction.
- */
-#define OPCODE_SHIFT 26
-#define OPCODE_C1 0x11
-
-/*
* The low part of the TLB entry.
*/
#define VMTLB_PF_NUM 0x3fffffc0
@@ -636,6 +599,7 @@ void save_fpu(void);
int guarded_read_4(paddr_t, uint32_t *);
int guarded_write_4(paddr_t, uint32_t);
+void MipsFPTrap(struct trap_frame *);
register_t MipsEmulateBranch(struct trap_frame *, vaddr_t, uint32_t, uint32_t);
/*
diff --git a/sys/arch/mips64/include/ieeefp.h b/sys/arch/mips64/include/ieeefp.h
index b833c549bd3..0c2f18909b5 100644
--- a/sys/arch/mips64/include/ieeefp.h
+++ b/sys/arch/mips64/include/ieeefp.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: ieeefp.h,v 1.2 2004/08/10 20:28:13 deraadt Exp $ */
+/* $OpenBSD: ieeefp.h,v 1.3 2010/09/21 20:29:17 miod Exp $ */
/*
* Written by J.T. Conklin, Apr 11, 1995
@@ -22,4 +22,25 @@ typedef enum {
FP_RM=3 /* round toward negative infinity */
} fp_rnd;
+#ifdef _KERNEL
+
+/*
+ * Defines for the floating-point completion/emulation code.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <machine/fpu.h>
+
+#define float_raise(bits) \
+ do { curproc->p_md.md_regs->fsr |= (bits) << FPCSR_C_SHIFT; } while (0)
+#define float_set_inexact() float_raise(FP_X_IMP)
+#define float_set_invalid() float_raise(FP_X_INV)
+
+#define float_get_round(csr) ((csr) & FPCSR_RM_MASK) /* parenthesized: safe for expression arguments */
+#define fpgetround() float_get_round(curproc->p_md.md_regs->fsr)
+
+#endif
+
#endif /* !_MIPS_IEEEFP_H_ */
diff --git a/sys/arch/mips64/mips64/fp.S b/sys/arch/mips64/mips64/fp.S
deleted file mode 100644
index 5578b6f576f..00000000000
--- a/sys/arch/mips64/mips64/fp.S
+++ /dev/null
@@ -1,3127 +0,0 @@
-/* $OpenBSD: fp.S,v 1.9 2010/02/08 19:26:46 miod Exp $ */
-/*
- * Copyright (c) 1992, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Ralph Campbell.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)fp.s 8.1 (Berkeley) 6/10/93
- * $Id: fp.S,v 1.9 2010/02/08 19:26:46 miod Exp $
- */
-
-/*
- * Standard header stuff.
- */
-
-#include <machine/regdef.h>
-#include <machine/asm.h>
-#include <machine/regnum.h>
-#include <machine/cpu.h>
-
-#include "assym.h"
-
-#define SEXP_INF 0xff
-#define DEXP_INF 0x7ff
-#define SEXP_BIAS 127
-#define DEXP_BIAS 1023
-#define SEXP_MIN -126
-#define DEXP_MIN -1022
-#define SEXP_MAX 127
-#define DEXP_MAX 1023
-#define WEXP_MAX 30 /* maximum unbiased exponent for int */
-#define WEXP_MIN -1 /* minimum unbiased exponent for int */
-#define LEXP_MAX 62 /* maximum unbiased exponent for long */
-#define LEXP_MIN -1 /* minimum unbiased exponent for long */
-#define SFRAC_BITS 23
-#define DFRAC_BITS 52
-#define SIMPL_ONE 0x00800000
-#define DIMPL_ONE 0x0010000000000000
-#define SLEAD_ZEROS 63 - 55
-#define DLEAD_ZEROS 63 - 52
-#define STICKYBIT 1
-#define GUARDBIT 0x0000000080000000
-#define DGUARDBIT 0x8000000000000000
-
-#define SSIGNAL_NAN 0x00400000
-#define DSIGNAL_NAN 0x00080000
-#define SQUIET_NAN 0x003fffff
-#define DQUIET_NAN 0x0007ffffffffffff
-#define INT_MIN 0x80000000
-#define INT_MAX 0x7fffffff
-#define LONG_MIN 0x8000000000000000
-#define LONG_MAX 0x7fffffffffffffff
-
-#define COND_UNORDERED 0x1
-#define COND_EQUAL 0x2
-#define COND_LESS 0x4
-#define COND_SIGNAL 0x8
-
-/*----------------------------------------------------------------------------
- *
- * MipsEmulateFP --
- *
- * Emulate unimplemented floating point operations.
- * This routine should only be called by MipsFPInterrupt()
- * and only if this is a COP1 instruction.
- *
- * MipsEmulateFP(instr)
- * unsigned instr;
- *
- * Results:
- * None.
- *
- * Side effects:
- * Floating point registers are modified according to instruction.
- *
- *----------------------------------------------------------------------------
- */
-NON_LEAF(MipsEmulateFP, FRAMESZ(CF_SZ), ra)
- PTR_SUB sp, sp, FRAMESZ(CF_SZ)
- PTR_S ra, CF_RA_OFFS(sp)
-
- srl v0, a0, 21 # get FMT field
- and v0, v0, 0x1f # mask FMT field
- dla a3, func_s
- beq v0, 0x10, 1f
- dla a3, func_d
- beq v0, 0x11, 1f
- dla a3, func_w
- beq v0, 0x14, 1f
- dla a3, func_l
- beq v0, 0x15, 1f
- b ill # illegal format
-
-1:
- and v1, a0, 0x3f # mask FUNC field
- sll v1, v1, 3 # align for table lookup
- daddu v1, a3
- cfc1 a1, FPC_CSR # get exception register
- ld a3, (v1) # switch on FUNC & FMT
- and a1, a1, ~FPC_EXCEPTION_UNIMPL # clear exception
- ctc1 a1, FPC_CSR
- j a3
-
- .rdata
-func_s:
- .dword add_s # 0
- .dword sub_s # 1
- .dword mul_s # 2
- .dword div_s # 3
- .dword ill # 4 (sqrt)
- .dword abs_s # 5
- .dword mov_s # 6
- .dword neg_s # 7
- .dword round_l_s # 8
- .dword trunc_l_s # 9
- .dword ceil_l_s # 10
- .dword floor_l_s # 11
- .dword round_w_s # 12
- .dword trunc_w_s # 13
- .dword ceil_w_s # 14
- .dword floor_w_s # 15
- .dword ill # 16
- .dword ill # 17
- .dword ill # 18
- .dword ill # 19
- .dword ill # 20
- .dword ill # 21
- .dword ill # 22
- .dword ill # 23
- .dword ill # 24
- .dword ill # 25
- .dword ill # 26
- .dword ill # 27
- .dword ill # 28
- .dword ill # 29
- .dword ill # 30
- .dword ill # 31
- .dword ill # 32
- .dword cvt_d_s # 33
- .dword ill # 34
- .dword ill # 35
- .dword cvt_w_s # 36
- .dword cvt_l_s # 37
- .dword ill # 38
- .dword ill # 39
- .dword ill # 40
- .dword ill # 41
- .dword ill # 42
- .dword ill # 43
- .dword ill # 44
- .dword ill # 45
- .dword ill # 46
- .dword ill # 47
- .dword cmp_s # 48
- .dword cmp_s # 49
- .dword cmp_s # 50
- .dword cmp_s # 51
- .dword cmp_s # 52
- .dword cmp_s # 53
- .dword cmp_s # 54
- .dword cmp_s # 55
- .dword cmp_s # 56
- .dword cmp_s # 57
- .dword cmp_s # 58
- .dword cmp_s # 59
- .dword cmp_s # 60
- .dword cmp_s # 61
- .dword cmp_s # 62
- .dword cmp_s # 63
-
-func_d:
- .dword add_d # 0
- .dword sub_d # 1
- .dword mul_d # 2
- .dword div_d # 3
- .dword ill # 4 (sqrt)
- .dword abs_d # 5
- .dword mov_d # 6
- .dword neg_d # 7
- .dword round_l_d # 8
- .dword trunc_l_d # 9
- .dword ceil_l_d # 10
- .dword floor_l_d # 11
- .dword round_w_d # 12
- .dword trunc_w_d # 13
- .dword ceil_w_d # 14
- .dword floor_w_d # 15
- .dword ill # 16
- .dword ill # 17
- .dword ill # 18
- .dword ill # 19
- .dword ill # 20
- .dword ill # 21
- .dword ill # 22
- .dword ill # 23
- .dword ill # 24
- .dword ill # 25
- .dword ill # 26
- .dword ill # 27
- .dword ill # 28
- .dword ill # 29
- .dword ill # 30
- .dword ill # 31
- .dword cvt_s_d # 32
- .dword ill # 33
- .dword ill # 34
- .dword ill # 35
- .dword cvt_w_d # 36
- .dword cvt_l_d # 37
- .dword ill # 38
- .dword ill # 39
- .dword ill # 40
- .dword ill # 41
- .dword ill # 42
- .dword ill # 43
- .dword ill # 44
- .dword ill # 45
- .dword ill # 46
- .dword ill # 47
- .dword cmp_d # 48
- .dword cmp_d # 49
- .dword cmp_d # 50
- .dword cmp_d # 51
- .dword cmp_d # 52
- .dword cmp_d # 53
- .dword cmp_d # 54
- .dword cmp_d # 55
- .dword cmp_d # 56
- .dword cmp_d # 57
- .dword cmp_d # 58
- .dword cmp_d # 59
- .dword cmp_d # 60
- .dword cmp_d # 61
- .dword cmp_d # 62
- .dword cmp_d # 63
-
-func_w:
- .dword ill # 0
- .dword ill # 1
- .dword ill # 2
- .dword ill # 3
- .dword ill # 4
- .dword ill # 5
- .dword ill # 6
- .dword ill # 7
- .dword ill # 8
- .dword ill # 9
- .dword ill # 10
- .dword ill # 11
- .dword ill # 12
- .dword ill # 13
- .dword ill # 14
- .dword ill # 15
- .dword ill # 16
- .dword ill # 17
- .dword ill # 18
- .dword ill # 19
- .dword ill # 20
- .dword ill # 21
- .dword ill # 22
- .dword ill # 23
- .dword ill # 24
- .dword ill # 25
- .dword ill # 26
- .dword ill # 27
- .dword ill # 28
- .dword ill # 29
- .dword ill # 30
- .dword ill # 31
- .dword cvt_s_w # 32
- .dword cvt_d_w # 33
- .dword ill # 34
- .dword ill # 35
- .dword ill # 36
- .dword ill # 37
- .dword ill # 38
- .dword ill # 39
- .dword ill # 40
- .dword ill # 41
- .dword ill # 42
- .dword ill # 43
- .dword ill # 44
- .dword ill # 45
- .dword ill # 46
- .dword ill # 47
- .dword ill # 48
- .dword ill # 49
- .dword ill # 50
- .dword ill # 51
- .dword ill # 52
- .dword ill # 53
- .dword ill # 54
- .dword ill # 55
- .dword ill # 56
- .dword ill # 57
- .dword ill # 58
- .dword ill # 59
- .dword ill # 60
- .dword ill # 61
- .dword ill # 62
- .dword ill # 63
-
-func_l:
- .dword ill # 0
- .dword ill # 1
- .dword ill # 2
- .dword ill # 3
- .dword ill # 4
- .dword ill # 5
- .dword ill # 6
- .dword ill # 7
- .dword ill # 8
- .dword ill # 9
- .dword ill # 10
- .dword ill # 11
- .dword ill # 12
- .dword ill # 13
- .dword ill # 14
- .dword ill # 15
- .dword ill # 16
- .dword ill # 17
- .dword ill # 18
- .dword ill # 19
- .dword ill # 20
- .dword ill # 21
- .dword ill # 22
- .dword ill # 23
- .dword ill # 24
- .dword ill # 25
- .dword ill # 26
- .dword ill # 27
- .dword ill # 28
- .dword ill # 29
- .dword ill # 30
- .dword ill # 31
- .dword cvt_s_l # 32
- .dword cvt_d_l # 33
- .dword ill # 34
- .dword ill # 35
- .dword ill # 36
- .dword ill # 37
- .dword ill # 38
- .dword ill # 39
- .dword ill # 40
- .dword ill # 41
- .dword ill # 42
- .dword ill # 43
- .dword ill # 44
- .dword ill # 45
- .dword ill # 46
- .dword ill # 47
- .dword ill # 48
- .dword ill # 49
- .dword ill # 50
- .dword ill # 51
- .dword ill # 52
- .dword ill # 53
- .dword ill # 54
- .dword ill # 55
- .dword ill # 56
- .dword ill # 57
- .dword ill # 58
- .dword ill # 59
- .dword ill # 60
- .dword ill # 61
- .dword ill # 62
- .dword ill # 63
-
- .text
-
-/*
- * Single precision subtract.
- */
-sub_s:
- jal get_ft_fs_s
- xor ta0, 1 # negate FT sign bit
- b add_sub_s
-/*
- * Single precision add.
- */
-add_s:
- jal get_ft_fs_s
-add_sub_s:
- bne t1, SEXP_INF, 1f # is FS an infinity?
- bne ta1, SEXP_INF, result_fs_s # if FT is not inf, result=FS
- bne t2, zero, result_fs_s # if FS is NAN, result is FS
- bne ta2, zero, result_ft_s # if FT is NAN, result is FT
- bne t0, ta0, invalid_s # both infinities same sign?
- b result_fs_s # result is in FS
-1:
- beq ta1, SEXP_INF, result_ft_s # if FT is inf, result=FT
- bne t1, zero, 4f # is FS a denormalized num?
- beq t2, zero, 3f # is FS zero?
- bne ta1, zero, 2f # is FT a denormalized num?
- beq ta2, zero, result_fs_s # FT is zero, result=FS
- jal renorm_fs_s
- jal renorm_ft_s
- b 5f
-2:
- jal renorm_fs_s
- subu ta1, ta1, SEXP_BIAS # unbias FT exponent
- or ta2, ta2, SIMPL_ONE # set implied one bit
- b 5f
-3:
- bne ta1, zero, result_ft_s # if FT != 0, result=FT
- bne ta2, zero, result_ft_s
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- bne v0, FPC_ROUND_RM, 1f # round to -infinity?
- or t0, t0, ta0 # compute result sign
- b result_fs_s
-1:
- and t0, ta0 # compute result sign
- b result_fs_s
-4:
- bne ta1, zero, 2f # is FT a denormalized num?
- beq ta2, zero, result_fs_s # FT is zero, result=FS
- subu t1, SEXP_BIAS # unbias FS exponent
- or t2, SIMPL_ONE # set implied one bit
- jal renorm_ft_s
- b 5f
-2:
- subu t1, SEXP_BIAS # unbias FS exponent
- or t2, SIMPL_ONE # set implied one bit
- subu ta1, SEXP_BIAS # unbias FT exponent
- or ta2, SIMPL_ONE # set implied one bit
-/*
- * Perform the addition.
- */
-5:
- move t8, zero # no shifted bits (sticky reg)
- beq t1, ta1, 4f # exp equal, no shift needed
- subu v0, t1, ta1 # v0 = difference of exponents
- move v1, v0 # v1 = abs(difference)
- bge v0, zero, 1f
- negu v1
-1:
- ble v1, SFRAC_BITS+2, 2f # is difference too great?
- li t8, STICKYBIT # set the sticky bit
- bge v0, zero, 1f # check which exp is larger
- move t1, ta1 # result exp is FTs
- move t2, zero # FSs fraction shifted is zero
- b 4f
-1:
- move ta2, zero # FTs fraction shifted is zero
- b 4f
-2:
- li t9, 32 # compute 32 - abs(exp diff)
- subu t9, t9, v1
- bgt v0, zero, 3f # if FS > FT, shift FTs frac
- move t1, ta1 # FT > FS, result exp is FTs
- sll t8, t2, t9 # save bits shifted out
- srl t2, t2, v1 # shift FSs fraction
- b 4f
-3:
- sll t8, ta2, t9 # save bits shifted out
- srl ta2, ta2, v1 # shift FTs fraction
-4:
- bne t0, ta0, 1f # if signs differ, subtract
- addu t2, t2, ta2 # add fractions
- b norm_s
-1:
- blt t2, ta2, 3f # subtract larger from smaller
- bne t2, ta2, 2f # if same, result=0
- move t1, zero # result=0
- move t2, zero
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- bne v0, FPC_ROUND_RM, 1f # round to -infinity?
- or t0, t0, ta0 # compute result sign
- b result_fs_s
-1:
- and t0, t0, ta0 # compute result sign
- b result_fs_s
-2:
- sltu t9, zero, t8 # compute t2:zero - ta2:t8
- subu t8, zero, t8
- subu t2, t2, ta2 # subtract fractions
- subu t2, t2, t9 # subtract barrow
- b norm_s
-3:
- move t0, ta0 # sign of result = FTs
- sltu t9, zero, t8 # compute ta2:zero - t2:t8
- subu t8, zero, t8
- subu t2, ta2, t2 # subtract fractions
- subu t2, t2, t9 # subtract barrow
- b norm_s
-
-/*
- * Double precision subtract.
- */
-sub_d:
- jal get_ft_fs_d
- xor ta0, ta0, 1 # negate sign bit
- b add_sub_d
-/*
- * Double precision add.
- */
-add_d:
- jal get_ft_fs_d
-add_sub_d:
- bne t1, DEXP_INF, 1f # is FS an infinity?
- bne ta1, DEXP_INF, result_fs_d # if FT is not inf, result=FS
- bne t2, zero, result_fs_d # if FS is NAN, result is FS
- bne ta2, zero, result_ft_d # if FT is NAN, result is FT
- bne t0, ta0, invalid_d # both infinities same sign?
- b result_fs_d # result is in FS
-1:
- beq ta1, DEXP_INF, result_ft_d # if FT is inf, result=FT
- bne t1, zero, 4f # is FS a denormalized num?
- beq t2, zero, 3f # is FS zero?
- bne ta1, zero, 2f # is FT a denormalized num?
- beq ta2, zero, result_fs_d # FT is zero, result=FS
- jal renorm_fs_d
- jal renorm_ft_d
- b 5f
-2:
- jal renorm_fs_d
- subu ta1, ta1, DEXP_BIAS # unbias FT exponent
- or ta2, ta2, DIMPL_ONE # set implied one bit
- b 5f
-3:
- bne ta1, zero, result_ft_d # if FT != 0, result=FT
- bne ta2, zero, result_ft_d
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- bne v0, FPC_ROUND_RM, 1f # round to -infinity?
- or t0, t0, ta0 # compute result sign
- b result_fs_d
-1:
- and t0, t0, ta0 # compute result sign
- b result_fs_d
-4:
- bne ta1, zero, 2f # is FT a denormalized num?
- beq ta2, zero, result_fs_d # FT is zero, result=FS
- subu t1, t1, DEXP_BIAS # unbias FS exponent
- or t2, t2, DIMPL_ONE # set implied one bit
- jal renorm_ft_d
- b 5f
-2:
- subu t1, t1, DEXP_BIAS # unbias FS exponent
- or t2, t2, DIMPL_ONE # set implied one bit
- subu ta1, ta1, DEXP_BIAS # unbias FT exponent
- or ta2, ta2, DIMPL_ONE # set implied one bit
-/*
- * Perform the addition.
- */
-5:
- move t8, zero # no shifted bits (sticky reg)
- beq t1, ta1, 4f # no shift needed
- subu v0, t1, ta1 # v0 = difference of exponents
- move v1, v0 # v1 = abs(difference)
- bge v0, zero, 1f
- negu v1
-1:
- ble v1, DFRAC_BITS+2, 2f # is difference too great?
- li t8, STICKYBIT # set the sticky bit
- bge v0, zero, 1f # check which exp is larger
- move t1, ta1 # result exp is FTs
- move t2, zero # FSs fraction shifted is zero
- b 4f
-1:
- move ta2, zero # FTs fraction shifted is zero
- b 4f
-2:
- li t9, 64
- subu t9, t9, v1
- bge v0, zero, 3f # if FS > FT, shift FTs frac
- move t1, ta1 # FT > FS, result exp is FTs
- dsll t8, t2, t9 # save bits shifted out
- dsrl t2, t2, v1
- b 4f
-3:
- dsll t8, ta2, t9 # save bits shifted out
- dsrl ta2, ta2, v1
-4:
- bne t0, ta0, 1f # if signs differ, subtract
- daddu t2, ta2 # add fractions
- b norm_d
-1:
- blt t2, ta2, 3f # subtract larger from smaller
- bne t2, ta2, 2f
- move t1, zero # result=0
- move t2, zero
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- bne v0, FPC_ROUND_RM, 1f # round to -infinity?
- or t0, t0, ta0 # compute result sign
- b result_fs_d
-1:
- and t0, t0, ta0 # compute result sign
- b result_fs_d
-2:
- sltu t9, zero, t8 # compute t2:zero - ta2:t8
- dsubu t8, zero, t8
- dsubu t2, t2, ta2 # subtract fractions
- dsubu t2, t2, t9 # subtract barrow
- b norm_d
-3:
- move t0, ta0 # sign of result = FTs
- sltu t9, zero, t8
- dsubu t2, ta2, t2 # subtract fractions
- dsubu t2, t2, t9 # subtract barrow
- b norm_d
-
-/*
- * Single precision multiply.
- */
-mul_s:
- jal get_ft_fs_s
- xor t0, t0, ta0 # compute sign of result
- move ta0, t0
- bne t1, SEXP_INF, 2f # is FS an infinity?
- bne t2, zero, result_fs_s # if FS is a NAN, result=FS
- bne ta1, SEXP_INF, 1f # FS is inf, is FT an infinity?
- bne ta2, zero, result_ft_s # if FT is a NAN, result=FT
- b result_fs_s # result is infinity
-1:
- bne ta1, zero, result_fs_s # inf * zero? if no, result=FS
- bne ta2, zero, result_fs_s
- b invalid_s # infinity * zero is invalid
-2:
- bne ta1, SEXP_INF, 1f # FS != inf, is FT an infinity?
- bne t1, zero, result_ft_s # zero * inf? if no, result=FT
- bne t2, zero, result_ft_s
- bne ta2, zero, result_ft_s # if FT is a NAN, result=FT
- b invalid_s # zero * infinity is invalid
-1:
- bne t1, zero, 1f # is FS zero?
- beq t2, zero, result_fs_s # result is zero
- jal renorm_fs_s
- b 2f
-1:
- subu t1, t1, SEXP_BIAS # unbias FS exponent
- or t2, t2, SIMPL_ONE # set implied one bit
-2:
- bne ta1, zero, 1f # is FT zero?
- beq ta2, zero, result_ft_s # result is zero
- jal renorm_ft_s
- b 2f
-1:
- subu ta1, ta1, SEXP_BIAS # unbias FT exponent
- or ta2, ta2, SIMPL_ONE # set implied one bit
-2:
- addu t1, t1, ta1 # compute result exponent
- addu t1, t1, 9 # account for binary point
- multu t2, ta2 # multiply fractions
- mflo t8
- mfhi t2
- b norm_s
-
-/*
- * Double precision multiply.
- */
-mul_d:
- jal get_ft_fs_d
- xor t0, t0, ta0 # compute sign of result
- move ta0, t0
- bne t1, DEXP_INF, 2f # is FS an infinity?
- bne t2, zero, result_fs_d # if FS is a NAN, result=FS
- bne ta1, DEXP_INF, 1f # FS is inf, is FT an infinity?
- bne ta2, zero, result_ft_d # if FT is a NAN, result=FT
- b result_fs_d # result is infinity
-1:
- bne ta1, zero, result_fs_d # inf * zero? if no, result=FS
- bne ta2, zero, result_fs_d
- b invalid_d # infinity * zero is invalid
-2:
- bne ta1, DEXP_INF, 1f # FS != inf, is FT an infinity?
- bne t1, zero, result_ft_d # zero * inf? if no, result=FT
- bne t2, zero, result_ft_d # if FS is a NAN, result=FS
- bne ta2, zero, result_ft_d # if FT is a NAN, result=FT
- b invalid_d # zero * infinity is invalid
-1:
- bne t1, zero, 2f # is FS zero?
- beq t2, zero, result_fs_d # result is zero
- jal renorm_fs_d
- b 3f
-2:
- subu t1, t1, DEXP_BIAS # unbias FS exponent
- or t2, t2, DIMPL_ONE # set implied one bit
-3:
- bne ta1, zero, 2f # is FT zero?
- beq ta2, zero, result_ft_d # result is zero
- jal renorm_ft_d
- b 3f
-2:
- subu ta1, ta1, DEXP_BIAS # unbias FT exponent
- or ta2, ta2, DIMPL_ONE # set implied one bit
-3:
- addu t1, t1, ta1 # compute result exponent
- addu t1, t1, 12 # ???
- dmultu t2, ta2 # multiply fractions
- mflo t8
- mfhi t2
- b norm_d
-
-/*
- * Single precision divide.
- */
-div_s:
- jal get_ft_fs_s
- xor t0, t0, ta0 # compute sign of result
- move ta0, t0
- bne t1, SEXP_INF, 1f # is FS an infinity?
- bne t2, zero, result_fs_s # if FS is NAN, result is FS
- bne ta1, SEXP_INF, result_fs_s # is FT an infinity?
- bne ta2, zero, result_ft_s # if FT is NAN, result is FT
- b invalid_s # infinity/infinity is invalid
-1:
- bne ta1, SEXP_INF, 1f # is FT an infinity?
- bne ta2, zero, result_ft_s # if FT is NAN, result is FT
- move t1, zero # x / infinity is zero
- move t2, zero
- b result_fs_s
-1:
- bne t1, zero, 2f # is FS zero?
- bne t2, zero, 1f
- bne ta1, zero, result_fs_s # FS=zero, is FT zero?
- beq ta2, zero, invalid_s # 0 / 0
- b result_fs_s # result = zero
-1:
- jal renorm_fs_s
- b 3f
-2:
- subu t1, t1, SEXP_BIAS # unbias FS exponent
- or t2, t2, SIMPL_ONE # set implied one bit
-3:
- bne ta1, zero, 2f # is FT zero?
- bne ta2, zero, 1f
- or a1, a1, FPC_EXCEPTION_DIV0 | FPC_STICKY_DIV0
- and v0, a1, FPC_ENABLE_DIV0 # trap enabled?
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # save exceptions
- li t1, SEXP_INF # result is infinity
- move t2, zero
- b result_fs_s
-1:
- jal renorm_ft_s
- b 3f
-2:
- subu ta1, ta1, SEXP_BIAS # unbias FT exponent
- or ta2, ta2, SIMPL_ONE # set implied one bit
-3:
- subu t1, t1, ta1 # compute exponent
- subu t1, t1, 3 # compensate for result position
- li v0, SFRAC_BITS+3 # number of bits to divide
- move t8, t2 # init dividend
- move t2, zero # init result
-1:
- bltu t8, ta2, 3f # is dividend >= divisor?
-2:
- subu t8, t8, ta2 # subtract divisor from dividend
- or t2, t2, 1 # remember that we did
- bne t8, zero, 3f # if not done, continue
- sll t2, t2, v0 # shift result to final position
- b norm_s
-3:
- sll t8, t8, 1 # shift dividend
- sll t2, t2, 1 # shift result
- subu v0, v0, 1 # are we done?
- bne v0, zero, 1b # no, continue
- b norm_s
-
-/*
- * Double precision divide.
- */
-div_d:
- jal get_ft_fs_d
- xor t0, t0, ta0 # compute sign of result
- move ta0, t0
- bne t1, DEXP_INF, 1f # is FS an infinity?
- bne t2, zero, result_fs_d # if FS is NAN, result is FS
- bne ta1, DEXP_INF, result_fs_d # is FT an infinity?
- bne ta2, zero, result_ft_d # if FT is NAN, result is FT
- b invalid_d # infinity/infinity is invalid
-1:
- bne ta1, DEXP_INF, 1f # is FT an infinity?
- bne ta2, zero, result_ft_d # if FT is NAN, result is FT
- move t1, zero # x / infinity is zero
- move t2, zero
- b result_fs_d
-1:
- bne t1, zero, 2f # is FS zero?
- bne t2, zero, 1f
- bne ta1, zero, result_fs_d # FS=zero, is FT zero?
- beq ta2, zero, invalid_d # 0 / 0
- b result_fs_d # result = zero
-1:
- jal renorm_fs_d
- b 3f
-2:
- subu t1, t1, DEXP_BIAS # unbias FS exponent
- or t2, t2, DIMPL_ONE # set implied one bit
-3:
- bne ta1, zero, 2f # is FT zero?
- bne ta2, zero, 1f
- or a1, a1, FPC_EXCEPTION_DIV0 | FPC_STICKY_DIV0
- and v0, a1, FPC_ENABLE_DIV0 # trap enabled?
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # Save exceptions
- li t1, DEXP_INF # result is infinity
- move t2, zero
- b result_fs_d
-1:
- jal renorm_ft_d
- b 3f
-2:
- subu ta1, ta1, DEXP_BIAS # unbias FT exponent
- or ta2, ta2, DIMPL_ONE # set implied one bit
-3:
- subu t1, t1, ta1 # compute exponent
- subu t1, t1, 3 # compensate for result position
- li v0, DFRAC_BITS+3 # number of bits to divide
- move t8, t2 # init dividend
- move t2, zero # init result
-1:
- bltu t8, ta2, 3f # is dividend >= divisor?
-2:
- dsubu t8, t8, ta2 # subtract divisor from dividend
- or t2, t2, 1 # remember that we did
- bne t8, zero, 3f # if not done, continue
- dsll t2, t2, v0 # shift upper part
- b norm_d
-3:
- dsll t8, t8, 1 # shift dividend
- dsll t2, t2, 1 # shift result
- subu v0, v0, 1 # are we done?
- bne v0, zero, 1b # no, continue
- b norm_d
-
-/*
- * Single precision absolute value.
- */
-abs_s:
- jal get_fs_s
- move t0, zero # set sign positive
- b result_fs_s
-
-/*
- * Double precision absolute value.
- */
-abs_d:
- jal get_fs_d
- move t0, zero # set sign positive
- b result_fs_d
-
-/*
- * Single precision move.
- */
-mov_s:
- jal get_fs_s
- b result_fs_s
-
-/*
- * Double precision move.
- */
-mov_d:
- jal get_fs_d
- b result_fs_d
-
-/*
- * Single precision negate.
- */
-neg_s:
- jal get_fs_s
- xor t0, t0, 1 # reverse sign
- b result_fs_s
-
-/*
- * Double precision negate.
- */
-neg_d:
- jal get_fs_d
- xor t0, t0, 1 # reverse sign
- b result_fs_d
-
-/*
- * Convert double to single.
- */
-cvt_s_d:
- jal get_fs_d
- bne t1, DEXP_INF, 1f # is FS an infinity?
- li t1, SEXP_INF # convert to single
- dsll t2, t2, 3 # convert D fraction to S
- b result_fs_s
-1:
- bne t1, zero, 2f # is FS zero?
- beq t2, zero, result_fs_s # result=0
- jal renorm_fs_d
- subu t1, t1, 3 # correct exp for shift below
- b 3f
-2:
- subu t1, t1, DEXP_BIAS # unbias exponent
- or t2, t2, DIMPL_ONE # add implied one bit
-3:
- dsll t2, t2, 3 # convert D fraction to S
- b norm_noshift_s
-
-/*
- * Convert long integer to single.
- */
-cvt_s_l:
- jal get_fs_long
- b cvt_s_int
-/*
- * Convert integer to single.
- */
-cvt_s_w:
- jal get_fs_int
-cvt_s_int:
- bne t2, zero, 1f # check for zero
- move t1, zero
- b result_fs_s
-/*
- * Find out how many leading zero bits are in t2 and put in t9.
- */
-1:
- move v0, t2
- move t9, zero
- dsrl v1, v0, 32
- bne v1, zero, 1f
- addu t9, 32
- dsll v0, 32
-1:
- dsrl v1, v0, 16
- bne v1, zero, 1f
- addu t9, 16
- dsll v0, 16
-1:
- dsrl v1, v0, 24
- bne v1, zero, 1f
- addu t9, 8
- dsll v0, 8
-1:
- dsrl v1, v0, 28
- bne v1, zero, 1f
- addu t9, 4
- dsll v0, 4
-1:
- dsrl v1, v0, 30
- bne v1, zero, 1f
- addu t9, 2
- dsll v0, 2
-1:
- dsrl v1, v0, 31
- bne v1, zero, 1f
- addu t9, 1
-/*
- * Now shift t2 the correct number of bits.
- */
-1:
- subu t9, SLEAD_ZEROS # dont count leading zeros
- li t1, 23+32 # init exponent
- subu t1, t1, t9 # compute exponent
- beq t9, zero, 1f
- li v0, 32
- blt t9, zero, 2f # if shift < 0, shift right
- subu v0, v0, t9
- sll t2, t2, t9 # shift left
-1:
- add t1, t1, SEXP_BIAS # bias exponent
- and t2, t2, ~SIMPL_ONE # clear implied one bit
- b result_fs_s
-2:
- negu t9 # shift right by t9
- subu v0, v0, t9
- sll t8, t2, v0 # save bits shifted out
- srl t2, t2, t9
- b norm_noshift_s
-
-/*
- * Convert single to double.
- */
-cvt_d_s:
- jal get_fs_s
- dsll t2, 32
- bne t1, SEXP_INF, 1f # is FS an infinity?
- li t1, DEXP_INF # convert to double
- b result_fs_d
-1:
- bne t1, zero, 2f # is FS denormalized or zero?
- beq t2, zero, result_fs_d # is FS zero?
- jal renorm_fs_s
- move t8, zero
- b norm_d
-2:
- addu t1, t1, DEXP_BIAS - SEXP_BIAS # bias exponent correctly
- dsrl t2, t2, 3
- b result_fs_d
-
-/*
- * Convert long integer to double.
- */
-cvt_d_l:
- jal get_fs_long
- b cvt_d_int
-/*
- * Convert integer to double.
- */
-cvt_d_w:
- jal get_fs_int
-cvt_d_int:
- bne t2, zero, 1f # check for zero
- move t1, zero # result=0
- b result_fs_d
-/*
- * Find out how many leading zero bits are in t2 and put in t9.
- */
-1:
- move v0, t2
- move t9, zero
- dsrl v1, v0, 32
- bne v1, zero, 1f
- addu t9, 32
- dsll v0, 32
-1:
- dsrl v1, v0, 16
- bne v1, zero, 1f
- addu t9, 16
- dsll v0, 16
-1:
- dsrl v1, v0, 24
- bne v1, zero, 1f
- addu t9, 8
- dsll v0, 8
-1:
- dsrl v1, v0, 28
- bne v1, zero, 1f
- addu t9, 4
- dsll v0, 4
-1:
- dsrl v1, v0, 30
- bne v1, zero, 1f
- addu t9, 2
- dsll v0, 2
-1:
- dsrl v1, v0, 31
- bne v1, zero, 1f
- addu t9, 1
-/*
- * Now shift t2 the correct number of bits.
- */
-1:
- subu t9, t9, DLEAD_ZEROS # dont count leading zeros
- li t1, DEXP_BIAS + 20 # init exponent
- subu t1, t1, t9 # compute exponent
- beq t9, zero, 1f
- li v0, 64
- blt t9, zero, 2f # if shift < 0, shift right
- subu v0, v0, t9
- dsll t2, t2, t9 # shift left
-1:
- and t2, t2, ~DIMPL_ONE # clear implied one bit
- b result_fs_d
-2:
- negu t9 # shift right by t9
- subu v0, v0, t9
- dsrl t2, t2, t9
- and t2, t2, ~DIMPL_ONE # clear implied one bit
- b result_fs_d
-
-/*
- * Convert single to integer with specific rounding.
- */
-round_w_s:
- li t3, FPC_ROUND_RN
- b do_cvt_w_s
-trunc_w_s:
- li t3, FPC_ROUND_RZ
- b do_cvt_w_s
-ceil_w_s:
- li t3, FPC_ROUND_RP
- b do_cvt_w_s
-floor_w_s:
- li t3, FPC_ROUND_RM
- b do_cvt_w_s
-
-/*
- * Convert single to integer.
- */
-cvt_w_s:
- and t3, a1, FPC_ROUNDING_BITS # get rounding mode
-do_cvt_w_s:
- jal get_fs_s
- bne t1, SEXP_INF, 1f # is FS an infinity?
- bne t2, zero, invalid_w # invalid conversion
-1:
- bne t1, zero, 1f # is FS zero?
- beq t2, zero, result_fs_w # result is zero
- move t2, zero # result is an inexact zero
- b inexact_w
-1:
- subu t1, t1, SEXP_BIAS # unbias exponent
- or t2, t2, SIMPL_ONE # add implied one bit
- dsll t2, t2, DFRAC_BITS - SFRAC_BITS # convert S fraction to D
- b cvt_w
-
-/*
- * Convert double to integer with specific rounding.
- */
-round_w_d:
- li t3, FPC_ROUND_RN
- b do_cvt_w_d
-trunc_w_d:
- li t3, FPC_ROUND_RZ
- b do_cvt_w_d
-ceil_w_d:
- li t3, FPC_ROUND_RP
- b do_cvt_w_d
-floor_w_d:
- li t3, FPC_ROUND_RM
- b do_cvt_w_d
-
-/*
- * Convert double to integer.
- */
-cvt_w_d:
- and t3, a1, FPC_ROUNDING_BITS # get rounding mode
-do_cvt_w_d:
- jal get_fs_d
- bne t1, DEXP_INF, 1f # is FS an infinity?
- bne t2, zero, invalid_w # invalid conversion
-1:
- bne t1, zero, 2f # is FS zero?
- beq t2, zero, result_fs_w # result is zero
- move t2, zero # result is an inexact zero
- b inexact_w
-2:
- subu t1, t1, DEXP_BIAS # unbias exponent
- or t2, t2, DIMPL_ONE # add implied one bit
-cvt_w:
- blt t1, WEXP_MIN, underflow_w # is exponent too small?
- li v0, WEXP_MAX+1
- bgt t1, v0, overflow_w # is exponent too large?
- bne t1, v0, 1f # special check for INT_MIN
- beq t0, zero, overflow_w # if positive, overflow
- bne t2, DIMPL_ONE, overflow_w
- li t2, INT_MIN # result is INT_MIN
- b result_fs_w
-1:
- subu v0, t1, 20 # compute amount to shift
- beq v0, zero, 2f # is shift needed?
- li v1, 64
- blt v0, zero, 1f # if shift < 0, shift right
- subu v1, v1, v0 # shift left
- dsll t2, t2, v0
- b 2f
-1:
- negu v0 # shift right by v0
- subu v1, v1, v0
- dsll t8, t2, v1 # save bits shifted out
- sltu t8, zero, t8 # dont lose any ones
- dsrl t2, t2, v0
-/*
- * round (t0 is sign, t2:63-32 is integer part, t2:31-0 is fractional part).
- */
-2:
- beq t3, FPC_ROUND_RN, 3f # round to nearest
- beq t3, FPC_ROUND_RZ, 5f # round to zero (truncate)
- beq t3, FPC_ROUND_RP, 1f # round to +infinity
- beq t0, zero, 5f # if sign is positive, truncate
- b 2f
-1:
- bne t0, zero, 5f # if sign is negative, truncate
-2:
- daddu t2, t2, GUARDBIT # add in fractional
- blt t2, zero, overflow_w # overflow?
- b 5f
-3:
- daddu t2, t2, GUARDBIT # add in fractional
- blt t2, zero, overflow_w # overflow?
-4:
- bne v0, zero, 5f # if rounded remainder is zero
- and t2, 0xfffffffe00000000 # clear LSB (round to nearest)
-5:
- beq t0, zero, 1f # result positive?
- negu t2 # convert to negative integer
-1:
- dsll v0, 32 # save fraction
- dsrl t2, 32 # shift out fractional part
- beq v0, zero, result_fs_w # is result exact?
-/*
- * Handle inexact exception.
- */
-inexact_w:
- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT
- and v0, a1, FPC_ENABLE_INEXACT
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # save exceptions
- b result_fs_w
-
-/*
- * Conversions to integer which overflow will trap (if enabled),
- * or generate an inexact trap (if enabled),
- * or generate an invalid exception.
- */
-overflow_w:
- or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW
- and v0, a1, FPC_ENABLE_OVERFLOW
- bne v0, zero, fpe_trap
- and v0, a1, FPC_ENABLE_INEXACT
- bne v0, zero, inexact_w # inexact traps enabled?
- b invalid_w
-
-/*
- * Conversions to integer which underflow will trap (if enabled),
- * or generate an inexact trap (if enabled),
- * or generate an invalid exception.
- */
-underflow_w:
- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW
- and v0, a1, FPC_ENABLE_UNDERFLOW
- bne v0, zero, fpe_trap
- and v0, a1, FPC_ENABLE_INEXACT
- bne v0, zero, inexact_w # inexact traps enabled?
- b invalid_w
-
-/*
- * Convert single to long integer with specific rounding.
- */
-round_l_s:
- li t3, FPC_ROUND_RN
- b do_cvt_l_s
-trunc_l_s:
- li t3, FPC_ROUND_RZ
- b do_cvt_l_s
-ceil_l_s:
- li t3, FPC_ROUND_RP
- b do_cvt_l_s
-floor_l_s:
- li t3, FPC_ROUND_RM
- b do_cvt_l_s
-
-/*
- * Convert single to long integer.
- */
-cvt_l_s:
- and t3, a1, FPC_ROUNDING_BITS # get rounding mode
-do_cvt_l_s:
- jal get_fs_s
- bne t1, SEXP_INF, 1f # is FS an infinity?
- bne t2, zero, invalid_l # invalid conversion
-1:
- bne t1, zero, 1f # is FS zero?
- beq t2, zero, result_fs_l # result is zero
- move t2, zero # result is an inexact zero
- b inexact_l
-1:
- subu t1, t1, SEXP_BIAS # unbias exponent
- or t2, t2, SIMPL_ONE # add implied one bit
- dsll t2, t2, DFRAC_BITS - SFRAC_BITS # convert S fraction to D
- b cvt_l
-
-/*
- * Convert double to long integer with specific rounding.
- */
-round_l_d:
- li t3, FPC_ROUND_RN
- b do_cvt_l_d
-trunc_l_d:
- li t3, FPC_ROUND_RZ
- b do_cvt_l_d
-ceil_l_d:
- li t3, FPC_ROUND_RP
- b do_cvt_l_d
-floor_l_d:
- li t3, FPC_ROUND_RM
- b do_cvt_l_d
-
-/*
- * Convert double to long integer.
- */
-cvt_l_d:
- and t3, a1, FPC_ROUNDING_BITS # get rounding mode
-do_cvt_l_d:
- jal get_fs_d
- bne t1, DEXP_INF, 1f # is FS an infinity?
- bne t2, zero, invalid_l # invalid conversion
-1:
- bne t1, zero, 2f # is FS zero?
- beq t2, zero, result_fs_l # result is zero
- move t2, zero # result is an inexact zero
- b inexact_l
-2:
- subu t1, t1, DEXP_BIAS # unbias exponent
- or t2, t2, DIMPL_ONE # add implied one bit
-cvt_l:
- blt t1, LEXP_MIN, underflow_l # is exponent too small?
- li v0, LEXP_MAX+1
- bgt t1, v0, overflow_l # is exponent too large?
- bne t1, v0, 1f # special check for LONG_MIN
- beq t0, zero, overflow_l # if positive, overflow
- bne t2, DIMPL_ONE, overflow_l
- dli t2, LONG_MIN # result is LONG_MIN
- b result_fs_l
-1:
- subu v0, t1, DFRAC_BITS # compute amount to shift
- beq v0, zero, 2f # is shift needed?
- li v1, 64
- blt v0, zero, 1f # if shift < 0, shift right
- subu v1, v1, v0 # shift left
- dsll t2, t2, v0
- b 2f
-1:
- negu v0 # shift right by v0
- subu v1, v1, v0
- dsll t8, t2, v1 # save bits shifted out
- sltu t8, zero, t8 # dont lose any ones
- dsrl t2, t2, v0
-/*
- * round (t0 is sign, t2 is integer part).
- */
-2:
- beq t3, FPC_ROUND_RN, 3f # round to nearest
- beq t3, FPC_ROUND_RZ, 5f # round to zero (truncate)
- beq t3, FPC_ROUND_RP, 1f # round to +infinity
- beq t0, zero, 5f # if sign is positive, truncate
- b 2f
-1:
- bne t0, zero, 5f # if sign is negative, truncate
-2:
- daddu t2, t2, DGUARDBIT # add in fractional
- blt t2, zero, overflow_l # overflow?
- b 5f
-3:
- daddu t2, t2, DGUARDBIT # add in fractional
- blt t2, zero, overflow_l # overflow?
-4:
- bne v0, zero, 5f # if rounded remainder is zero
- and t2, 0xe000000000000000 # clear LSB (round to nearest)
-5:
- beq t0, zero, 1f # result positive?
- negu t2 # convert to negative integer
-1:
- b result_fs_l
- nop
-/*
- * Handle inexact exception.
- */
-inexact_l:
- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT
- and v0, a1, FPC_ENABLE_INEXACT
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # save exceptions
- b result_fs_l
-
-/*
- * Conversions to integer which overflow will trap (if enabled),
- * or generate an inexact trap (if enabled),
- * or generate an invalid exception.
- */
-overflow_l:
- or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW
- and v0, a1, FPC_ENABLE_OVERFLOW
- bne v0, zero, fpe_trap
- and v0, a1, FPC_ENABLE_INEXACT
- bne v0, zero, inexact_l # inexact traps enabled?
- b invalid_l
-
-/*
- * Conversions to integer which underflow will trap (if enabled),
- * or generate an inexact trap (if enabled),
- * or generate an invalid exception.
- */
-underflow_l:
- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW
- and v0, a1, FPC_ENABLE_UNDERFLOW
- bne v0, zero, fpe_trap
- and v0, a1, FPC_ENABLE_INEXACT
- bne v0, zero, inexact_l # inexact traps enabled?
- b invalid_l
-
-/*
- * Compare single.
- */
-cmp_s:
- jal get_cmp_s
- bne t1, SEXP_INF, 1f # is FS an infinity?
- bne t2, zero, unordered # FS is a NAN
-1:
- bne ta1, SEXP_INF, 2f # is FT an infinity?
- bne ta2, zero, unordered # FT is a NAN
-2:
- sll t1, t1, SFRAC_BITS # reassemble exp & frac
- or t1, t1, t2
- sll ta1, ta1, SFRAC_BITS # reassemble exp & frac
- or ta1, ta1, ta2
- beq t0, zero, 1f # is FS positive?
- negu t1
-1:
- beq ta0, zero, 1f # is FT positive?
- negu ta1
-1:
- li v0, COND_LESS
- blt t1, ta1, test_cond # is FS < FT?
- li v0, COND_EQUAL
- beq t1, ta1, test_cond # is FS == FT?
- move v0, zero # FS > FT
- b test_cond
-
-/*
- * Compare double.
- */
-cmp_d:
- jal get_cmp_d
- bne t1, DEXP_INF, 1f # is FS an infinity?
- bne t2, zero, unordered # FS is a NAN
-1:
- bne ta1, DEXP_INF, 2f # is FT an infinity?
- bne ta2, zero, unordered # FT is a NAN
-2:
- dsll t1, t1, DFRAC_BITS # reassemble exp & frac
- or t1, t1, t2
- dsll ta1, ta1, DFRAC_BITS # reassemble exp & frac
- or ta1, ta1, ta2
- beq t0, zero, 1f # is FS positive?
- dnegu t1 # negate t1
-1:
- beq ta0, zero, 1f # is FT positive?
- dnegu ta1
-1:
- li v0, COND_LESS
- blt t1, ta1, test_cond # is FS(MSW) < FT(MSW)?
- li v0, COND_EQUAL
- beq t1, ta1, test_cond # is FS(LSW) == FT(LSW)?
- move v0, zero # FS > FT
-test_cond:
- and v0, v0, a0 # condition match instruction?
-set_cond:
- bne v0, zero, 1f
- and a1, a1, ~FPC_COND_BIT # clear condition bit
- b 2f
-1:
- or a1, a1, FPC_COND_BIT # set condition bit
-2:
- ctc1 a1, FPC_CSR # save condition bit
- b done
-
-unordered:
- and v0, a0, COND_UNORDERED # this cmp match unordered?
- bne v0, zero, 1f
- and a1, a1, ~FPC_COND_BIT # clear condition bit
- b 2f
-1:
- or a1, a1, FPC_COND_BIT # set condition bit
-2:
- and v0, a0, COND_SIGNAL
- beq v0, zero, 1f # is this a signaling cmp?
- or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID
- and v0, a1, FPC_ENABLE_INVALID
- bne v0, zero, fpe_trap
-1:
- ctc1 a1, FPC_CSR # save condition bit
- b done
-
-/*
- * Determine the amount to shift the fraction in order to restore the
- * normalized position. After that, round and handle exceptions.
- */
-norm_s:
- move v0, t2
- move t9, zero # t9 = num of leading zeros
- bne t2, zero, 1f
- move v0, t8
- addu t9, 32
-1:
- srl v1, v0, 16
- bne v1, zero, 1f
- addu t9, 16
- sll v0, 16
-1:
- srl v1, v0, 24
- bne v1, zero, 1f
- addu t9, 8
- sll v0, 8
-1:
- srl v1, v0, 28
- bne v1, zero, 1f
- addu t9, 4
- sll v0, 4
-1:
- srl v1, v0, 30
- bne v1, zero, 1f
- addu t9, 2
- sll v0, 2
-1:
- srl v1, v0, 31
- bne v1, zero, 1f
- addu t9, 1
-/*
- * Now shift t2,t8 the correct number of bits.
- */
-1:
- subu t9, t9, SLEAD_ZEROS # dont count leading zeros
- subu t1, t1, t9 # adjust the exponent
- beq t9, zero, norm_noshift_s
- li v1, 32
- blt t9, zero, 1f # if shift < 0, shift right
- subu v1, v1, t9
- sll t2, t2, t9 # shift t2,t8 left
- srl v0, t8, v1 # save bits shifted out
- or t2, t2, v0
- sll t8, t8, t9
- b norm_noshift_s
-1:
- negu t9 # shift t2,t8 right by t9
- subu v1, v1, t9
- sll v0, t8, v1 # save bits shifted out
- sltu v0, zero, v0 # be sure to save any one bits
- srl t8, t8, t9
- or t8, t8, v0
- sll v0, t2, v1 # save bits shifted out
- or t8, t8, v0
- srl t2, t2, t9
-norm_noshift_s:
- move ta1, t1 # save unrounded exponent
- move ta2, t2 # save unrounded fraction
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- beq v0, FPC_ROUND_RN, 3f # round to nearest
- beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate)
- beq v0, FPC_ROUND_RP, 1f # round to +infinity
- beq t0, zero, 5f # if sign is positive, truncate
- b 2f
-1:
- bne t0, zero, 5f # if sign is negative, truncate
-2:
- beq t8, zero, 5f # if exact, continue
- addu t2, t2, 1 # add rounding bit
- bne t2, SIMPL_ONE<<1, 5f # need to adjust exponent?
- addu t1, t1, 1 # adjust exponent
- srl t2, t2, 1 # renormalize fraction
- b 5f
-3:
- li v0, GUARDBIT # load guard bit for rounding
- addu v0, v0, t8 # add remainder
- sltu v1, v0, t8 # compute carry out
- beq v1, zero, 4f # if no carry, continue
- addu t2, t2, 1 # add carry to result
- bne t2, SIMPL_ONE<<1, 4f # need to adjust exponent?
- addu t1, t1, 1 # adjust exponent
- srl t2, t2, 1 # renormalize fraction
-4:
- bne v0, zero, 5f # if rounded remainder is zero
- and t2, t2, ~1 # clear LSB (round to nearest)
-5:
- bgt t1, SEXP_MAX, overflow_s # overflow?
- blt t1, SEXP_MIN, underflow_s # underflow?
- bne t8, zero, inexact_s # is result inexact?
- addu t1, t1, SEXP_BIAS # bias exponent
- and t2, t2, ~SIMPL_ONE # clear implied one bit
- b result_fs_s
-
-/*
- * Handle inexact exception.
- */
-inexact_s:
- addu t1, t1, SEXP_BIAS # bias exponent
- and t2, t2, ~SIMPL_ONE # clear implied one bit
-inexact_nobias_s:
- jal set_fd_s # save result
- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT
- and v0, a1, FPC_ENABLE_INEXACT
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # save exceptions
- b done
-
-/*
- * Overflow will trap (if enabled),
- * or generate an inexact trap (if enabled),
- * or generate an infinity.
- */
-overflow_s:
- or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW
- and v0, a1, FPC_ENABLE_OVERFLOW
- beq v0, zero, 1f
- subu t1, t1, 192 # bias exponent
- and t2, t2, ~SIMPL_ONE # clear implied one bit
- jal set_fd_s # save result
- b fpe_trap
-1:
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- beq v0, FPC_ROUND_RN, 3f # round to nearest
- beq v0, FPC_ROUND_RZ, 1f # round to zero (truncate)
- beq v0, FPC_ROUND_RP, 2f # round to +infinity
- bne t0, zero, 3f
-1:
- li t1, SEXP_MAX # result is max finite
- li t2, 0x007fffff
- b inexact_s
-2:
- bne t0, zero, 1b
-3:
- li t1, SEXP_MAX + 1 # result is infinity
- move t2, zero
- b inexact_s
-
-/*
- * In this implementation, "tininess" is detected "after rounding" and
- * "loss of accuracy" is detected as "an inexact result".
- */
-underflow_s:
- and v0, a1, FPC_ENABLE_UNDERFLOW
- beq v0, zero, 1f
-/*
- * Underflow is enabled so compute the result and trap.
- */
- addu t1, t1, 192 # bias exponent
- and t2, t2, ~SIMPL_ONE # clear implied one bit
- jal set_fd_s # save result
- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW
- b fpe_trap
-/*
- * Underflow is not enabled so compute the result,
- * signal inexact result (if it is) and trap (if enabled).
- */
-1:
- move t1, ta1 # get unrounded exponent
- move t2, ta2 # get unrounded fraction
- li t9, SEXP_MIN # compute shift amount
- subu t9, t9, t1 # shift t2,t8 right by t9
- blt t9, SFRAC_BITS+2, 3f # shift all the bits out?
- move t1, zero # result is inexact zero
- move t2, zero
- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW
-/*
- * Now round the zero result.
- * Only need to worry about rounding to +- infinity when the sign matches.
- */
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- beq v0, FPC_ROUND_RN, inexact_nobias_s # round to nearest
- beq v0, FPC_ROUND_RZ, inexact_nobias_s # round to zero
- beq v0, FPC_ROUND_RP, 1f # round to +infinity
- beq t0, zero, inexact_nobias_s # if sign is positive, truncate
- b 2f
-1:
- bne t0, zero, inexact_nobias_s # if sign is negative, truncate
-2:
- addu t2, t2, 1 # add rounding bit
- b inexact_nobias_s
-3:
- li v1, 32
- subu v1, v1, t9
- sltu v0, zero, t8 # be sure to save any one bits
- sll t8, t2, v1 # save bits shifted out
- or t8, t8, v0 # include sticky bits
- srl t2, t2, t9
-/*
- * Now round the denormalized result.
- */
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- beq v0, FPC_ROUND_RN, 3f # round to nearest
- beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate)
- beq v0, FPC_ROUND_RP, 1f # round to +infinity
- beq t0, zero, 5f # if sign is positive, truncate
- b 2f
-1:
- bne t0, zero, 5f # if sign is negative, truncate
-2:
- beq t8, zero, 5f # if exact, continue
- addu t2, t2, 1 # add rounding bit
- b 5f
-3:
- li v0, GUARDBIT # load guard bit for rounding
- addu v0, v0, t8 # add remainder
- sltu v1, v0, t8 # compute carry out
- beq v1, zero, 4f # if no carry, continue
- addu t2, t2, 1 # add carry to result
-4:
- bne v0, zero, 5f # if rounded remainder is zero
- and t2, t2, ~1 # clear LSB (round to nearest)
-5:
- move t1, zero # denorm or zero exponent
- jal set_fd_s # save result
- beq t8, zero, done # check for exact result
- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW
- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT
- and v0, a1, FPC_ENABLE_INEXACT
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # save exceptions
- b done
-
-/*
- * Determine the amount to shift the fraction in order to restore the
- * normalized position. After that, round and handle exceptions.
- */
-norm_d:
- move v0, t2
- move t9, zero # t9 = num of leading zeros
- dsrl v1, v0, 32
- bne v1, zero, 1f
- addu t9, 32
- dsll v0, 32
-1:
- dsrl v1, v0, 16
- bne v1, zero, 1f
- addu t9, 16
- dsll v0, 16
-1:
- dsrl v1, v0, 24
- bne v1, zero, 1f
- addu t9, 8
- dsll v0, 8
-1:
- dsrl v1, v0, 28
- bne v1, zero, 1f
- addu t9, 4
- dsll v0, 4
-1:
- dsrl v1, v0, 30
- bne v1, zero, 1f
- addu t9, 2
- dsll v0, 2
-1:
- dsrl v1, v0, 31
- bne v1, zero, 1f
- addu t9, 1
-/*
- * Now shift t2,t8 the correct number of bits.
- */
-1:
- subu t9, t9, DLEAD_ZEROS # dont count leading zeros
- subu t1, t1, t9 # adjust the exponent
- beq t9, zero, norm_noshift_d
- li v1, 64
- blt t9, zero, 2f # if shift < 0, shift right
- subu v1, v1, t9
- dsll t2, t2, t9 # shift left by t9
- dsrl v0, t8, v1 # save bits shifted out
- or t2, t2, v0
- dsll t8, t8, t9
- b norm_noshift_d
-2:
- negu t9 # shift right by t9
- subu v1, v1, t9 # (known to be < 32 bits)
- dsll v0, t8, v1 # save bits shifted out
- sltu v0, zero, v0 # be sure to save any one bits
- dsrl t8, t8, t9
- or t8, t8, v0
- dsll v0, t2, v1 # save bits shifted out
- or t8, t8, v0
- dsrl t2, t2, t9
-norm_noshift_d:
- move ta1, t1 # save unrounded exponent
- move ta2, t2 # save unrounded fraction (MS)
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- beq v0, FPC_ROUND_RN, 3f # round to nearest
- beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate)
- beq v0, FPC_ROUND_RP, 1f # round to +infinity
- beq t0, zero, 5f # if sign is positive, truncate
- b 2f
-1:
- bne t0, zero, 5f # if sign is negative, truncate
-2:
- beq t8, zero, 5f # if exact, continue
- daddu t2, t2, 1 # add rounding bit
- bne t2, DIMPL_ONE<<1, 5f # need to adjust exponent?
- addu t1, t1, 1 # adjust exponent
- dsrl t2, t2, 1 # renormalize fraction
- b 5f
-3:
- dli v0, DGUARDBIT # load guard bit for rounding
- addu v0, v0, t8 # add remainder
- sltu v1, v0, t8 # compute carry out
- beq v1, zero, 4f # branch if no carry
- daddu t2, t2, 1 # add carry to result
- bne t2, DIMPL_ONE<<1, 4f # need to adjust exponent?
- addu t1, t1, 1 # adjust exponent
- srl t2, t2, 1 # renormalize fraction
-4:
- bne v0, zero, 5f # if rounded remainder is zero
- and t2, t2, ~1 # clear LSB (round to nearest)
-5:
- bgt t1, DEXP_MAX, overflow_d # overflow?
- blt t1, DEXP_MIN, underflow_d # underflow?
- bne t8, zero, inexact_d # is result inexact?
- addu t1, t1, DEXP_BIAS # bias exponent
- and t2, t2, ~DIMPL_ONE # clear implied one bit
- b result_fs_d
-
-/*
- * Handle inexact exception.
- */
-inexact_d:
- addu t1, t1, DEXP_BIAS # bias exponent
- and t2, t2, ~DIMPL_ONE # clear implied one bit
-inexact_nobias_d:
- jal set_fd_d # save result
- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT
- and v0, a1, FPC_ENABLE_INEXACT
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # save exceptions
- b done
-
-/*
- * Overflow will trap (if enabled),
- * or generate an inexact trap (if enabled),
- * or generate an infinity.
- */
-overflow_d:
- or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW
- and v0, a1, FPC_ENABLE_OVERFLOW
- beq v0, zero, 1f
- subu t1, t1, 1536 # bias exponent
- and t2, t2, ~DIMPL_ONE # clear implied one bit
- jal set_fd_d # save result
- b fpe_trap
-1:
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- beq v0, FPC_ROUND_RN, 3f # round to nearest
- beq v0, FPC_ROUND_RZ, 1f # round to zero (truncate)
- beq v0, FPC_ROUND_RP, 2f # round to +infinity
- bne t0, zero, 3f
-1:
- li t1, DEXP_MAX # result is max finite
- dli t2, 0x000fffffffffffff
- b inexact_d
-2:
- bne t0, zero, 1b
-3:
- li t1, DEXP_MAX + 1 # result is infinity
- move t2, zero
- b inexact_d
-
-/*
- * In this implementation, "tininess" is detected "after rounding" and
- * "loss of accuracy" is detected as "an inexact result".
- */
-underflow_d:
- and v0, a1, FPC_ENABLE_UNDERFLOW
- beq v0, zero, 1f
-/*
- * Underflow is enabled so compute the result and trap.
- */
- addu t1, t1, 1536 # bias exponent
- and t2, t2, ~DIMPL_ONE # clear implied one bit
- jal set_fd_d # save result
- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW
- b fpe_trap
-/*
- * Underflow is not enabled so compute the result,
- * signal inexact result (if it is) and trap (if enabled).
- */
-1:
- move t1, ta1 # get unrounded exponent
- move t2, ta2 # get unrounded fraction (MS)
- li t9, DEXP_MIN # compute shift amount
- subu t9, t9, t1 # shift t2,t8 right by t9
- blt t9, DFRAC_BITS+2, 3f # shift all the bits out?
- move t1, zero # result is inexact zero
- move t2, zero
- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW
-/*
- * Now round the zero result.
- * Only need to worry about rounding to +- infinity when the sign matches.
- */
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- beq v0, FPC_ROUND_RN, inexact_nobias_d # round to nearest
- beq v0, FPC_ROUND_RZ, inexact_nobias_d # round to zero
- beq v0, FPC_ROUND_RP, 1f # round to +infinity
- beq t0, zero, inexact_nobias_d # if sign is positive, truncate
- b 2f
-1:
- bne t0, zero, inexact_nobias_d # if sign is negative, truncate
-2:
- daddu t2, t2, 1 # add rounding bit
- b inexact_nobias_d
-3:
- li v1, 64
- subu v1, v1, t9
- sltu v0, zero, t8 # be sure to save any one bits
- dsll t8, t2, v1 # save bits shifted out
- or t8, t8, v0 # include sticky bits
- dsrl t2, t2, t9
-/*
- * Now round the denormalized result.
- */
- and v0, a1, FPC_ROUNDING_BITS # get rounding mode
- beq v0, FPC_ROUND_RN, 3f # round to nearest
- beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate)
- beq v0, FPC_ROUND_RP, 1f # round to +infinity
- beq t0, zero, 5f # if sign is positive, truncate
- b 2f
-1:
- bne t0, zero, 5f # if sign is negative, truncate
-2:
- beq t8, zero, 5f # if exact, continue
- daddu t2, t2, 1 # add rounding bit
- b 5f
-3:
- dli v0, DGUARDBIT # load guard bit for rounding
- daddu v0, v0, t8 # add remainder
- sltu v1, v0, t8 # compute carry out
- beq v1, zero, 4f # if no carry, continue
- daddu t2, t2, 1 # add carry
-4:
- bne v0, zero, 5f # if rounded remainder is zero
- and t2, t2, ~1 # clear LSB (round to nearest)
-5:
- move t1, zero # denorm or zero exponent
- jal set_fd_d # save result
- beq t8, zero, done # check for exact result
- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW
- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT
- and v0, a1, FPC_ENABLE_INEXACT
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # save exceptions
- b done
-
-/*
- * Signal an invalid operation if the trap is enabled; otherwise,
- * the result is a quiet NAN.
- */
-invalid_s: # trap invalid operation
- or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID
- and v0, a1, FPC_ENABLE_INVALID
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # save exceptions
- move t0, zero # result is a quiet NAN
- li t1, SEXP_INF
- li t2, SQUIET_NAN
- jal set_fd_s # save result (in t0,t1,t2)
- b done
-
-/*
- * Signal an invalid operation if the trap is enabled; otherwise,
- * the result is a quiet NAN.
- */
-invalid_d: # trap invalid operation
- or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID
- and v0, a1, FPC_ENABLE_INVALID
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # save exceptions
- move t0, zero # result is a quiet NAN
- li t1, DEXP_INF
- dli t2, DQUIET_NAN
- jal set_fd_d # save result (in t0,t1,t2)
- b done
-
-/*
- * Signal an invalid operation if the trap is enabled; otherwise,
- * the result is INT_MAX or INT_MIN.
- */
-invalid_w: # trap invalid operation
- or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID
- and v0, a1, FPC_ENABLE_INVALID
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # save exceptions
- bne t0, zero, 1f
- li t2, INT_MAX # result is INT_MAX
- b result_fs_w
-1:
- li t2, INT_MIN # result is INT_MIN
- b result_fs_w
-
-/*
- * Signal an invalid operation if the trap is enabled; otherwise,
- * the result is LONG_MAX or LONG_MIN.
- */
-invalid_l: # trap invalid operation
- or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID
- and v0, a1, FPC_ENABLE_INVALID
- bne v0, zero, fpe_trap
- ctc1 a1, FPC_CSR # save exceptions
- bne t0, zero, 1f
- dli t2, LONG_MAX # result is INT_MAX
- b result_fs_l
-1:
- dli t2, LONG_MIN # result is INT_MIN
- b result_fs_l
-
-/*
- * Trap if the hardware should have handled this case.
- */
-fpe_trap:
- move a2, a1 # code = FP CSR
- ctc1 a1, FPC_CSR # save exceptions
- li v0, 1
- b done_err
-
-/*
- * Send an illegal instruction signal to the current process.
- */
-ill:
- ctc1 a1, FPC_CSR # save exceptions
- move a2, a0 # code = FP instruction
- li v0, 1
- b done_err
-
-result_ft_s:
- move t0, ta0 # result is FT
- move t1, ta1
- move t2, ta2
-result_fs_s: # result is FS
- jal set_fd_s # save result (in t0,t1,t2)
- b done
-
-result_fs_w:
- jal set_fd_word # save result (in t2)
- b done
-
-result_fs_l:
- move t0, t2
- jal set_fd_dword # save result (in t0)
- b done
-
-result_ft_d:
- move t0, ta0 # result is FT
- move t1, ta1
- move t2, ta2
-result_fs_d: # result is FS
- jal set_fd_d # save result (in t0,t1,t2)
-
-done:
- li v0, 0
-done_err:
- PTR_L ra, CF_RA_OFFS(sp)
- PTR_ADD sp, sp, FRAMESZ(CF_SZ)
- j ra
-END(MipsEmulateFP)
-
-/*----------------------------------------------------------------------------
- * get_fs_int --
- *
- * Read (integer) the FS register (bits 15-11).
- * This is an internal routine used by MipsEmulateFP only.
- *
- * Inputs:
- * a0 holds the instruction word; only its FS field is examined here.
- *
- * Results:
- * t0 contains the sign
- * t2 contains the fraction
- *
- * How it works:
- * The FS field of a0 is shifted and masked into (register number * 8),
- * which is used as a byte offset into get_fs_int_tbl, a table of 8-byte
- * code addresses. Each GET_FS_INT(n) invocation appends one .dword entry
- * to that table (in .rdata) and emits the matching stub (in .text); the
- * stub moves $n into t2 with mfc1 and branches to get_fs_int_done.
- * At get_fs_int_done, t0 is set from bit 31 of t2. When t2 is negative
- * it is negated and then shifted left and right by 33 bits, which keeps
- * only bits 30..0 of the negated value.
- * NOTE(review): for an input of 0x80000000 that shift pair appears to
- * leave t2 == 0 -- confirm whether this is intended.
- *
- * Side effects:
- * a3 is clobbered (table offset, then stub address).
- *
- *----------------------------------------------------------------------------
- */
-#define GET_FS_INT(n) \
- .rdata; \
- .dword get_fs_int_ ## n; \
- .text; \
-get_fs_int_ ## n: \
- mfc1 t2, $ ## n; \
- b get_fs_int_done
-
-LEAF(get_fs_int, 0)
- srl a3, a0, 11 - 3 # get FS field
- and a3, a3, 0x1f << 3 # mask FS field
- ld a3, get_fs_int_tbl(a3) # switch on register number
- j a3 /* enter the stub selected from the table */
-
- .rdata
-get_fs_int_tbl: /* entries are appended by the GET_FS_INT() uses below */
- .text
-
- GET_FS_INT(f0)
- GET_FS_INT(f1)
- GET_FS_INT(f2)
- GET_FS_INT(f3)
- GET_FS_INT(f4)
- GET_FS_INT(f5)
- GET_FS_INT(f6)
- GET_FS_INT(f7)
- GET_FS_INT(f8)
- GET_FS_INT(f9)
- GET_FS_INT(f10)
- GET_FS_INT(f11)
- GET_FS_INT(f12)
- GET_FS_INT(f13)
- GET_FS_INT(f14)
- GET_FS_INT(f15)
- GET_FS_INT(f16)
- GET_FS_INT(f17)
- GET_FS_INT(f18)
- GET_FS_INT(f19)
- GET_FS_INT(f20)
- GET_FS_INT(f21)
- GET_FS_INT(f22)
- GET_FS_INT(f23)
- GET_FS_INT(f24)
- GET_FS_INT(f25)
- GET_FS_INT(f26)
- GET_FS_INT(f27)
- GET_FS_INT(f28)
- GET_FS_INT(f29)
- GET_FS_INT(f30)
- GET_FS_INT(f31)
-
-get_fs_int_done:
- srl t0, t2, 31 # init the sign bit
- bge t2, zero, 1f /* non-negative value: t2 is already the magnitude */
- negu t2 /* negative value: negate it */
- dsll t2, 33 /* shift pair keeps only bits 30..0 of t2 */
- dsrl t2, 33
-1:
- j ra /* return to the caller */
-END(get_fs_int)
-
-/*----------------------------------------------------------------------------
- * get_fs_long --
- *
- * Read (long integer) the FS register (bits 15-11).
- * This is an internal routine used by MipsEmulateFP only.
- *
- * Inputs:
- * a0 holds the instruction word; only its FS field is examined here.
- *
- * Results:
- * t0 contains the sign
- * t2 contains the fraction
- *
- * How it works:
- * The FS field of a0 is shifted and masked into (register number * 8),
- * which is used as a byte offset into get_fs_long_tbl, a table of 8-byte
- * code addresses. Each GET_FS_LONG(n) invocation appends one .dword entry
- * to that table (in .rdata) and emits the matching stub (in .text); the
- * stub moves $n into t2 with dmfc1 and branches to get_fs_long_done.
- * At get_fs_long_done, t0 is set from bit 63 of t2, and a negative t2
- * is replaced by its negation (dnegu).
- *
- * Side effects:
- * a3 is clobbered (table offset, then stub address).
- *
- *----------------------------------------------------------------------------
- */
-#define GET_FS_LONG(n) \
- .rdata; \
- .dword get_fs_long_ ## n; \
- .text; \
-get_fs_long_ ## n: \
- dmfc1 t2, $ ## n; \
- b get_fs_long_done
-
-LEAF(get_fs_long, 0)
- srl a3, a0, 11 - 3 # get FS field
- and a3, a3, 0x1f << 3 # mask FS field
- ld a3, get_fs_long_tbl(a3) # switch on register number
- j a3 /* enter the stub selected from the table */
-
- .rdata
-get_fs_long_tbl: /* entries are appended by the GET_FS_LONG() uses below */
- .text
-
- GET_FS_LONG(f0)
- GET_FS_LONG(f1)
- GET_FS_LONG(f2)
- GET_FS_LONG(f3)
- GET_FS_LONG(f4)
- GET_FS_LONG(f5)
- GET_FS_LONG(f6)
- GET_FS_LONG(f7)
- GET_FS_LONG(f8)
- GET_FS_LONG(f9)
- GET_FS_LONG(f10)
- GET_FS_LONG(f11)
- GET_FS_LONG(f12)
- GET_FS_LONG(f13)
- GET_FS_LONG(f14)
- GET_FS_LONG(f15)
- GET_FS_LONG(f16)
- GET_FS_LONG(f17)
- GET_FS_LONG(f18)
- GET_FS_LONG(f19)
- GET_FS_LONG(f20)
- GET_FS_LONG(f21)
- GET_FS_LONG(f22)
- GET_FS_LONG(f23)
- GET_FS_LONG(f24)
- GET_FS_LONG(f25)
- GET_FS_LONG(f26)
- GET_FS_LONG(f27)
- GET_FS_LONG(f28)
- GET_FS_LONG(f29)
- GET_FS_LONG(f30)
- GET_FS_LONG(f31)
-
-get_fs_long_done:
- dsrl t0, t2, 63 # init the sign bit
- bge t2, zero, 1f /* non-negative value: t2 is already the magnitude */
- dnegu t2 /* negative value: negate it (64-bit) */
-1:
- j ra /* return to the caller */
-END(get_fs_long)
-
-/*----------------------------------------------------------------------------
- * get_ft_fs_s --
- *
- * Read (single precision) the FT register (bits 20-16) and
- * the FS register (bits 15-11) and break up into fields.
- * This is an internal routine used by MipsEmulateFP only.
- *
- * Inputs:
- * a0 holds the instruction word; its FT and FS fields select the
- * registers to read.
- *
- * Results:
- * t0 contains the FS sign
- * t1 contains the FS (biased) exponent
- * t2 contains the FS fraction
- * ta0 contains the FT sign
- * ta1 contains the FT (biased) exponent
- * ta2 contains the FT fraction
- *
- * How it works:
- * The FT field of a0 is turned into (register number * 8) and used as a
- * byte offset into get_ft_s_tbl; each GET_FT_S(n) invocation appends one
- * .dword entry to that table (in .rdata) and emits a stub (in .text)
- * that moves $n into ta0 with mfc1 and branches to get_ft_s_done.
- * There the raw word is split into exponent, fraction and sign. When the
- * exponent equals SEXP_INF and the fraction has a bit in common with
- * SSIGNAL_NAN, control branches to invalid_s instead of continuing.
- * Otherwise execution falls through into get_fs_s, which does the same
- * for the FS register and returns with "j ra".
- *
- * Side effects:
- * a3 and v0 are clobbered.
- *
- *----------------------------------------------------------------------------
- */
-#define GET_FT_S(n) \
- .rdata; \
- .dword get_ft_s_ ## n; \
- .text; \
-get_ft_s_ ## n: \
- mfc1 ta0, $ ## n; \
- b get_ft_s_done
-
-LEAF(get_ft_fs_s, 0)
- srl a3, a0, 16 - 3 # get FT field
- and a3, a3, 0x1f << 3 # mask FT field
- ld a3, get_ft_s_tbl(a3) # switch on register number
- j a3 /* enter the stub selected from the table */
-
- .rdata
-get_ft_s_tbl: /* entries are appended by the GET_FT_S() uses below */
- .text
-
- GET_FT_S(f0)
- GET_FT_S(f1)
- GET_FT_S(f2)
- GET_FT_S(f3)
- GET_FT_S(f4)
- GET_FT_S(f5)
- GET_FT_S(f6)
- GET_FT_S(f7)
- GET_FT_S(f8)
- GET_FT_S(f9)
- GET_FT_S(f10)
- GET_FT_S(f11)
- GET_FT_S(f12)
- GET_FT_S(f13)
- GET_FT_S(f14)
- GET_FT_S(f15)
- GET_FT_S(f16)
- GET_FT_S(f17)
- GET_FT_S(f18)
- GET_FT_S(f19)
- GET_FT_S(f20)
- GET_FT_S(f21)
- GET_FT_S(f22)
- GET_FT_S(f23)
- GET_FT_S(f24)
- GET_FT_S(f25)
- GET_FT_S(f26)
- GET_FT_S(f27)
- GET_FT_S(f28)
- GET_FT_S(f29)
- GET_FT_S(f30)
- GET_FT_S(f31)
-
-get_ft_s_done:
- srl ta1, ta0, SFRAC_BITS # get exponent
- and ta1, ta1, 0xFF /* keep the low 8 bits: the exponent field */
- and ta2, ta0, 0x7FFFFF # get fraction
- srl ta0, ta0, 31 # get sign
- bne ta1, SEXP_INF, 1f # is it a signaling NAN?
- and v0, ta2, SSIGNAL_NAN /* mask the fraction with SSIGNAL_NAN */
- bne v0, zero, invalid_s /* nonzero: take the invalid_s path */
-1:
- /* fall through to get FS */
-
-/*----------------------------------------------------------------------------
- * get_fs_s --
- *
- * Read (single precision) the FS register (bits 15-11) and
- * break up into fields.
- * This is an internal routine used by MipsEmulateFP only.
- *
- * This is an alternate entry point (ALEAF) inside get_ft_fs_s: it is
- * reached either directly or by falling through from get_ft_s_done.
- * The FS field of a0 indexes get_fs_s_tbl in the same way as above;
- * each GET_FS_S(n) stub moves $n into t0 with mfc1 and branches to
- * get_fs_s_done, where the word is split into fields. A value whose
- * exponent equals SEXP_INF and whose fraction has a bit in common with
- * SSIGNAL_NAN branches to invalid_s instead of returning.
- * a3 and v0 are clobbered.
- *
- * Results:
- * t0 contains the sign
- * t1 contains the (biased) exponent
- * t2 contains the fraction
- *
- *----------------------------------------------------------------------------
- */
-#define GET_FS_S(n) \
- .rdata; \
- .dword get_fs_s_ ## n; \
- .text; \
-get_fs_s_ ## n: \
- mfc1 t0, $ ## n; \
- b get_fs_s_done
-
-ALEAF(get_fs_s)
- srl a3, a0, 11 - 3 # get FS field
- and a3, a3, 0x1f << 3 # mask FS field
- ld a3, get_fs_s_tbl(a3) # switch on register number
- j a3 /* enter the stub selected from the table */
-
- .rdata
-get_fs_s_tbl: /* entries are appended by the GET_FS_S() uses below */
- .text
-
- GET_FS_S(f0)
- GET_FS_S(f1)
- GET_FS_S(f2)
- GET_FS_S(f3)
- GET_FS_S(f4)
- GET_FS_S(f5)
- GET_FS_S(f6)
- GET_FS_S(f7)
- GET_FS_S(f8)
- GET_FS_S(f9)
- GET_FS_S(f10)
- GET_FS_S(f11)
- GET_FS_S(f12)
- GET_FS_S(f13)
- GET_FS_S(f14)
- GET_FS_S(f15)
- GET_FS_S(f16)
- GET_FS_S(f17)
- GET_FS_S(f18)
- GET_FS_S(f19)
- GET_FS_S(f20)
- GET_FS_S(f21)
- GET_FS_S(f22)
- GET_FS_S(f23)
- GET_FS_S(f24)
- GET_FS_S(f25)
- GET_FS_S(f26)
- GET_FS_S(f27)
- GET_FS_S(f28)
- GET_FS_S(f29)
- GET_FS_S(f30)
- GET_FS_S(f31)
-
-get_fs_s_done:
- srl t1, t0, SFRAC_BITS # get exponent
- and t1, t1, 0xFF /* keep the low 8 bits: the exponent field */
- and t2, t0, 0x7FFFFF # get fraction
- srl t0, t0, 31 # get sign
- bne t1, SEXP_INF, 1f # is it a signaling NAN?
- and v0, t2, SSIGNAL_NAN /* mask the fraction with SSIGNAL_NAN */
- bne v0, zero, invalid_s /* nonzero: take the invalid_s path */
-1:
- j ra /* return to the caller */
-END(get_ft_fs_s)
-
-/*----------------------------------------------------------------------------
- * get_ft_fs_d --
- *
- * Read (double precision) the FT register (bits 20-16) and
- * the FS register (bits 15-11) and break up into fields.
- * This is an internal routine used by MipsEmulateFP only.
- *
- * Results:
- * t0 contains the FS sign
- * t1 contains the FS (biased) exponent
- * t2 contains the FS fraction
- * ta0 contains the FT sign
- * ta1 contains the FT (biased) exponent
- * ta2 contains the FT fraction
- *
- *----------------------------------------------------------------------------
- */
-#define GET_FT_FS_D(n) \
- .rdata; \
- .dword get_ft_fs_d_ ## n; \
- .text; \
-get_ft_fs_d_ ## n: \
- dmfc1 ta2, $ ## n; \
- b get_ft_d_done
-
-LEAF(get_ft_fs_d, 0)
- srl a3, a0, 16 - 3 # get FT field
- and a3, a3, 0x1f << 3 # mask FT field
- ld a3, get_ft_d_tbl(a3) # switch on register number
- j a3
-
- .rdata
-get_ft_d_tbl:
- .text
-
- GET_FT_FS_D(f0)
- GET_FT_FS_D(f1)
- GET_FT_FS_D(f2)
- GET_FT_FS_D(f3)
- GET_FT_FS_D(f4)
- GET_FT_FS_D(f5)
- GET_FT_FS_D(f6)
- GET_FT_FS_D(f7)
- GET_FT_FS_D(f8)
- GET_FT_FS_D(f9)
- GET_FT_FS_D(f10)
- GET_FT_FS_D(f11)
- GET_FT_FS_D(f12)
- GET_FT_FS_D(f13)
- GET_FT_FS_D(f14)
- GET_FT_FS_D(f15)
- GET_FT_FS_D(f16)
- GET_FT_FS_D(f17)
- GET_FT_FS_D(f18)
- GET_FT_FS_D(f19)
- GET_FT_FS_D(f20)
- GET_FT_FS_D(f21)
- GET_FT_FS_D(f22)
- GET_FT_FS_D(f23)
- GET_FT_FS_D(f24)
- GET_FT_FS_D(f25)
- GET_FT_FS_D(f26)
- GET_FT_FS_D(f27)
- GET_FT_FS_D(f28)
- GET_FT_FS_D(f29)
- GET_FT_FS_D(f30)
- GET_FT_FS_D(f31)
-
-get_ft_d_done:
- dsrl ta0, ta2, 63 # get sign
- dsrl ta1, ta2, DFRAC_BITS # get exponent
- and ta1, ta1, 0x7FF
- dsll ta2, 12
- dsrl ta2, 12 # get fraction
- bne ta1, DEXP_INF, 1f # is it a signaling NAN?
- and v0, ta2, DSIGNAL_NAN
- bne v0, zero, invalid_d
-1:
- /* fall through to get FS */
-
-/*----------------------------------------------------------------------------
- * get_fs_d --
- *
- * Read (double precision) the FS register (bits 15-11) and
- * break up into fields.
- * This is an internal routine used by MipsEmulateFP only.
- *
- * Results:
- * t0 contains the sign
- * t1 contains the (biased) exponent
- * t2 contains the fraction
- *
- *----------------------------------------------------------------------------
- */
-#define GET_FS_D(n) \
- .rdata; \
- .dword get_fs_d_ ## n; \
- .text; \
-get_fs_d_ ## n: \
- dmfc1 t2, $ ## n; \
- b get_fs_d_done
-
-ALEAF(get_fs_d)
- srl a3, a0, 11 - 3 # get FS field
- and a3, a3, 0x1f << 3 # mask FS field
- ld a3, get_fs_d_tbl(a3) # switch on register number
- j a3
-
- .rdata
-get_fs_d_tbl:
- .text
-
- GET_FS_D(f0)
- GET_FS_D(f1)
- GET_FS_D(f2)
- GET_FS_D(f3)
- GET_FS_D(f4)
- GET_FS_D(f5)
- GET_FS_D(f6)
- GET_FS_D(f7)
- GET_FS_D(f8)
- GET_FS_D(f9)
- GET_FS_D(f10)
- GET_FS_D(f11)
- GET_FS_D(f12)
- GET_FS_D(f13)
- GET_FS_D(f14)
- GET_FS_D(f15)
- GET_FS_D(f16)
- GET_FS_D(f17)
- GET_FS_D(f18)
- GET_FS_D(f19)
- GET_FS_D(f20)
- GET_FS_D(f21)
- GET_FS_D(f22)
- GET_FS_D(f23)
- GET_FS_D(f24)
- GET_FS_D(f25)
- GET_FS_D(f26)
- GET_FS_D(f27)
- GET_FS_D(f28)
- GET_FS_D(f29)
- GET_FS_D(f30)
- GET_FS_D(f31)
-
-get_fs_d_done:
- dsrl t0, t2, 63 # get sign
- dsrl t1, t2, DFRAC_BITS # get exponent
- and t1, t1, 0x7FF
- dsll t2, 12
- dsrl t2, 12 # get fraction
- bne t1, DEXP_INF, 1f # is it a signaling NAN?
- and v0, t2, DSIGNAL_NAN
- bne v0, zero, invalid_d
-1:
- j ra
-END(get_ft_fs_d)
-
-/*----------------------------------------------------------------------------
- * get_cmp_s --
- *
- * Read (single precision) the FS register (bits 15-11) and
- * the FT register (bits 20-16) and break up into fields.
- * This is an internal routine used by MipsEmulateFP only.
- *
- * Results:
- * t0 contains the sign
- * t1 contains the (biased) exponent
- * t2 contains the fraction
- * ta0 contains the sign
- * ta1 contains the (biased) exponent
- * ta2 contains the fraction
- *
- *----------------------------------------------------------------------------
- */
-#define CMP_FS_S(n) \
- .rdata; \
- .dword cmp_fs_s_ ## n; \
- .text; \
-cmp_fs_s_ ## n: \
- mfc1 t0, $ ## n; \
- b cmp_fs_s_done
-
-LEAF(get_cmp_s, 0)
- srl a3, a0, 11 - 3 # get FS field
- and a3, a3, 0x1f << 3 # mask FS field
- ld a3, cmp_fs_s_tbl(a3) # switch on register number
- j a3
-
- .rdata
-cmp_fs_s_tbl:
- .text
-
- CMP_FS_S(f0)
- CMP_FS_S(f1)
- CMP_FS_S(f2)
- CMP_FS_S(f3)
- CMP_FS_S(f4)
- CMP_FS_S(f5)
- CMP_FS_S(f6)
- CMP_FS_S(f7)
- CMP_FS_S(f8)
- CMP_FS_S(f9)
- CMP_FS_S(f10)
- CMP_FS_S(f11)
- CMP_FS_S(f12)
- CMP_FS_S(f13)
- CMP_FS_S(f14)
- CMP_FS_S(f15)
- CMP_FS_S(f16)
- CMP_FS_S(f17)
- CMP_FS_S(f18)
- CMP_FS_S(f19)
- CMP_FS_S(f20)
- CMP_FS_S(f21)
- CMP_FS_S(f22)
- CMP_FS_S(f23)
- CMP_FS_S(f24)
- CMP_FS_S(f25)
- CMP_FS_S(f26)
- CMP_FS_S(f27)
- CMP_FS_S(f28)
- CMP_FS_S(f29)
- CMP_FS_S(f30)
- CMP_FS_S(f31)
-
-cmp_fs_s_done:
- srl t1, t0, SFRAC_BITS # get exponent
- and t1, t1, 0xFF
- and t2, t0, 0x7FFFFF # get fraction
- srl t0, t0, 31 # get sign
-
-#define CMP_FT_S(n) \
- .rdata; \
- .dword cmp_ft_s_ ## n; \
- .text; \
-cmp_ft_s_ ## n: \
- mfc1 ta0, $ ## n; \
- b cmp_ft_s_done
-
- srl a3, a0, 16 - 3 # get FT field
- and a3, a3, 0x1f << 3 # mask FT field
- ld a3, cmp_ft_s_tbl(a3) # switch on register number
- j a3
-
- .rdata
-cmp_ft_s_tbl:
- .text
-
- CMP_FT_S(f0)
- CMP_FT_S(f1)
- CMP_FT_S(f2)
- CMP_FT_S(f3)
- CMP_FT_S(f4)
- CMP_FT_S(f5)
- CMP_FT_S(f6)
- CMP_FT_S(f7)
- CMP_FT_S(f8)
- CMP_FT_S(f9)
- CMP_FT_S(f10)
- CMP_FT_S(f11)
- CMP_FT_S(f12)
- CMP_FT_S(f13)
- CMP_FT_S(f14)
- CMP_FT_S(f15)
- CMP_FT_S(f16)
- CMP_FT_S(f17)
- CMP_FT_S(f18)
- CMP_FT_S(f19)
- CMP_FT_S(f20)
- CMP_FT_S(f21)
- CMP_FT_S(f22)
- CMP_FT_S(f23)
- CMP_FT_S(f24)
- CMP_FT_S(f25)
- CMP_FT_S(f26)
- CMP_FT_S(f27)
- CMP_FT_S(f28)
- CMP_FT_S(f29)
- CMP_FT_S(f30)
-
-cmp_ft_s_done:
- srl ta1, ta0, SFRAC_BITS # get exponent
- and ta1, ta1, 0xFF
- and ta2, ta0, 0x7FFFFF # get fraction
- srl ta0, ta0, 31 # get sign
- j ra
-END(get_cmp_s)
-
-/*----------------------------------------------------------------------------
- * get_cmp_d --
- *
- * Read (double precision) the FS register (bits 15-11) and
- * the FT register (bits 20-16) and break up into fields.
- * This is an internal routine used by MipsEmulateFP only.
- *
- * Results:
- * t0 contains the sign
- * t1 contains the (biased) exponent
- * t2 contains the fraction
- * ta0 contains the sign
- * ta1 contains the (biased) exponent
- * ta2 contains the fraction
- *
- *----------------------------------------------------------------------------
- */
-#define CMP_FS_D(n) \
- .rdata; \
- .dword cmp_fs_d_ ## n; \
- .text; \
-cmp_fs_d_ ## n: \
- dmfc1 t2, $ ## n; \
- b cmp_fs_d_done
-
-LEAF(get_cmp_d, 0)
- srl a3, a0, 11 - 3 # get FS field
- and a3, a3, 0x1f << 3 # mask FS field
- ld a3, cmp_fs_d_tbl(a3) # switch on register number
- j a3
-
- .rdata
-cmp_fs_d_tbl:
- .text
-
- CMP_FS_D(f0)
- CMP_FS_D(f1)
- CMP_FS_D(f2)
- CMP_FS_D(f3)
- CMP_FS_D(f4)
- CMP_FS_D(f5)
- CMP_FS_D(f6)
- CMP_FS_D(f7)
- CMP_FS_D(f8)
- CMP_FS_D(f9)
- CMP_FS_D(f10)
- CMP_FS_D(f11)
- CMP_FS_D(f12)
- CMP_FS_D(f13)
- CMP_FS_D(f14)
- CMP_FS_D(f15)
- CMP_FS_D(f16)
- CMP_FS_D(f17)
- CMP_FS_D(f18)
- CMP_FS_D(f19)
- CMP_FS_D(f20)
- CMP_FS_D(f21)
- CMP_FS_D(f22)
- CMP_FS_D(f23)
- CMP_FS_D(f24)
- CMP_FS_D(f25)
- CMP_FS_D(f26)
- CMP_FS_D(f27)
- CMP_FS_D(f28)
- CMP_FS_D(f29)
- CMP_FS_D(f30)
- CMP_FS_D(f31)
-
-cmp_fs_d_done:
- dsrl t0, t2, 63 # get sign
- dsrl t1, t2, DFRAC_BITS # get exponent
- and t1, t1, 0x7FF
- dsll t2, 12
- dsrl t2, 12 # get fraction
-
-#define CMP_FT_D(n) \
- .rdata; \
- .dword cmp_ft_d_ ## n; \
- .text; \
-cmp_ft_d_ ## n: \
- dmfc1 ta2, $ ## n; \
- b cmp_ft_d_done
-
- srl a3, a0, 16 - 3 # get FT field
- and a3, a3, 0x1f << 3 # mask FT field
- ld a3, cmp_ft_d_tbl(a3) # switch on register number
- j a3
-
- .rdata
-cmp_ft_d_tbl:
- .text
-
- CMP_FT_D(f0)
- CMP_FT_D(f1)
- CMP_FT_D(f2)
- CMP_FT_D(f3)
- CMP_FT_D(f4)
- CMP_FT_D(f5)
- CMP_FT_D(f6)
- CMP_FT_D(f7)
- CMP_FT_D(f8)
- CMP_FT_D(f9)
- CMP_FT_D(f10)
- CMP_FT_D(f11)
- CMP_FT_D(f12)
- CMP_FT_D(f13)
- CMP_FT_D(f14)
- CMP_FT_D(f15)
- CMP_FT_D(f16)
- CMP_FT_D(f17)
- CMP_FT_D(f18)
- CMP_FT_D(f19)
- CMP_FT_D(f20)
- CMP_FT_D(f21)
- CMP_FT_D(f22)
- CMP_FT_D(f23)
- CMP_FT_D(f24)
- CMP_FT_D(f25)
- CMP_FT_D(f26)
- CMP_FT_D(f27)
- CMP_FT_D(f28)
- CMP_FT_D(f29)
- CMP_FT_D(f30)
- CMP_FT_D(f31)
-
-cmp_ft_d_done:
- dsrl ta0, ta2, 63 # get sign
- dsrl ta1, ta2, DFRAC_BITS # get exponent
- and ta1, ta1, 0x7FF
- dsll ta2, 12
- dsrl ta2, 12 # get fraction
- j ra
-END(get_cmp_d)
-
-/*----------------------------------------------------------------------------
- * set_fd_s --
- *
- * Write (single precision) the FD register (bits 10-6).
- * This is an internal routine used by MipsEmulateFP only.
- *
- * Arguments:
- * a0 contains the FP instruction
- * t0 contains the sign
- * t1 contains the (biased) exponent
- * t2 contains the fraction
- *
- * set_fd_word --
- *
- * Write (integer) the FD register (bits 10-6).
- * This is an internal routine used by MipsEmulateFP only.
- *
- * Arguments:
- * a0 contains the FP instruction
- * t2 contains the integer
- *
- *----------------------------------------------------------------------------
- */
-#define SET_FD_S(n) \
- .rdata; \
- .dword set_fd_s_ ## n; \
- .text; \
-set_fd_s_ ## n: \
- mtc1 t2, $ ## n; \
- j ra
-
-LEAF(set_fd_s, 0)
- sll t0, t0, 31 # position sign
- sll t1, t1, SFRAC_BITS # position exponent
- or t2, t2, t0
- or t2, t2, t1
-ALEAF(set_fd_word)
- srl a3, a0, 6 - 3 # get FD field
- and a3, a3, 0x1f << 3 # mask FT field
- ld a3, set_fd_s_tbl(a3) # switch on register number
- j a3
-
- .rdata
-set_fd_s_tbl:
- .text
-
- SET_FD_S(f0)
- SET_FD_S(f1)
- SET_FD_S(f2)
- SET_FD_S(f3)
- SET_FD_S(f4)
- SET_FD_S(f5)
- SET_FD_S(f6)
- SET_FD_S(f7)
- SET_FD_S(f8)
- SET_FD_S(f9)
- SET_FD_S(f10)
- SET_FD_S(f11)
- SET_FD_S(f12)
- SET_FD_S(f13)
- SET_FD_S(f14)
- SET_FD_S(f15)
- SET_FD_S(f16)
- SET_FD_S(f17)
- SET_FD_S(f18)
- SET_FD_S(f19)
- SET_FD_S(f20)
- SET_FD_S(f21)
- SET_FD_S(f22)
- SET_FD_S(f23)
- SET_FD_S(f24)
- SET_FD_S(f25)
- SET_FD_S(f26)
- SET_FD_S(f27)
- SET_FD_S(f28)
- SET_FD_S(f29)
- SET_FD_S(f30)
- SET_FD_S(f31)
-
-END(set_fd_s)
-
-/*----------------------------------------------------------------------------
- * set_fd_d --
- *
- * Write (double precision) the FT register (bits 10-6).
- * This is an internal routine used by MipsEmulateFP only.
- *
- * Arguments:
- * a0 contains the FP instruction
- * t0 contains the sign
- * t1 contains the (biased) exponent
- * t2 contains the fraction
- *
- *----------------------------------------------------------------------------
- */
-#define SET_FD_D(n) \
- .rdata; \
- .dword set_fd_d_ ## n; \
- .text; \
-set_fd_d_ ## n: \
- dmtc1 t0, $ ## n; \
- j ra
-
-LEAF(set_fd_d, 0)
- dsll t0, 63 # set sign
- dsll t1, t1, DFRAC_BITS # set exponent
- or t0, t0, t1
- or t0, t0, t2 # set fraction
-ALEAF(set_fd_dword)
- srl a3, a0, 6 - 3 # get FD field
- and a3, a3, 0x1f << 3 # mask FD field
- ld a3, set_fd_d_tbl(a3) # switch on register number
- j a3
-
- .rdata
-set_fd_d_tbl:
- .text
-
- SET_FD_D(f0)
- SET_FD_D(f1)
- SET_FD_D(f2)
- SET_FD_D(f3)
- SET_FD_D(f4)
- SET_FD_D(f5)
- SET_FD_D(f6)
- SET_FD_D(f7)
- SET_FD_D(f8)
- SET_FD_D(f9)
- SET_FD_D(f10)
- SET_FD_D(f11)
- SET_FD_D(f12)
- SET_FD_D(f13)
- SET_FD_D(f14)
- SET_FD_D(f15)
- SET_FD_D(f16)
- SET_FD_D(f17)
- SET_FD_D(f18)
- SET_FD_D(f19)
- SET_FD_D(f20)
- SET_FD_D(f21)
- SET_FD_D(f22)
- SET_FD_D(f23)
- SET_FD_D(f24)
- SET_FD_D(f25)
- SET_FD_D(f26)
- SET_FD_D(f27)
- SET_FD_D(f28)
- SET_FD_D(f29)
- SET_FD_D(f30)
- SET_FD_D(f31)
-
-END(set_fd_d)
-
-/*----------------------------------------------------------------------------
- * renorm_fs_s --
- *
- * Results:
- * t1 unbiased exponent
- * t2 normalized fraction
- *
- *----------------------------------------------------------------------------
- */
-LEAF(renorm_fs_s, 0)
-/*
- * Find out how many leading zero bits are in t2 and put in t9.
- */
- move v0, t2
- move t9, zero
- srl v1, v0, 16
- bne v1, zero, 1f
- addu t9, 16
- sll v0, 16
-1:
- srl v1, v0, 24
- bne v1, zero, 1f
- addu t9, 8
- sll v0, 8
-1:
- srl v1, v0, 28
- bne v1, zero, 1f
- addu t9, 4
- sll v0, 4
-1:
- srl v1, v0, 30
- bne v1, zero, 1f
- addu t9, 2
- sll v0, 2
-1:
- srl v1, v0, 31
- bne v1, zero, 1f
- addu t9, 1
-/*
- * Now shift t2 the correct number of bits.
- */
-1:
- subu t9, t9, SLEAD_ZEROS # dont count normal leading zeros
- li t1, SEXP_MIN
- subu t1, t1, t9 # adjust exponent
- sll t2, t2, t9
- j ra
-END(renorm_fs_s)
-
-/*----------------------------------------------------------------------------
- * renorm_fs_d --
- *
- * Results:
- * t1 unbiased exponent
- * t2 normalized fraction
- *
- *----------------------------------------------------------------------------
- */
-LEAF(renorm_fs_d, 0)
-/*
- * Find out how many leading zero bits are in t2 and put in t9.
- */
- move v0, t2
- move t9, zero
- dsrl v1, v0, 32
- bne v1, zero, 1f
- addu t9, 32
- dsll v0, 32
-1:
- dsrl v1, v0, 16
- bne v1, zero, 1f
- addu t9, 16
- dsll v0, 16
-1:
- dsrl v1, v0, 24
- bne v1, zero, 1f
- addu t9, 8
- dsll v0, 8
-1:
- dsrl v1, v0, 28
- bne v1, zero, 1f
- addu t9, 4
- dsll v0, 4
-1:
- dsrl v1, v0, 30
- bne v1, zero, 1f
- addu t9, 2
- dsll v0, 2
-1:
- dsrl v1, v0, 31
- bne v1, zero, 1f
- addu t9, 1
-/*
- * Now shift t2 the correct number of bits.
- */
-1:
- subu t9, t9, DLEAD_ZEROS # dont count normal leading zeros
- li t1, DEXP_MIN
- subu t1, t9 # adjust exponent
- dsll t2, t9
- j ra
-END(renorm_fs_d)
-
-/*----------------------------------------------------------------------------
- * renorm_ft_s --
- *
- * Results:
- * ta1 unbiased exponent
- * ta2 normalized fraction
- *
- *----------------------------------------------------------------------------
- */
-LEAF(renorm_ft_s, 0)
-/*
- * Find out how many leading zero bits are in ta2 and put in t9.
- */
- move v0, ta2
- move t9, zero
- srl v1, v0, 16
- bne v1, zero, 1f
- addu t9, 16
- sll v0, 16
-1:
- srl v1, v0, 24
- bne v1, zero, 1f
- addu t9, 8
- sll v0, 8
-1:
- srl v1, v0, 28
- bne v1, zero, 1f
- addu t9, 4
- sll v0, 4
-1:
- srl v1, v0, 30
- bne v1, zero, 1f
- addu t9, 2
- sll v0, 2
-1:
- srl v1, v0, 31
- bne v1, zero, 1f
- addu t9, 1
-/*
- * Now shift ta2 the correct number of bits.
- */
-1:
- subu t9, t9, SLEAD_ZEROS # dont count normal leading zeros
- li ta1, SEXP_MIN
- subu ta1, t9 # adjust exponent
- sll ta2, t9
- j ra
-END(renorm_ft_s)
-
-/*----------------------------------------------------------------------------
- * renorm_ft_d --
- *
- * Results:
- * ta1 unbiased exponent
- * ta2 normalized fraction
- *
- *----------------------------------------------------------------------------
- */
-LEAF(renorm_ft_d, 0)
-/*
- * Find out how many leading zero bits are in ta2 and put in t9.
- */
- move v0, ta2
- move t9, zero
- dsrl v1, v0, 32
- bne v1, zero, 1f
- addu t9, 32
- dsll v0, 32
-1:
- dsrl v1, v0, 16
- bne v1, zero, 1f
- addu t9, 16
- dsll v0, 16
-1:
- dsrl v1, v0, 24
- bne v1, zero, 1f
- addu t9, 8
- dsll v0, 8
-1:
- dsrl v1, v0, 28
- bne v1, zero, 1f
- addu t9, 4
- dsll v0, 4
-1:
- dsrl v1, v0, 30
- bne v1, zero, 1f
- addu t9, 2
- dsll v0, 2
-1:
- dsrl v1, v0, 31
- bne v1, zero, 1f
- addu t9, 1
-/*
- * Now shift ta2 the correct number of bits.
- */
-1:
- subu t9, t9, DLEAD_ZEROS # dont count normal leading zeros
- li ta1, DEXP_MIN
- subu ta1, t9 # adjust exponent
- dsll ta2, t9
- j ra
-END(renorm_ft_d)
diff --git a/sys/arch/mips64/mips64/fp_emulate.c b/sys/arch/mips64/mips64/fp_emulate.c
new file mode 100644
index 00000000000..d392b8d1564
--- /dev/null
+++ b/sys/arch/mips64/mips64/fp_emulate.c
@@ -0,0 +1,1310 @@
+/* $OpenBSD: fp_emulate.c,v 1.1 2010/09/21 20:29:17 miod Exp $ */
+
+/*
+ * Copyright (c) 2010 Miodrag Vallat.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Floating Point completion code (MI softfloat code control engine).
+ *
+ * Supports all MIPS IV COP1 and COP1X floating-point instructions.
+ * Floating-point load and store instructions, as well as branch instructions,
+ * are not handled, as they should not require completion code.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/signalvar.h>
+
+#include <machine/cpu.h>
+#include <machine/fpu.h>
+#include <machine/frame.h>
+#include <machine/ieee.h>
+#include <machine/ieeefp.h>
+#include <machine/mips_opcode.h>
+#include <machine/regnum.h>
+
+#include <lib/libkern/softfloat.h>
+#if defined(DEBUG) && defined(DDB)
+#include <machine/db_machdep.h>
+#endif
+
+int fpu_emulate(struct trap_frame *, uint32_t, union sigval *);
+int fpu_emulate_cop1(struct trap_frame *, uint32_t);
+int fpu_emulate_cop1x(struct trap_frame *, uint32_t);
+uint64_t
+ fpu_load(struct trap_frame *, uint, uint);
+void fpu_store(struct trap_frame *, uint, uint, uint64_t);
+
+typedef int (fpu_fn3)(struct trap_frame *, uint, uint, uint, uint);
+typedef int (fpu_fn4)(struct trap_frame *, uint, uint, uint, uint, uint);
+fpu_fn3 fpu_abs;
+fpu_fn3 fpu_add;
+int fpu_c(struct trap_frame *, uint, uint, uint, uint, uint);
+fpu_fn3 fpu_ceil_l;
+fpu_fn3 fpu_ceil_w;
+fpu_fn3 fpu_cvt_d;
+fpu_fn3 fpu_cvt_l;
+fpu_fn3 fpu_cvt_s;
+fpu_fn3 fpu_cvt_w;
+fpu_fn3 fpu_div;
+fpu_fn3 fpu_floor_l;
+fpu_fn3 fpu_floor_w;
+fpu_fn4 fpu_madd;
+fpu_fn4 fpu_msub;
+fpu_fn3 fpu_mov;
+fpu_fn3 fpu_movcf;
+fpu_fn3 fpu_movn;
+fpu_fn3 fpu_movz;
+fpu_fn3 fpu_mul;
+fpu_fn3 fpu_neg;
+fpu_fn4 fpu_nmadd;
+fpu_fn4 fpu_nmsub;
+fpu_fn3 fpu_recip;
+fpu_fn3 fpu_round_l;
+fpu_fn3 fpu_round_w;
+fpu_fn3 fpu_rsqrt;
+fpu_fn3 fpu_sqrt;
+fpu_fn3 fpu_sub;
+fpu_fn3 fpu_trunc_l;
+fpu_fn3 fpu_trunc_w;
+
+int fpu_int_l(struct trap_frame *, uint, uint, uint, uint, uint);
+int fpu_int_w(struct trap_frame *, uint, uint, uint, uint, uint);
+
+/*
+ * Encoding of operand format within opcodes `fmt' and `fmt3' fields.
+ */
+#define FMT_S 0x00
+#define FMT_D 0x01
+#define FMT_W 0x04
+#define FMT_L 0x05
+
+/*
+ * Inlines from softfloat-specialize.h which are not made public, needed
+ * for fpu_abs and fpu_c.
+ */
+#define float32_is_nan(a) \
+ (0xff000000 < (a << 1))
+#define float32_is_signaling_nan(a) \
+ ((((a >> 22) & 0x1ff) == 0x1fe) && (a & 0x003fffff))
+
+/*
+ * Precomputed results of intXX_to_floatXX(1)
+ */
+#define ONE_F32 (float32)(SNG_EXP_BIAS << SNG_FRACBITS)
+#define ONE_F64 (float64)((uint64_t)DBL_EXP_BIAS << DBL_FRACBITS)
+
+/*
+ * Handle a floating-point exception.
+ *
+ * Genuine FPU exceptions raise SIGFPE; unimplemented operations are
+ * completed by fpu_emulate(), or raise SIGILL if they can not be.
+ * Unless SIGILL is delivered, the faulting instruction is skipped.
+ */
+void
+MipsFPTrap(struct trap_frame *tf)
+{
+	struct cpu_info *ci = curcpu();
+	struct proc *p = ci->ci_curproc;
+	union sigval sv;
+	vaddr_t pc;
+	uint32_t fsr, excbits;
+	uint32_t insn;
+	InstFmt inst;
+	int sig = 0;
+	int fault_type = SI_NOINFO;
+	int update_pcb = 0;
+	int emulate = 0;
+	uint32_t sr;
+
+	KDASSERT(tf == p->p_md.md_regs);
+
+	/*
+	 * Enable FPU, and read its status register.
+	 */
+	sr = getsr();
+	setsr(sr | SR_COP_1_BIT);
+
+	__asm__ __volatile__ ("cfc1 %0, $31" : "=r" (fsr));
+	__asm__ __volatile__ ("cfc1 %0, $31" : "=r" (fsr));
+
+	/* address of the faulting instruction, reported with any signal */
+	pc = (vaddr_t)tf->pc;
+	if (tf->cause & CR_BR_DELAY)
+		pc += 4;
+
+	/*
+	 * If this is not an unimplemented operation, but a genuine
+	 * FPU exception, signal the process.
+	 */
+	if ((fsr & FPCSR_C_E) == 0) {
+		sig = SIGFPE;
+		goto deliver;
+	}
+
+	/*
+	 * Get the faulting instruction. This should not fail, and
+	 * if it does, it's probably not your lucky day.
+	 */
+	if (copyin((void *)pc, &insn, sizeof insn) != 0) {
+		sig = SIGBUS;
+		fault_type = BUS_OBJERR;
+		goto deliver;
+	}
+	inst = *(InstFmt *)&insn;
+
+	/*
+	 * Emulate the instruction.
+	 */
+#ifdef DEBUG
+#ifdef DDB
+	printf("%s: unimplemented FPU completion, fsr 0x%08x\n%p: ",
+	    p->p_comm, fsr, (void *)pc);
+	dbmd_print_insn(insn, pc, printf);
+#else
+	printf("%s: unimplemented FPU completion, insn 0x%08x fsr 0x%08x\n",
+	    p->p_comm, insn, fsr);
+#endif
+#endif
+
+	switch (inst.FRType.op) {
+	default:
+		/*
+		 * Not a FPU instruction.
+		 */
+		break;
+	case OP_COP1:
+		switch (inst.RType.rs) {
+		case OP_BC:
+		case OP_MF:
+		case OP_DMF:
+		case OP_CF:
+		case OP_MT:
+		case OP_DMT:
+		case OP_CT:
+			/*
+			 * These instructions should not require emulation,
+			 * unless there is no FPU.
+			 */
+			break;
+		default:
+			emulate = 1;
+			break;
+		}
+		break;
+	case OP_COP1X:
+		switch (inst.FQType.op4) {
+		default:
+			break;
+		case OP_MADD:
+		case OP_MSUB:
+		case OP_NMADD:
+		case OP_NMSUB:
+			emulate = 1;
+			break;
+		}
+		break;
+	}
+
+	if (emulate) {
+		KASSERT(p == ci->ci_fpuproc);
+		save_fpu();
+		update_pcb = 1;
+
+		sig = fpu_emulate(tf, insn, &sv);
+		/* reload fsr, possibly modified by softfloat code */
+		fsr = tf->fsr;
+		if (sig == 0) {
+			/* raise SIGFPE if necessary */
+			excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
+			excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
+			if (excbits != 0)
+				sig = SIGFPE;
+		}
+	} else {
+		sig = SIGILL;
+		fault_type = ILL_ILLOPC;
+	}
+
+deliver:
+	switch (sig) {
+	case SIGFPE:
+		excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
+		excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
+		if (excbits & FP_X_INV)
+			fault_type = FPE_FLTINV;
+		else if (excbits & FP_X_DZ)
+			fault_type = FPE_FLTDIV;
+		else if (excbits & FP_X_OFL)
+			fault_type = FPE_FLTOVF;
+		else if (excbits & FP_X_UFL)
+			fault_type = FPE_FLTUND;
+		else /* if (excbits & FP_X_IMP) */
+			fault_type = FPE_FLTRES;
+		break;
+	}
+
+	/*
+	 * Skip the instruction, unless we are delivering SIGILL.
+	 */
+	if (sig != SIGILL) {
+		if (tf->cause & CR_BR_DELAY) {
+			/*
+			 * Note that it doesn't matter, at this point,
+			 * that we pass the updated FSR value, as it is
+			 * only used to decide whether to branch or not
+			 * if the faulting instruction was BC1[FT].
+			 */
+			tf->pc = MipsEmulateBranch(tf, tf->pc, fsr, 0);
+		} else
+			tf->pc += 4;
+	}
+
+	/*
+	 * Update the FPU status register.
+	 * We need to make sure that this will not cause an exception
+	 * in kernel mode.
+	 */
+	/* propagate raised exceptions to the sticky bits */
+	fsr &= ~FPCSR_C_E;
+	excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
+	fsr |= excbits << FPCSR_F_SHIFT;
+	/* clear all exception sources */
+	fsr &= ~FPCSR_C_MASK;
+	if (update_pcb)
+		tf->fsr = fsr;
+	__asm__ __volatile__ ("ctc1 %0, $31" :: "r" (fsr));
+	/* disable fpu before returning to trap() */
+	setsr(sr);
+
+	if (sig != 0) {
+		sv.sival_ptr = (void *)pc;
+		KERNEL_PROC_LOCK(p);
+		trapsignal(p, sig, 0, fault_type, sv);
+		KERNEL_PROC_UNLOCK(p);
+	}
+}
+
+/*
+ * Emulate an FPU instruction. The FPU register set has been saved in the
+ * current PCB, and is pointed to by the trap frame.
+ * Returns zero, or the number of the signal to deliver (`sv' is unused).
+ */
+int
+fpu_emulate(struct trap_frame *tf, uint32_t insn, union sigval *sv)
+{
+	InstFmt decoded;
+	int rc = SIGILL;	/* anything but COP1 and COP1X is rejected */
+
+	tf->zero = 0;	/* not written by trap code */
+	decoded = *(InstFmt *)&insn;
+	switch (decoded.FRType.op) {
+	case OP_COP1:
+		rc = fpu_emulate_cop1(tf, insn);
+		break;
+	case OP_COP1X:
+		rc = fpu_emulate_cop1x(tf, insn);
+		break;
+	}
+	return rc;
+}
+
+/*
+ * Emulate a COP1 FPU instruction; SIGILL is returned for bad encodings.
+ */
+int
+fpu_emulate_cop1(struct trap_frame *tf, uint32_t insn)
+{
+ InstFmt inst;
+ uint ft, fs, fd;
+ fpu_fn3 *fpu_op;
+ static fpu_fn3 *const fpu_ops1[1 << 6] = {
+ fpu_add, /* 0x00 */
+ fpu_sub,
+ fpu_mul,
+ fpu_div,
+ fpu_sqrt,
+ fpu_abs,
+ fpu_mov,
+ fpu_neg,
+ fpu_round_l, /* 0x08 */
+ fpu_trunc_l,
+ fpu_ceil_l,
+ fpu_floor_l,
+ fpu_round_w,
+ fpu_trunc_w,
+ fpu_ceil_w,
+ fpu_floor_w,
+ NULL, /* 0x10 */
+ fpu_movcf,
+ fpu_movz,
+ fpu_movn,
+ NULL,
+ fpu_recip,
+ fpu_rsqrt,
+ NULL,
+ NULL, /* 0x18 */
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ fpu_cvt_s, /* 0x20 */
+ fpu_cvt_d,
+ NULL,
+ NULL,
+ fpu_cvt_w,
+ fpu_cvt_l,
+ NULL,
+ NULL,
+ NULL, /* 0x28 */
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ (fpu_fn3 *)fpu_c, /* 0x30 */
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c, /* 0x38 */
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c,
+ (fpu_fn3 *)fpu_c
+ };
+
+ inst = *(InstFmt *)&insn;
+
+ /*
+ * Check for valid function code (NULL entries have no handler).
+ */
+
+ fpu_op = fpu_ops1[inst.FRType.func];
+ if (fpu_op == NULL)
+ return SIGILL;
+
+ /*
+ * Check for valid format. FRType assumes bit 25 is always set,
+ * so we need to check for it explicitly.
+ */
+
+ if ((insn & (1 << 25)) == 0)
+ return SIGILL;
+ switch (inst.FRType.fmt) {
+ default:
+ return SIGILL;
+ case FMT_S:
+ case FMT_D:
+ case FMT_W:
+ case FMT_L:
+ break;
+ }
+
+ /*
+ * Check for valid register values. Only even-numbered registers
+ * can be used if the FR bit is clear in coprocessor 0 status
+ * register.
+ *
+ * Note that c.cond does not specify a register number in the fd
+ * field, but the fd field must have zero in its low two bits, so
+ * the test will not reject valid c.cond instructions.
+ */
+
+ ft = inst.FRType.ft;
+ fs = inst.FRType.fs;
+ fd = inst.FRType.fd;
+ if ((tf->sr & SR_FR_32) == 0) {
+ if ((ft | fs | fd) & 1)
+ return SIGILL;
+ }
+
+ /*
+ * Finally dispatch to the proper routine; fpu_c also gets the func.
+ */
+
+ if (fpu_op == (fpu_fn3 *)&fpu_c)
+ return fpu_c(tf, inst.FRType.fmt, ft, fs, fd, inst.FRType.func);
+ else
+ return (*fpu_op)(tf, inst.FRType.fmt, ft, fs, fd);
+}
+
+/*
+ * Emulate a COP1X FPU instruction.
+ *
+ * The operand format of these instructions is found in the `fmt3' field,
+ * and the operation in the `op4' field. Returns zero or the number of
+ * the signal to deliver.
+ */
+int
+fpu_emulate_cop1x(struct trap_frame *tf, uint32_t insn)
+{
+	InstFmt inst;
+	uint fr, ft, fs, fd;
+	fpu_fn4 *fpu_op;
+	static fpu_fn4 *const fpu_ops1x[1 << 3] = {
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		fpu_madd,
+		fpu_msub,
+		fpu_nmadd,
+		fpu_nmsub
+	};
+
+	inst = *(InstFmt *)&insn;
+
+	/*
+	 * Check for valid function code.
+	 */
+	fpu_op = fpu_ops1x[inst.FQType.op4];
+	if (fpu_op == NULL)
+		return SIGILL;
+
+	/*
+	 * Check for valid format.
+	 */
+	switch (inst.FQType.fmt3) {
+	default:
+		return SIGILL;
+	case FMT_S:
+	case FMT_D:
+	case FMT_W:
+	case FMT_L:
+		break;
+	}
+
+	/*
+	 * Check for valid register values. Only even-numbered registers
+	 * can be used if the FR bit is clear in coprocessor 0 status
+	 * register.
+	 */
+	fr = inst.FQType.fr;
+	ft = inst.FQType.ft;
+	fs = inst.FQType.fs;
+	fd = inst.FQType.fd;
+	if ((tf->sr & SR_FR_32) == 0) {
+		if ((fr | ft | fs | fd) & 1)
+			return SIGILL;
+	}
+
+	/*
+	 * Finally dispatch to the proper routine, with the format which
+	 * has been checked above (fmt3, and not the COP1 fmt field).
+	 */
+	return (*fpu_op)(tf, inst.FQType.fmt3, fr, ft, fs, fd);
+}
+
+/*
+ * Load a floating-point argument according to the specified format.
+ *
+ * If the FR bit is set in the saved status register, the operand comes
+ * from a single register, truncated to 32 bits for the S and W formats.
+ * Otherwise registers only provide 32 bits each, and D and L operands
+ * are built from a register (low word) and the next one (high word).
+ */
+uint64_t
+fpu_load(struct trap_frame *tf, uint fmt, uint regno)
+{
+	register_t *regs = (register_t *)tf;
+	uint64_t value;
+	uint64_t upper;
+	int narrow, wide;
+
+	narrow = fmt == FMT_S || fmt == FMT_W;
+	wide = fmt == FMT_D || fmt == FMT_L;
+
+	value = (uint64_t)regs[FPBASE + regno];
+	/* FR bit set: one register holds the whole operand */
+	if ((tf->sr & SR_FR_32) != 0) {
+		if (narrow)
+			value &= 0xffffffff;
+		return value;
+	}
+
+	/* FR bit clear: only the low word of each register is used */
+	value &= 0xffffffff;
+	if (wide) {
+		/* caller has enforced regno is even */
+		upper = (uint64_t)regs[FPBASE + regno + 1];
+		value |= upper << 32;
+	}
+
+	return value;
+}
+
+/*
+ * Store a floating-point result; `fmt' is not used, and the register
+ * layout only depends on the FR bit of the saved status register.
+ */
+void
+fpu_store(struct trap_frame *tf, uint fmt, uint regno, uint64_t rslt)
+{
+	register_t *fpreg = (register_t *)tf + FPBASE + regno;
+
+	if ((tf->sr & SR_FR_32) == 0) {
+		/* caller has enforced regno is even */
+		fpreg[0] = rslt & 0xffffffff;
+		fpreg[1] = (rslt >> 32) & 0xffffffff;
+	} else
+		fpreg[0] = rslt;
+}
+
+/*
+ * Integer conversion
+ */
+
+int
+fpu_int_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint rm)
+{
+	uint32_t saved_rm;
+	uint64_t value;
+
+	/* ft must be zero, and the source be single or double precision */
+	if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
+		return SIGILL;
+
+	value = fpu_load(tf, fmt, fs);
+
+	/* install the requested rounding mode around the conversion */
+	saved_rm = tf->fsr & FPCSR_RM_MASK;
+	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
+	if (fmt != FMT_S)
+		value = float64_to_int64((float64)value);
+	else
+		value = float32_to_int64((float32)value);
+	/* put the previous rounding mode back */
+	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | saved_rm;
+
+	/* no store when FPCSR_C_V and FPCSR_E_V are both set */
+	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) == (FPCSR_C_V | FPCSR_E_V))
+		return 0;
+	fpu_store(tf, fmt, fd, value);
+	return 0;
+}
+
+int
+fpu_int_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint rm)
+{
+	uint32_t saved_rm;
+	uint64_t value;
+
+	/* ft must be zero, and the source be single or double precision */
+	if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
+		return SIGILL;
+
+	value = fpu_load(tf, fmt, fs);
+
+	/* install the requested rounding mode around the conversion */
+	saved_rm = tf->fsr & FPCSR_RM_MASK;
+	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
+	if (fmt != FMT_S)
+		value = float64_to_int32((float64)value);
+	else
+		value = float32_to_int32((float32)value);
+	/* put the previous rounding mode back */
+	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | saved_rm;
+
+	/* no store when FPCSR_C_V and FPCSR_E_V are both set */
+	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) == (FPCSR_C_V | FPCSR_E_V))
+		return 0;
+	fpu_store(tf, fmt, fd, value);
+	return 0;
+}
+
+/*
+ * FPU Instruction emulation
+ */
+
+int
+fpu_abs(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t raw;
+
+	/* fd = |fs|; ft must be zero, single or double precision only */
+	if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
+		return SIGILL;
+
+	raw = fpu_load(tf, fmt, fs);
+	/*
+	 * Clear sign bit unless NaN; unsigned masks avoid undefined shifts.
+	 */
+	if (fmt == FMT_S) {
+		float32 f32 = (float32)raw;
+		if (float32_is_nan(f32)) {
+			float_set_invalid();
+		} else {
+			f32 &= ~((uint32_t)1 << 31);
+			raw = (uint64_t)f32;
+		}
+	} else {
+		float64 f64 = (float64)raw;
+		if (float64_is_nan(f64)) {
+			float_set_invalid();
+		} else {
+			f64 &= ~((uint64_t)1 << 63);
+			raw = (uint64_t)f64;
+		}
+	}
+	fpu_store(tf, fmt, fd, raw);
+	return 0;
+}
+
+int
+fpu_add(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t lhs, rhs, sum;
+
+	/* fd = fs + ft, in single or double precision only */
+	if (!(fmt == FMT_S || fmt == FMT_D))
+		return SIGILL;
+
+	lhs = fpu_load(tf, fmt, fs);
+	rhs = fpu_load(tf, fmt, ft);
+
+	if (fmt == FMT_D)
+		sum = (uint64_t)float64_add((float64)lhs, (float64)rhs);
+	else
+		sum = (uint64_t)float32_add((float32)lhs, (float32)rhs);
+
+	fpu_store(tf, fmt, fd, sum);
+
+	return 0;
+}
+
+int
+fpu_c(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint op)
+{
+	uint64_t raw1, raw2;
+	uint cc, lt, eq, uo;
+
+	/*
+	 * c.cond.fmt: compare fs against ft. Bits 0-2 of `op' select the
+	 * unordered, equal and less than predicates; bit 3 asks for invalid
+	 * operation on unordered operands. The condition code is fd >> 2.
+	 */
+	if ((fd & 0x03) != 0 || (fmt != FMT_S && fmt != FMT_D))
+		return SIGILL;
+
+	lt = eq = uo = 0;
+	cc = fd >> 2;
+
+	raw1 = fpu_load(tf, fmt, fs);	/* left-hand operand */
+	raw2 = fpu_load(tf, fmt, ft);	/* right-hand operand */
+	if (fmt == FMT_S) {
+		float32 f32a = (float32)raw1;
+		float32 f32b = (float32)raw2;
+		if (float32_is_nan(f32a)) {
+			uo = 1 << 0;
+			if (float32_is_signaling_nan(f32a))
+				op |= 0x08;	/* force invalid exception */
+		} else if (float32_is_nan(f32b)) {
+			uo = 1 << 0;
+			if (float32_is_signaling_nan(f32b))
+				op |= 0x08;	/* force invalid exception */
+		} else {
+			if (float32_eq(f32a, f32b))
+				eq = 1 << 1;
+			else if (float32_lt(f32a, f32b))
+				lt = 1 << 2;
+		}
+	} else {
+		float64 f64a = (float64)raw1;
+		float64 f64b = (float64)raw2;
+		if (float64_is_nan(f64a)) {
+			uo = 1 << 0;
+			if (float64_is_signaling_nan(f64a))
+				op |= 0x08;	/* force invalid exception */
+		} else if (float64_is_nan(f64b)) {
+			uo = 1 << 0;
+			if (float64_is_signaling_nan(f64b))
+				op |= 0x08;	/* force invalid exception */
+		} else {
+			if (float64_eq(f64a, f64b))
+				eq = 1 << 1;
+			else if (float64_lt(f64a, f64b))
+				lt = 1 << 2;
+		}
+	}
+
+	if (uo && (op & 0x08)) {
+		float_set_invalid();
+		if (tf->fsr & FPCSR_E_V) {
+			/* comparison result intentionally not written */
+			return 0;
+		}
+	}
+	if ((uo | eq | lt) & op)
+		tf->fsr |= FPCSR_CONDVAL(cc);
+	else
+		tf->fsr &= ~FPCSR_CONDVAL(cc);
+
+	return 0;
+}
+
+int
+fpu_ceil_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	const uint rm = FP_RP;	/* round towards positive infinity */
+	return fpu_int_l(tf, fmt, ft, fs, fd, rm);
+}
+
+int
+fpu_ceil_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	const uint rm = FP_RP;	/* round towards positive infinity */
+	return fpu_int_w(tf, fmt, ft, fs, fd, rm);
+}
+
+int
+fpu_cvt_d(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t operand, converted;
+
+	/* ft must be zero, and the source format can not be double */
+	if (ft != 0 || fmt == FMT_D)
+		return SIGILL;
+
+	operand = fpu_load(tf, fmt, fs);
+
+	/* convert to double precision from whatever the source format is */
+	if (fmt == FMT_S)
+		converted = float32_to_float64((float32)operand);
+	else if (fmt == FMT_W)
+		converted = int32_to_float64((int32_t)operand);
+	else if (fmt == FMT_L)
+		converted = int64_to_float64((int64_t)operand);
+	else
+		converted = operand;	/* other formats: stored unchanged */
+
+	/* note that the source format is what fpu_store() gets */
+	fpu_store(tf, fmt, fd, converted);
+
+	return 0;
+}
+
+int
+fpu_cvt_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t value;
+	uint32_t rm;
+
+	/* ft must be zero, and the source be single or double precision */
+	if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
+		return SIGILL;
+
+	/* round to zero has dedicated softfloat conversion routines */
+	rm = tf->fsr & FPCSR_RM_MASK;
+	value = fpu_load(tf, fmt, fs);
+
+	if (fmt == FMT_D && rm == FP_RZ)
+		value = float64_to_int64_round_to_zero((float64)value);
+	else if (fmt == FMT_D)
+		value = float64_to_int64((float64)value);
+	else if (rm == FP_RZ)
+		value = float32_to_int64_round_to_zero((float32)value);
+	else
+		value = float32_to_int64((float32)value);
+
+	/* no store when FPCSR_C_V and FPCSR_E_V are both set */
+	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) == (FPCSR_C_V | FPCSR_E_V))
+		return 0;
+	fpu_store(tf, fmt, fd, value);
+	return 0;
+}
+
+/*
+ * cvt.s emulation: load operand fs in source format fmt, convert it to
+ * single precision and write the result to fd.
+ *
+ * Returns SIGILL when ft is nonzero, when fmt is FMT_S (there is no
+ * single-to-single conversion) or when fmt is not one of FMT_D, FMT_L,
+ * FMT_W; returns 0 once the result has been stored.
+ */
+int
+fpu_cvt_s(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t raw;
+
+	if (ft != 0)
+		return SIGILL;
+	if (fmt == FMT_S)
+		return SIGILL;
+
+	raw = fpu_load(tf, fmt, fs);
+	switch (fmt) {
+	case FMT_D:
+		raw = float64_to_float32((float64)raw);
+		break;
+	case FMT_L:
+		raw = int64_to_float32((int64_t)raw);
+		break;
+	case FMT_W:
+		raw = int32_to_float32((int32_t)raw);
+		break;
+	default:
+		/*
+		 * No conversion exists for this source format; do not
+		 * store the unconverted bits as if they were a single.
+		 */
+		return SIGILL;
+	}
+	/*
+	 * The result is a single whatever the source format was, so it
+	 * must be stored as FMT_S rather than with the (possibly wider)
+	 * source format.
+	 */
+	fpu_store(tf, FMT_S, fd, raw);
+
+	return 0;
+}
+
+/*
+ * cvt.w emulation: convert operand fs (single or double, per fmt) to a
+ * 32-bit integer and write it to fd.
+ *
+ * When the rounding mode field of tf->fsr is FP_RZ the explicit
+ * round-to-zero softfloat routines are used; for every other mode the
+ * generic conversion routines are called (presumably honouring the
+ * rounding mode already programmed into softfloat -- confirm against
+ * the caller).
+ *
+ * Returns SIGILL when ft is nonzero or fmt is neither FMT_S nor FMT_D,
+ * 0 otherwise.
+ */
+int
+fpu_cvt_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t raw;
+	uint32_t rm;
+
+	if (ft != 0)
+		return SIGILL;
+	if (fmt != FMT_S && fmt != FMT_D)
+		return SIGILL;
+
+	rm = tf->fsr & FPCSR_RM_MASK;
+	raw = fpu_load(tf, fmt, fs);
+	if (fmt == FMT_D) {
+		if (rm == FP_RZ)
+			raw = float64_to_int32_round_to_zero((float64)raw);
+		else
+			raw = float64_to_int32((float64)raw);
+	} else {
+		if (rm == FP_RZ)
+			raw = float32_to_int32_round_to_zero((float32)raw);
+		else
+			raw = float32_to_int32((float32)raw);
+	}
+	/*
+	 * Leave fd untouched when both FPCSR_C_V and FPCSR_E_V are set in
+	 * tf->fsr after the conversion.  Otherwise store the result as a
+	 * 32-bit integer (FMT_W): storing with the source format would
+	 * write a 64-bit quantity when the source is FMT_D.
+	 */
+	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
+		fpu_store(tf, FMT_W, fd, raw);
+
+	return 0;
+}
+
+/*
+ * div emulation: fd = fs / ft, in single or double precision as
+ * selected by fmt.  Returns SIGILL for any other fmt, 0 otherwise.
+ */
+int
+fpu_div(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t dividend, divisor, quotient;
+
+	if (!(fmt == FMT_S || fmt == FMT_D))
+		return SIGILL;
+
+	dividend = fpu_load(tf, fmt, fs);
+	divisor = fpu_load(tf, fmt, ft);
+	if (fmt == FMT_D)
+		quotient = (uint64_t)float64_div((float64)dividend,
+		    (float64)divisor);
+	else
+		quotient = (uint64_t)float32_div((float32)dividend,
+		    (float32)divisor);
+	fpu_store(tf, fmt, fd, quotient);
+
+	return 0;
+}
+
+/*
+ * floor.l emulation: hand the operands to fpu_int_l() with the rounding
+ * mode forced to FP_RM (round towards negative infinity) and pass its
+ * status back to the caller unchanged.
+ */
+int
+fpu_floor_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	int status;
+
+	status = fpu_int_l(tf, fmt, ft, fs, fd, FP_RM);
+	return status;
+}
+
+/*
+ * floor.w emulation: hand the operands to fpu_int_w() with the rounding
+ * mode forced to FP_RM (round towards negative infinity) and pass its
+ * status back to the caller unchanged.
+ */
+int
+fpu_floor_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	int status;
+
+	status = fpu_int_w(tf, fmt, ft, fs, fd, FP_RM);
+	return status;
+}
+
+/*
+ * madd emulation: fd = (fs * ft) + fr, computed as a softfloat multiply
+ * followed by a separate softfloat add, in single or double precision
+ * as selected by fmt.  Returns SIGILL for any other fmt, 0 otherwise.
+ */
+int
+fpu_madd(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
+{
+	uint64_t mult1, mult2, addend, result;
+
+	if (!(fmt == FMT_S || fmt == FMT_D))
+		return SIGILL;
+
+	mult1 = fpu_load(tf, fmt, fs);
+	mult2 = fpu_load(tf, fmt, ft);
+	addend = fpu_load(tf, fmt, fr);
+	if (fmt == FMT_D) {
+		float64 product = float64_mul((float64)mult1, (float64)mult2);
+
+		result = (uint64_t)float64_add(product, (float64)addend);
+	} else {
+		float32 product = float32_mul((float32)mult1, (float32)mult2);
+
+		result = (uint64_t)float32_add(product, (float32)addend);
+	}
+	fpu_store(tf, fmt, fd, result);
+
+	return 0;
+}
+
+/*
+ * mov emulation: copy operand fs to fd without altering its bits.
+ * Returns SIGILL when ft is nonzero or fmt is neither FMT_S nor FMT_D,
+ * 0 otherwise.
+ */
+int
+fpu_mov(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
+		return SIGILL;
+
+	fpu_store(tf, fmt, fd, fpu_load(tf, fmt, fs));
+
+	return 0;
+}
+
+/*
+ * movf/movt emulation: conditionally copy operand fs to fd depending on
+ * a condition bit of tf->fsr.
+ *
+ * The ft field is decoded here as: bit 0 (COPz_BC_TF_MASK) selects
+ * movt when set and movf when clear, bit 1 must be zero, and the
+ * remaining upper bits give the condition code number.  The copy
+ * happens when the selected condition bit matches the requested sense.
+ *
+ * Returns SIGILL when bit 1 of ft is set or fmt is neither FMT_S nor
+ * FMT_D, 0 otherwise (whether or not the move took place).
+ */
+int
+fpu_movcf(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint cc, istf;
+	int condition;
+
+	if ((ft & 0x02) != 0)
+		return SIGILL;
+	if (!(fmt == FMT_S || fmt == FMT_D))
+		return SIGILL;
+
+	cc = ft >> 2;
+	condition = tf->fsr & FPCSR_CONDVAL(cc);
+	istf = ft & COPz_BC_TF_MASK;
+
+	/* move only if "condition is set" agrees with "movt requested" */
+	if ((condition != 0) == (istf != 0))
+		fpu_store(tf, fmt, fd, fpu_load(tf, fmt, fs));
+
+	return 0;
+}
+
+/*
+ * movn emulation: copy operand fs to fd when the register selected by
+ * ft is not ZERO and holds a nonzero value.  The trap frame is viewed
+ * as an array of register_t indexed by ft to read that value.
+ *
+ * Returns SIGILL when fmt is neither FMT_S nor FMT_D, 0 otherwise
+ * (whether or not the move took place).
+ */
+int
+fpu_movn(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	register_t *gpr = (register_t *)tf;
+
+	if (!(fmt == FMT_S || fmt == FMT_D))
+		return SIGILL;
+
+	/* nothing to do if the test register is ZERO or reads as zero */
+	if (ft == ZERO || gpr[ft] == 0)
+		return 0;
+
+	fpu_store(tf, fmt, fd, fpu_load(tf, fmt, fs));
+
+	return 0;
+}
+
+/*
+ * movz emulation: copy operand fs to fd when the register selected by
+ * ft is ZERO or holds the value zero.  The trap frame is viewed as an
+ * array of register_t indexed by ft to read that value.
+ *
+ * Returns SIGILL when fmt is neither FMT_S nor FMT_D, 0 otherwise
+ * (whether or not the move took place).
+ */
+int
+fpu_movz(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	register_t *gpr = (register_t *)tf;
+
+	if (!(fmt == FMT_S || fmt == FMT_D))
+		return SIGILL;
+
+	/* nothing to do if the test register holds a nonzero value */
+	if (ft != ZERO && gpr[ft] != 0)
+		return 0;
+
+	fpu_store(tf, fmt, fd, fpu_load(tf, fmt, fs));
+
+	return 0;
+}
+
+/*
+ * msub emulation: fd = (fs * ft) - fr, computed as a softfloat multiply
+ * followed by a separate softfloat subtract, in single or double
+ * precision as selected by fmt.  Returns SIGILL for any other fmt,
+ * 0 otherwise.
+ */
+int
+fpu_msub(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
+{
+	uint64_t mult1, mult2, subtrahend, result;
+
+	if (!(fmt == FMT_S || fmt == FMT_D))
+		return SIGILL;
+
+	mult1 = fpu_load(tf, fmt, fs);
+	mult2 = fpu_load(tf, fmt, ft);
+	subtrahend = fpu_load(tf, fmt, fr);
+	if (fmt == FMT_D) {
+		float64 product = float64_mul((float64)mult1, (float64)mult2);
+
+		result = (uint64_t)float64_sub(product, (float64)subtrahend);
+	} else {
+		float32 product = float32_mul((float32)mult1, (float32)mult2);
+
+		result = (uint64_t)float32_sub(product, (float32)subtrahend);
+	}
+	fpu_store(tf, fmt, fd, result);
+
+	return 0;
+}
+
+/*
+ * mul emulation: fd = fs * ft, in single or double precision as
+ * selected by fmt.  Returns SIGILL for any other fmt, 0 otherwise.
+ */
+int
+fpu_mul(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t lhs, rhs, product;
+
+	if (!(fmt == FMT_S || fmt == FMT_D))
+		return SIGILL;
+
+	lhs = fpu_load(tf, fmt, fs);
+	rhs = fpu_load(tf, fmt, ft);
+	if (fmt == FMT_D)
+		product = (uint64_t)float64_mul((float64)lhs, (float64)rhs);
+	else
+		product = (uint64_t)float32_mul((float32)lhs, (float32)rhs);
+	fpu_store(tf, fmt, fd, product);
+
+	return 0;
+}
+
+/*
+ * neg emulation: write operand fs to fd with its sign bit flipped.
+ * A NaN operand is stored back unmodified and float_set_invalid() is
+ * called instead.
+ *
+ * Returns SIGILL when ft is nonzero or fmt is neither FMT_S nor FMT_D,
+ * 0 otherwise.
+ */
+int
+fpu_neg(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t raw;
+
+	if (ft != 0)
+		return SIGILL;
+	if (fmt != FMT_S && fmt != FMT_D)
+		return SIGILL;
+
+	raw = fpu_load(tf, fmt, fs);
+	/* flip sign bit unless NaN */
+	if (fmt == FMT_S) {
+		float32 f32 = (float32)raw;
+		if (float32_is_nan(f32)) {
+			float_set_invalid();
+		} else {
+			/* unsigned constant: no dependence on long's width */
+			f32 ^= 1U << 31;
+			raw = (uint64_t)f32;
+		}
+	} else {
+		float64 f64 = (float64)raw;
+		if (float64_is_nan(f64)) {
+			float_set_invalid();
+		} else {
+			/*
+			 * unsigned constant: shifting a signed 1 into
+			 * bit 63 is undefined behaviour
+			 */
+			f64 ^= 1ULL << 63;
+			raw = (uint64_t)f64;
+		}
+	}
+	fpu_store(tf, fmt, fd, raw);
+
+	return 0;
+}
+
+/*
+ * nmadd emulation: fd = -((fs * ft) + fr), computed as a softfloat
+ * multiply, a separate softfloat add and a final sign-bit flip, in
+ * single or double precision as selected by fmt.  If the sum is a NaN
+ * it is stored unmodified and float_set_invalid() is called instead of
+ * flipping the sign.
+ *
+ * Returns SIGILL when fmt is neither FMT_S nor FMT_D, 0 otherwise.
+ */
+int
+fpu_nmadd(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
+{
+	uint64_t raw1, raw2, raw3, rslt;
+
+	if (fmt != FMT_S && fmt != FMT_D)
+		return SIGILL;
+
+	raw1 = fpu_load(tf, fmt, fs);
+	raw2 = fpu_load(tf, fmt, ft);
+	raw3 = fpu_load(tf, fmt, fr);
+	if (fmt == FMT_S) {
+		float32 f32 = float32_add(
+		    float32_mul((float32)raw1, (float32)raw2),
+		    (float32)raw3);
+		if (float32_is_nan(f32))
+			float_set_invalid();
+		else
+			/* unsigned constant: no dependence on long's width */
+			f32 ^= 1U << 31;
+		rslt = (uint64_t)f32;
+	} else {
+		float64 f64 = float64_add(
+		    float64_mul((float64)raw1, (float64)raw2),
+		    (float64)raw3);
+		if (float64_is_nan(f64))
+			float_set_invalid();
+		else
+			/* unsigned: signed 1 << 63 is undefined behaviour */
+			f64 ^= 1ULL << 63;
+		rslt = (uint64_t)f64;
+	}
+	fpu_store(tf, fmt, fd, rslt);
+
+	return 0;
+}
+
+/*
+ * nmsub emulation: fd = -((fs * ft) - fr), computed as a softfloat
+ * multiply, a separate softfloat subtract and a final sign-bit flip,
+ * in single or double precision as selected by fmt.  If the difference
+ * is a NaN it is stored unmodified and float_set_invalid() is called
+ * instead of flipping the sign.
+ *
+ * Returns SIGILL when fmt is neither FMT_S nor FMT_D, 0 otherwise.
+ */
+int
+fpu_nmsub(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
+{
+	uint64_t raw1, raw2, raw3, rslt;
+
+	if (fmt != FMT_S && fmt != FMT_D)
+		return SIGILL;
+
+	raw1 = fpu_load(tf, fmt, fs);
+	raw2 = fpu_load(tf, fmt, ft);
+	raw3 = fpu_load(tf, fmt, fr);
+	if (fmt == FMT_S) {
+		float32 f32 = float32_sub(
+		    float32_mul((float32)raw1, (float32)raw2),
+		    (float32)raw3);
+		if (float32_is_nan(f32))
+			float_set_invalid();
+		else
+			/* unsigned constant: no dependence on long's width */
+			f32 ^= 1U << 31;
+		rslt = (uint64_t)f32;
+	} else {
+		float64 f64 = float64_sub(
+		    float64_mul((float64)raw1, (float64)raw2),
+		    (float64)raw3);
+		if (float64_is_nan(f64))
+			float_set_invalid();
+		else
+			/* unsigned: signed 1 << 63 is undefined behaviour */
+			f64 ^= 1ULL << 63;
+		rslt = (uint64_t)f64;
+	}
+	fpu_store(tf, fmt, fd, rslt);
+
+	return 0;
+}
+
+/*
+ * recip emulation: fd = 1 / fs, implemented as a softfloat division of
+ * ONE_F32 or ONE_F64 by the operand, as selected by fmt.
+ *
+ * Returns SIGILL when ft is nonzero or fmt is neither FMT_S nor FMT_D,
+ * 0 otherwise.
+ */
+int
+fpu_recip(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t operand, inverse;
+
+	if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
+		return SIGILL;
+
+	operand = fpu_load(tf, fmt, fs);
+	if (fmt == FMT_D)
+		inverse = (uint64_t)float64_div(ONE_F64, (float64)operand);
+	else
+		inverse = (uint64_t)float32_div(ONE_F32, (float32)operand);
+	fpu_store(tf, fmt, fd, inverse);
+
+	return 0;
+}
+
+/*
+ * round.l emulation: hand the operands to fpu_int_l() with the rounding
+ * mode forced to FP_RN (round towards nearest) and pass its status back
+ * to the caller unchanged.
+ */
+int
+fpu_round_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	int status;
+
+	status = fpu_int_l(tf, fmt, ft, fs, fd, FP_RN);
+	return status;
+}
+
+/*
+ * round.w emulation: hand the operands to fpu_int_w() with the rounding
+ * mode forced to FP_RN (round towards nearest) and pass its status back
+ * to the caller unchanged.
+ */
+int
+fpu_round_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	int status;
+
+	status = fpu_int_w(tf, fmt, ft, fs, fd, FP_RN);
+	return status;
+}
+
+/*
+ * rsqrt emulation: fd = 1 / sqrt(fs), computed as a softfloat square
+ * root followed by a softfloat division of ONE_F32 or ONE_F64 by that
+ * root.  The division is skipped, and the square root result stored
+ * as is, when both FPCSR_C_V and FPCSR_E_V are set in tf->fsr once the
+ * square root has been computed.
+ *
+ * Returns SIGILL when ft is nonzero or fmt is neither FMT_S nor FMT_D,
+ * 0 otherwise.
+ */
+int
+fpu_rsqrt(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t raw;
+
+	if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
+		return SIGILL;
+
+	raw = fpu_load(tf, fmt, fs);
+	if (fmt == FMT_D) {
+		float64 root = float64_sqrt((float64)raw);
+
+		/* tf->fsr must be examined only after the square root */
+		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
+		    (FPCSR_C_V | FPCSR_E_V))
+			root = float64_div(ONE_F64, root);
+		raw = (uint64_t)root;
+	} else {
+		float32 root = float32_sqrt((float32)raw);
+
+		/* tf->fsr must be examined only after the square root */
+		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
+		    (FPCSR_C_V | FPCSR_E_V))
+			root = float32_div(ONE_F32, root);
+		raw = (uint64_t)root;
+	}
+	fpu_store(tf, fmt, fd, raw);
+
+	return 0;
+}
+
+/*
+ * sqrt emulation: fd = sqrt(fs), in single or double precision as
+ * selected by fmt.
+ *
+ * Returns SIGILL when ft is nonzero or fmt is neither FMT_S nor FMT_D,
+ * 0 otherwise.
+ */
+int
+fpu_sqrt(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t operand, root;
+
+	if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
+		return SIGILL;
+
+	operand = fpu_load(tf, fmt, fs);
+	if (fmt == FMT_D)
+		root = (uint64_t)float64_sqrt((float64)operand);
+	else
+		root = (uint64_t)float32_sqrt((float32)operand);
+	fpu_store(tf, fmt, fd, root);
+
+	return 0;
+}
+
+/*
+ * sub emulation: fd = fs - ft, in single or double precision as
+ * selected by fmt.  Returns SIGILL for any other fmt, 0 otherwise.
+ */
+int
+fpu_sub(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t minuend, subtrahend, difference;
+
+	if (!(fmt == FMT_S || fmt == FMT_D))
+		return SIGILL;
+
+	minuend = fpu_load(tf, fmt, fs);
+	subtrahend = fpu_load(tf, fmt, ft);
+	if (fmt == FMT_D)
+		difference = (uint64_t)float64_sub((float64)minuend,
+		    (float64)subtrahend);
+	else
+		difference = (uint64_t)float32_sub((float32)minuend,
+		    (float32)subtrahend);
+	fpu_store(tf, fmt, fd, difference);
+
+	return 0;
+}
+
+/*
+ * trunc.l emulation: hand the operands to fpu_int_l() with the rounding
+ * mode forced to FP_RZ (round towards zero) and pass its status back to
+ * the caller unchanged.
+ */
+int
+fpu_trunc_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	int status;
+
+	status = fpu_int_l(tf, fmt, ft, fs, fd, FP_RZ);
+	return status;
+}
+
+/*
+ * trunc.w emulation: hand the operands to fpu_int_w() with the rounding
+ * mode forced to FP_RZ (round towards zero) and pass its status back to
+ * the caller unchanged.
+ */
+int
+fpu_trunc_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	int status;
+
+	status = fpu_int_w(tf, fmt, ft, fs, fd, FP_RZ);
+	return status;
+}
diff --git a/sys/arch/mips64/mips64/lcore_float.S b/sys/arch/mips64/mips64/lcore_float.S
index c15db784cc6..b89837fe2f3 100644
--- a/sys/arch/mips64/mips64/lcore_float.S
+++ b/sys/arch/mips64/mips64/lcore_float.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: lcore_float.S,v 1.19 2010/01/08 01:35:52 syuu Exp $ */
+/* $OpenBSD: lcore_float.S,v 1.20 2010/09/21 20:29:17 miod Exp $ */
/*
* Copyright (c) 2001-2003 Opsycon AB (www.opsycon.se / www.opsycon.com)
@@ -152,7 +152,6 @@ LEAF(MipsSwitchFPState, 0)
ldc1 $f30, PCB_FPREGS+(30 * REGSZ)(a1)
ldc1 $f31, PCB_FPREGS+(31 * REGSZ)(a1)
- and t0, t0, ~FPC_EXCEPTION_BITS
ctc1 t0, FPC_CSR
nop
@@ -256,7 +255,6 @@ LEAF(MipsSwitchFPState16, 0)
lwc1 $f30, PCB_FPREGS+(30 * REGSZ)(a1)
lwc1 $f31, PCB_FPREGS+(31 * REGSZ)(a1)
- and t0, t0, ~FPC_EXCEPTION_BITS
ctc1 t0, FPC_CSR
nop
@@ -407,134 +405,6 @@ END(MipsSaveCurFPState16)
/*----------------------------------------------------------------------------
*
- * MipsFPTrap --
- *
- * Handle a floating point Trap.
- *
- * MipsFPTrap(statusReg, causeReg, pc)
- * unsigned statusReg;
- * unsigned causeReg;
- * unsigned pc;
- *
- * Results:
- * None.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------------
- */
-NON_LEAF(MipsFPTrap, FRAMESZ(CF_SZ), ra)
- PTR_SUBU sp, sp, FRAMESZ(CF_SZ)
- mfc0 t0, COP_0_STATUS_REG
- PTR_S ra, CF_RA_OFFS(sp)
- .mask 0x80000000, (CF_RA_OFFS - FRAMESZ(CF_SZ))
-
- PTR_S a2, 2*REGSZ(sp)
- PTR_S a3, 3*REGSZ(sp)
- or t1, t0, SR_COP_1_BIT
- mtc0 t1, COP_0_STATUS_REG
- ITLBNOPFIX
- cfc1 t1, FPC_CSR # stall til FP done
- cfc1 t1, FPC_CSR # now get status
- nop
- sll t2, t1, (31-17) # unimplemented operation?
- bgez t2, 3f # no, normal trap
- nop
-/*
- * We got an unimplemented operation trap so fetch the instruction,
- * compute the next PC and emulate the instruction.
- */
- bgez a1, 1f # Check the branch delay bit.
- nop
-/*
- * The instruction is in the branch delay slot so the branch will have to
- * be emulated to get the resulting PC.
- */
- GET_CPU_INFO(t2, t3)
- PTR_L a0, CI_CURPROCPADDR(t2) # first arg is ptr to CPU regs
- move a1, a2 # second arg is instruction PC
- move a2, t1 # third arg is the FP CSR
- jal MipsEmulateBranch # compute PC after branch
- move a3, zero # fourth arg is FALSE
-/*
- * Now load the floating-point instruction in the branch delay slot
- * to be emulated.
- */
- PTR_L a2, 2*REGSZ(sp) # restore EXC pc
- b 2f
- lw a0, 4(a2) # a0 = coproc instruction
-/*
- * This is not in the branch delay slot so calculate the resulting
- * PC (epc + 4) into v0 and continue to MipsEmulateFP().
- */
-1:
- lw a0, 0(a2) # a0 = coproc instruction
- PTR_ADDU v0, a2, 4 # v0 = next pc
-2:
- GET_CPU_INFO(t2, t3)
- PTR_L a3, CI_CURPROCPADDR(t2) # first arg is ptr to CPU regs
- PTR_S v0, PCB_REGS+(PC * REGSZ)(a3) # save new pc
-/*
- * Check to see if the instruction to be emulated is a floating-point
- * instruction.
- */
- srl a3, a0, OPCODE_SHIFT
- beq a3, OPCODE_C1, 5f # this should never fail
- nop
-/*
- * Send a floating point exception signal to the current process.
- */
-3:
- cfc1 a1, FPC_CSR # code = FP exceptions
- GET_CPU_INFO(t2, t3)
- PTR_L a0, CI_CURPROC(t2) # get current process
- PTR_L a3, 3*REGSZ(sp)
- and v0, a1, FPC_EXCEPTION_INEXACT
- bnez v0, 4f
- li a2, FPE_FLTRES
- and v0, a1, FPC_EXCEPTION_UNDERFLOW
- bnez v0, 4f
- li a2, FPE_FLTUND
- and v0, a1, FPC_EXCEPTION_OVERFLOW
- bnez v0, 4f
- li a2, FPE_FLTOVF
- and v0, a1, FPC_EXCEPTION_DIV0
- bnez v0, 4f
- li a2, FPE_FLTDIV
- li a2, FPE_FLTINV
-4:
- ctc1 zero, FPC_CSR # Clear exceptions
- jal fpu_trapsignal
- nop
- b FPReturn
- nop
-
-/*
- * Finally, we can call MipsEmulateFP() where a0 is the instruction to emulate.
- */
-5:
- jal MipsEmulateFP
- nop
-
- bnez v0, 3b # Emulation failed.
- nop
-
-/*
- * Turn off the floating point coprocessor and return.
- */
-FPReturn:
- mfc0 t0, COP_0_STATUS_REG
- PTR_L ra, CF_RA_OFFS(sp)
- and t0, t0, ~SR_COP_1_BIT
- mtc0 t0, COP_0_STATUS_REG
- ITLBNOPFIX
- j ra
- PTR_ADDU sp, sp, FRAMESZ(CF_SZ)
-END(MipsFPTrap)
-
-/*----------------------------------------------------------------------------
- *
* cp1_get_prid
*
* Get the floating point co-processor id.
@@ -562,4 +432,3 @@ LEAF(cp1_get_prid, 0)
jr ra
nop
END(cp1_get_prid)
-
diff --git a/sys/arch/mips64/mips64/process_machdep.c b/sys/arch/mips64/mips64/process_machdep.c
index bfe1b2948b3..748c3ca7af2 100644
--- a/sys/arch/mips64/mips64/process_machdep.c
+++ b/sys/arch/mips64/mips64/process_machdep.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: process_machdep.c,v 1.14 2010/06/26 23:24:43 guenther Exp $ */
+/* $OpenBSD: process_machdep.c,v 1.15 2010/09/21 20:29:17 miod Exp $ */
/*
* Copyright (c) 1994 Adam Glass
@@ -40,7 +40,7 @@
* From:
* Id: procfs_i386.c,v 4.1 1993/12/17 10:47:45 jsp Rel
*
- * $Id: process_machdep.c,v 1.14 2010/06/26 23:24:43 guenther Exp $
+ * $Id: process_machdep.c,v 1.15 2010/09/21 20:29:17 miod Exp $
*/
/*
@@ -72,6 +72,7 @@
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/ptrace.h>
+#include <machine/fpu.h>
#include <machine/frame.h>
#include <machine/reg.h>
@@ -111,6 +112,7 @@ process_write_regs(p, regs)
ic = p->p_md.md_regs->ic;
ipl = p->p_md.md_regs->ipl;
bcopy(&regs->r_regs[AST], &p->p_md.md_regs->ast, REGSIZE);
+ p->p_md.md_regs->fsr &= ~FPCSR_C_MASK;
p->p_md.md_regs->sr = sr;
p->p_md.md_regs->ic = ic;
p->p_md.md_regs->ipl = ipl;
diff --git a/sys/arch/mips64/mips64/trap.c b/sys/arch/mips64/mips64/trap.c
index c2a534f9fb8..378bd911409 100644
--- a/sys/arch/mips64/mips64/trap.c
+++ b/sys/arch/mips64/mips64/trap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: trap.c,v 1.67 2010/09/17 00:36:32 miod Exp $ */
+/* $OpenBSD: trap.c,v 1.68 2010/09/21 20:29:17 miod Exp $ */
/*
* Copyright (c) 1988 University of Utah.
@@ -133,10 +133,7 @@ uint64_t kdbpeekd(vaddr_t);
extern int kdb_trap(int, db_regs_t *);
#endif
-extern void MipsFPTrap(u_int, u_int, u_int, union sigval);
-
void ast(void);
-void fpu_trapsignal(struct proc *, u_long, int, union sigval);
void trap(struct trap_frame *);
#ifdef PTRACE
int cpu_singlestep(struct proc *);
@@ -746,6 +743,11 @@ printf("SIG-BUSB @%p pc %p, ra %p\n", trapframe->badvaddr, trapframe->pc, trapfr
break;
case T_COP_UNUSABLE+T_USER:
+ /*
+ * Note MIPS IV COP1X instructions issued with FPU
+ * disabled correctly report coprocessor 1 as the
+ * unusable coprocessor number.
+ */
if ((trapframe->cause & CR_COP_ERR) != 0x10000000) {
i = SIGILL; /* only FPU instructions allowed */
typ = ILL_ILLOPC;
@@ -761,8 +763,7 @@ printf("SIG-BUSB @%p pc %p, ra %p\n", trapframe->badvaddr, trapframe->pc, trapfr
goto err;
case T_FPE+T_USER:
- sv.sival_ptr = (void *)trapframe->pc;
- MipsFPTrap(trapframe->sr, trapframe->cause, trapframe->pc, sv);
+ MipsFPTrap(trapframe);
goto out;
case T_OVFLOW+T_USER:
@@ -835,17 +836,6 @@ child_return(arg)
#endif
}
-/*
- * Wrapper around trapsignal() for use by the floating point code.
- */
-void
-fpu_trapsignal(struct proc *p, u_long ucode, int typ, union sigval sv)
-{
- KERNEL_PROC_LOCK(p);
- trapsignal(p, SIGFPE, ucode, typ, sv);
- KERNEL_PROC_UNLOCK(p);
-}
-
#if defined(DDB) || defined(DEBUG)
void
trapDump(char *msg)