src - OpenBSD base system

diff options


context:
space:
mode:

author	Miod Vallat <miod@cvs.openbsd.org>	2010-09-21 20:29:18 +0000
committer	Miod Vallat <miod@cvs.openbsd.org>	2010-09-21 20:29:18 +0000
commit	39eff95ee263d1a682cb8667f5e7ea2307be5a0c (patch)
tree	4dbffcedda9b781e4a328757263429adf2fdc640 /sys/arch
parent	37d466cb419fc3bce08d762cf8bf4cad7f3c5ae5 (diff)

Replace the old floating point completion code with a C interface to the MI softfloat code, implementing all MIPS IV specified floating point operations. Tested on R5000, R10000, R14000 and Loongson2F.

Diffstat (limited to 'sys/arch')

-rw-r--r--

sys/arch/mips64/conf/files.mips64

-rw-r--r--

sys/arch/mips64/include/cpu.h

-rw-r--r--

sys/arch/mips64/include/ieeefp.h

-rw-r--r--

sys/arch/mips64/mips64/fp.S

3127

-rw-r--r--

sys/arch/mips64/mips64/fp_emulate.c

1310

-rw-r--r--

sys/arch/mips64/mips64/lcore_float.S

133

-rw-r--r--

sys/arch/mips64/mips64/process_machdep.c

-rw-r--r--

sys/arch/mips64/mips64/trap.c

8 files changed, 1350 insertions, 3319 deletions

diff --git a/sys/arch/mips64/conf/files.mips64 b/sys/arch/mips64/conf/files.mips64
index 4cbaa6bb8b4..4e558136edd 100644
--- a/sys/arch/mips64/conf/files.mips64
+++ b/sys/arch/mips64/conf/files.mips64

@@ -1,4 +1,4 @@

-# $OpenBSD: files.mips64,v 1.15 2010/09/20 12:10:26 syuu Exp $

+# $OpenBSD: files.mips64,v 1.16 2010/09/21 20:29:13 miod Exp $

file arch/mips64/mips64/arcbios.c arcbios

file arch/mips64/mips64/clock.c

@@ -20,7 +20,7 @@ file arch/mips64/mips64/cache_octeon.c cpu_octeon

file arch/mips64/mips64/context.S

file arch/mips64/mips64/cp0access.S

file arch/mips64/mips64/exception.S

-file arch/mips64/mips64/fp.S

+file arch/mips64/mips64/fp_emulate.c

file arch/mips64/mips64/lcore_access.S

file arch/mips64/mips64/lcore_float.S

file arch/mips64/mips64/tlbhandler.S

@@ -33,3 +33,5 @@ file arch/mips64/mips64/ipifuncs.c multiprocessor

file netinet/in_cksum.c inet

file netinet/in4_cksum.c inet

+file lib/libkern/softfloat.c

diff --git a/sys/arch/mips64/include/cpu.h b/sys/arch/mips64/include/cpu.h
index 429bd17d05f..6913ad4a4f9 100644
--- a/sys/arch/mips64/include/cpu.h
+++ b/sys/arch/mips64/include/cpu.h

@@ -1,4 +1,4 @@

-/* $OpenBSD: cpu.h,v 1.64 2010/09/20 12:10:26 syuu Exp $ */

+/* $OpenBSD: cpu.h,v 1.65 2010/09/21 20:29:17 miod Exp $ */

/*-

@@ -284,43 +284,6 @@ extern vaddr_t uncached_base;

#define FPC_CSR $31

- * The floating point coprocessor status register bits.

- */

-#define FPC_ROUNDING_BITS 0x00000003

-#define FPC_ROUND_RN 0x00000000

-#define FPC_ROUND_RZ 0x00000001

-#define FPC_ROUND_RP 0x00000002

-#define FPC_ROUND_RM 0x00000003

-#define FPC_STICKY_BITS 0x0000007c

-#define FPC_STICKY_INEXACT 0x00000004

-#define FPC_STICKY_UNDERFLOW 0x00000008

-#define FPC_STICKY_OVERFLOW 0x00000010

-#define FPC_STICKY_DIV0 0x00000020

-#define FPC_STICKY_INVALID 0x00000040

-#define FPC_ENABLE_BITS 0x00000f80

-#define FPC_ENABLE_INEXACT 0x00000080

-#define FPC_ENABLE_UNDERFLOW 0x00000100

-#define FPC_ENABLE_OVERFLOW 0x00000200

-#define FPC_ENABLE_DIV0 0x00000400

-#define FPC_ENABLE_INVALID 0x00000800

-#define FPC_EXCEPTION_BITS 0x0003f000

-#define FPC_EXCEPTION_INEXACT 0x00001000

-#define FPC_EXCEPTION_UNDERFLOW 0x00002000

-#define FPC_EXCEPTION_OVERFLOW 0x00004000

-#define FPC_EXCEPTION_DIV0 0x00008000

-#define FPC_EXCEPTION_INVALID 0x00010000

-#define FPC_EXCEPTION_UNIMPL 0x00020000

-#define FPC_COND_BIT 0x00800000

-#define FPC_FLUSH_BIT 0x01000000

-#define FPC_MBZ_BITS 0xfe7c0000

-/*

- * Constants to determine if have a floating point instruction.

- */

-#define OPCODE_SHIFT 26

-#define OPCODE_C1 0x11

-/*

* The low part of the TLB entry.

#define VMTLB_PF_NUM 0x3fffffc0

@@ -636,6 +599,7 @@ void save_fpu(void);

int guarded_read_4(paddr_t, uint32_t *);

int guarded_write_4(paddr_t, uint32_t);

+void MipsFPTrap(struct trap_frame *);

register_t MipsEmulateBranch(struct trap_frame *, vaddr_t, uint32_t, uint32_t);

diff --git a/sys/arch/mips64/include/ieeefp.h b/sys/arch/mips64/include/ieeefp.h
index b833c549bd3..0c2f18909b5 100644
--- a/sys/arch/mips64/include/ieeefp.h
+++ b/sys/arch/mips64/include/ieeefp.h

@@ -1,4 +1,4 @@

-/* $OpenBSD: ieeefp.h,v 1.2 2004/08/10 20:28:13 deraadt Exp $ */

+/* $OpenBSD: ieeefp.h,v 1.3 2010/09/21 20:29:17 miod Exp $ */

* Written by J.T. Conklin, Apr 11, 1995

@@ -22,4 +22,25 @@ typedef enum {

FP_RM=3 /* round toward negative infinity */

} fp_rnd;

+#ifdef _KERNEL

+/*

+ * Defines for the floating-point completion/emulation code.

+ */

+#include <sys/param.h>

+#include <sys/systm.h>

+#include <sys/proc.h>

+#include <machine/fpu.h>

+#define float_raise(bits) \

+ do { curproc->p_md.md_regs->fsr |= (bits) << FPCSR_C_SHIFT; } while (0)

+#define float_set_inexact() float_raise(FP_X_IMP)

+#define float_set_invalid() float_raise(FP_X_INV)

+#define float_get_round(csr) (csr & FPCSR_RM_MASK)

+#define fpgetround() float_get_round(curproc->p_md.md_regs->fsr)

+#endif

#endif /* !_MIPS_IEEEFP_H_ */

diff --git a/sys/arch/mips64/mips64/fp.S b/sys/arch/mips64/mips64/fp.S
deleted file mode 100644
index 5578b6f576f..00000000000
--- a/sys/arch/mips64/mips64/fp.S
+++ /dev/null

@@ -1,3127 +0,0 @@

-/* $OpenBSD: fp.S,v 1.9 2010/02/08 19:26:46 miod Exp $ */

-/*

- *

- * This code is derived from software contributed to Berkeley by

- * Ralph Campbell.

- *

- * Redistribution and use in source and binary forms, with or without

- * modification, are permitted provided that the following conditions

- * are met:

- * 1. Redistributions of source code must retain the above copyright

- * notice, this list of conditions and the following disclaimer.

- * 2. Redistributions in binary form must reproduce the above copyright

- * notice, this list of conditions and the following disclaimer in the

- * documentation and/or other materials provided with the distribution.

- * 3. All advertising materials mentioning features or use of this software

- * must display the following acknowledgement:

- * This product includes software developed by the University of

- * California, Berkeley and its contributors.

- * 4. Neither the name of the University nor the names of its contributors

- * may be used to endorse or promote products derived from this software

- * without specific prior written permission.

- *

- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

- * SUCH DAMAGE.

- *

- * from: @(#)fp.s 8.1 (Berkeley) 6/10/93

- * $Id: fp.S,v 1.9 2010/02/08 19:26:46 miod Exp $

- */

-/*

- * Standard header stuff.

- */

-#include <machine/regdef.h>

-#include <machine/asm.h>

-#include <machine/regnum.h>

-#include <machine/cpu.h>

-#include "assym.h"

-#define SEXP_INF 0xff

-#define DEXP_INF 0x7ff

-#define SEXP_BIAS 127

-#define DEXP_BIAS 1023

-#define SEXP_MIN -126

-#define DEXP_MIN -1022

-#define SEXP_MAX 127

-#define DEXP_MAX 1023

-#define WEXP_MAX 30 /* maximum unbiased exponent for int */

-#define WEXP_MIN -1 /* minimum unbiased exponent for int */

-#define LEXP_MAX 62 /* maximum unbiased exponent for long */

-#define LEXP_MIN -1 /* minimum unbiased exponent for long */

-#define SFRAC_BITS 23

-#define DFRAC_BITS 52

-#define SIMPL_ONE 0x00800000

-#define DIMPL_ONE 0x0010000000000000

-#define SLEAD_ZEROS 63 - 55

-#define DLEAD_ZEROS 63 - 52

-#define STICKYBIT 1

-#define GUARDBIT 0x0000000080000000

-#define DGUARDBIT 0x8000000000000000

-#define SSIGNAL_NAN 0x00400000

-#define DSIGNAL_NAN 0x00080000

-#define SQUIET_NAN 0x003fffff

-#define DQUIET_NAN 0x0007ffffffffffff

-#define INT_MIN 0x80000000

-#define INT_MAX 0x7fffffff

-#define LONG_MIN 0x8000000000000000

-#define LONG_MAX 0x7fffffffffffffff

-#define COND_UNORDERED 0x1

-#define COND_EQUAL 0x2

-#define COND_LESS 0x4

-#define COND_SIGNAL 0x8

-/*----------------------------------------------------------------------------

- *

- * MipsEmulateFP --

- *

- * Emulate unimplemented floating point operations.

- * This routine should only be called by MipsFPInterrupt()

- * and only if this is a COP1 instruction.

- *

- * MipsEmulateFP(instr)

- * unsigned instr;

- *

- * Results:

- * None.

- *

- * Side effects:

- * Floating point registers are modified according to instruction.

- *

- *----------------------------------------------------------------------------

- */

-NON_LEAF(MipsEmulateFP, FRAMESZ(CF_SZ), ra)

- PTR_SUB sp, sp, FRAMESZ(CF_SZ)

- PTR_S ra, CF_RA_OFFS(sp)

- srl v0, a0, 21 # get FMT field

- and v0, v0, 0x1f # mask FMT field

- dla a3, func_s

- beq v0, 0x10, 1f

- dla a3, func_d

- beq v0, 0x11, 1f

- dla a3, func_w

- beq v0, 0x14, 1f

- dla a3, func_l

- beq v0, 0x15, 1f

- b ill # illegal format

-1:

- and v1, a0, 0x3f # mask FUNC field

- sll v1, v1, 3 # align for table lookup

- daddu v1, a3

- cfc1 a1, FPC_CSR # get exception register

- ld a3, (v1) # switch on FUNC & FMT

- and a1, a1, ~FPC_EXCEPTION_UNIMPL # clear exception

- ctc1 a1, FPC_CSR

- j a3

- .rdata

-func_s:

- .dword add_s # 0

- .dword sub_s # 1

- .dword mul_s # 2

- .dword div_s # 3

- .dword ill # 4 (sqrt)

- .dword abs_s # 5

- .dword mov_s # 6

- .dword neg_s # 7

- .dword round_l_s # 8

- .dword trunc_l_s # 9

- .dword ceil_l_s # 10

- .dword floor_l_s # 11

- .dword round_w_s # 12

- .dword trunc_w_s # 13

- .dword ceil_w_s # 14

- .dword floor_w_s # 15

- .dword ill # 16

- .dword ill # 17

- .dword ill # 18

- .dword ill # 19

- .dword ill # 20

- .dword ill # 21

- .dword ill # 22

- .dword ill # 23

- .dword ill # 24

- .dword ill # 25

- .dword ill # 26

- .dword ill # 27

- .dword ill # 28

- .dword ill # 29

- .dword ill # 30

- .dword ill # 31

- .dword ill # 32

- .dword cvt_d_s # 33

- .dword ill # 34

- .dword ill # 35

- .dword cvt_w_s # 36

- .dword cvt_l_s # 37

- .dword ill # 38

- .dword ill # 39

- .dword ill # 40

- .dword ill # 41

- .dword ill # 42

- .dword ill # 43

- .dword ill # 44

- .dword ill # 45

- .dword ill # 46

- .dword ill # 47

- .dword cmp_s # 48

- .dword cmp_s # 49

- .dword cmp_s # 50

- .dword cmp_s # 51

- .dword cmp_s # 52

- .dword cmp_s # 53

- .dword cmp_s # 54

- .dword cmp_s # 55

- .dword cmp_s # 56

- .dword cmp_s # 57

- .dword cmp_s # 58

- .dword cmp_s # 59

- .dword cmp_s # 60

- .dword cmp_s # 61

- .dword cmp_s # 62

- .dword cmp_s # 63

-func_d:

- .dword add_d # 0

- .dword sub_d # 1

- .dword mul_d # 2

- .dword div_d # 3

- .dword ill # 4 (sqrt)

- .dword abs_d # 5

- .dword mov_d # 6

- .dword neg_d # 7

- .dword round_l_d # 8

- .dword trunc_l_d # 9

- .dword ceil_l_d # 10

- .dword floor_l_d # 11

- .dword round_w_d # 12

- .dword trunc_w_d # 13

- .dword ceil_w_d # 14

- .dword floor_w_d # 15

- .dword ill # 16

- .dword ill # 17

- .dword ill # 18

- .dword ill # 19

- .dword ill # 20

- .dword ill # 21

- .dword ill # 22

- .dword ill # 23

- .dword ill # 24

- .dword ill # 25

- .dword ill # 26

- .dword ill # 27

- .dword ill # 28

- .dword ill # 29

- .dword ill # 30

- .dword ill # 31

- .dword cvt_s_d # 32

- .dword ill # 33

- .dword ill # 34

- .dword ill # 35

- .dword cvt_w_d # 36

- .dword cvt_l_d # 37

- .dword ill # 38

- .dword ill # 39

- .dword ill # 40

- .dword ill # 41

- .dword ill # 42

- .dword ill # 43

- .dword ill # 44

- .dword ill # 45

- .dword ill # 46

- .dword ill # 47

- .dword cmp_d # 48

- .dword cmp_d # 49

- .dword cmp_d # 50

- .dword cmp_d # 51

- .dword cmp_d # 52

- .dword cmp_d # 53

- .dword cmp_d # 54

- .dword cmp_d # 55

- .dword cmp_d # 56

- .dword cmp_d # 57

- .dword cmp_d # 58

- .dword cmp_d # 59

- .dword cmp_d # 60

- .dword cmp_d # 61

- .dword cmp_d # 62

- .dword cmp_d # 63

-func_w:

- .dword ill # 0

- .dword ill # 1

- .dword ill # 2

- .dword ill # 3

- .dword ill # 4

- .dword ill # 5

- .dword ill # 6

- .dword ill # 7

- .dword ill # 8

- .dword ill # 9

- .dword ill # 10

- .dword ill # 11

- .dword ill # 12

- .dword ill # 13

- .dword ill # 14

- .dword ill # 15

- .dword ill # 16

- .dword ill # 17

- .dword ill # 18

- .dword ill # 19

- .dword ill # 20

- .dword ill # 21

- .dword ill # 22

- .dword ill # 23

- .dword ill # 24

- .dword ill # 25

- .dword ill # 26

- .dword ill # 27

- .dword ill # 28

- .dword ill # 29

- .dword ill # 30

- .dword ill # 31

- .dword cvt_s_w # 32

- .dword cvt_d_w # 33

- .dword ill # 34

- .dword ill # 35

- .dword ill # 36

- .dword ill # 37

- .dword ill # 38

- .dword ill # 39

- .dword ill # 40

- .dword ill # 41

- .dword ill # 42

- .dword ill # 43

- .dword ill # 44

- .dword ill # 45

- .dword ill # 46

- .dword ill # 47

- .dword ill # 48

- .dword ill # 49

- .dword ill # 50

- .dword ill # 51

- .dword ill # 52

- .dword ill # 53

- .dword ill # 54

- .dword ill # 55

- .dword ill # 56

- .dword ill # 57

- .dword ill # 58

- .dword ill # 59

- .dword ill # 60

- .dword ill # 61

- .dword ill # 62

- .dword ill # 63

-func_l:

- .dword ill # 0

- .dword ill # 1

- .dword ill # 2

- .dword ill # 3

- .dword ill # 4

- .dword ill # 5

- .dword ill # 6

- .dword ill # 7

- .dword ill # 8

- .dword ill # 9

- .dword ill # 10

- .dword ill # 11

- .dword ill # 12

- .dword ill # 13

- .dword ill # 14

- .dword ill # 15

- .dword ill # 16

- .dword ill # 17

- .dword ill # 18

- .dword ill # 19

- .dword ill # 20

- .dword ill # 21

- .dword ill # 22

- .dword ill # 23

- .dword ill # 24

- .dword ill # 25

- .dword ill # 26

- .dword ill # 27

- .dword ill # 28

- .dword ill # 29

- .dword ill # 30

- .dword ill # 31

- .dword cvt_s_l # 32

- .dword cvt_d_l # 33

- .dword ill # 34

- .dword ill # 35

- .dword ill # 36

- .dword ill # 37

- .dword ill # 38

- .dword ill # 39

- .dword ill # 40

- .dword ill # 41

- .dword ill # 42

- .dword ill # 43

- .dword ill # 44

- .dword ill # 45

- .dword ill # 46

- .dword ill # 47

- .dword ill # 48

- .dword ill # 49

- .dword ill # 50

- .dword ill # 51

- .dword ill # 52

- .dword ill # 53

- .dword ill # 54

- .dword ill # 55

- .dword ill # 56

- .dword ill # 57

- .dword ill # 58

- .dword ill # 59

- .dword ill # 60

- .dword ill # 61

- .dword ill # 62

- .dword ill # 63

- .text

-/*

- * Single precision subtract.

- */

-sub_s:

- jal get_ft_fs_s

- xor ta0, 1 # negate FT sign bit

- b add_sub_s

-/*

- * Single precision add.

- */

-add_s:

- jal get_ft_fs_s

-add_sub_s:

- bne t1, SEXP_INF, 1f # is FS an infinity?

- bne ta1, SEXP_INF, result_fs_s # if FT is not inf, result=FS

- bne t2, zero, result_fs_s # if FS is NAN, result is FS

- bne ta2, zero, result_ft_s # if FT is NAN, result is FT

- bne t0, ta0, invalid_s # both infinities same sign?

- b result_fs_s # result is in FS

-1:

- beq ta1, SEXP_INF, result_ft_s # if FT is inf, result=FT

- bne t1, zero, 4f # is FS a denormalized num?

- beq t2, zero, 3f # is FS zero?

- bne ta1, zero, 2f # is FT a denormalized num?

- beq ta2, zero, result_fs_s # FT is zero, result=FS

- jal renorm_fs_s

- jal renorm_ft_s

- b 5f

-2:

- jal renorm_fs_s

- subu ta1, ta1, SEXP_BIAS # unbias FT exponent

- or ta2, ta2, SIMPL_ONE # set implied one bit

- b 5f

-3:

- bne ta1, zero, result_ft_s # if FT != 0, result=FT

- bne ta2, zero, result_ft_s

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- bne v0, FPC_ROUND_RM, 1f # round to -infinity?

- or t0, t0, ta0 # compute result sign

- b result_fs_s

-1:

- and t0, ta0 # compute result sign

- b result_fs_s

-4:

- bne ta1, zero, 2f # is FT a denormalized num?

- beq ta2, zero, result_fs_s # FT is zero, result=FS

- subu t1, SEXP_BIAS # unbias FS exponent

- or t2, SIMPL_ONE # set implied one bit

- jal renorm_ft_s

- b 5f

-2:

- subu t1, SEXP_BIAS # unbias FS exponent

- or t2, SIMPL_ONE # set implied one bit

- subu ta1, SEXP_BIAS # unbias FT exponent

- or ta2, SIMPL_ONE # set implied one bit

-/*

- * Perform the addition.

- */

-5:

- move t8, zero # no shifted bits (sticky reg)

- beq t1, ta1, 4f # exp equal, no shift needed

- subu v0, t1, ta1 # v0 = difference of exponents

- move v1, v0 # v1 = abs(difference)

- bge v0, zero, 1f

- negu v1

-1:

- ble v1, SFRAC_BITS+2, 2f # is difference too great?

- li t8, STICKYBIT # set the sticky bit

- bge v0, zero, 1f # check which exp is larger

- move t1, ta1 # result exp is FTs

- move t2, zero # FSs fraction shifted is zero

- b 4f

-1:

- move ta2, zero # FTs fraction shifted is zero

- b 4f

-2:

- li t9, 32 # compute 32 - abs(exp diff)

- subu t9, t9, v1

- bgt v0, zero, 3f # if FS > FT, shift FTs frac

- move t1, ta1 # FT > FS, result exp is FTs

- sll t8, t2, t9 # save bits shifted out

- srl t2, t2, v1 # shift FSs fraction

- b 4f

-3:

- sll t8, ta2, t9 # save bits shifted out

- srl ta2, ta2, v1 # shift FTs fraction

-4:

- bne t0, ta0, 1f # if signs differ, subtract

- addu t2, t2, ta2 # add fractions

- b norm_s

-1:

- blt t2, ta2, 3f # subtract larger from smaller

- bne t2, ta2, 2f # if same, result=0

- move t1, zero # result=0

- move t2, zero

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- bne v0, FPC_ROUND_RM, 1f # round to -infinity?

- or t0, t0, ta0 # compute result sign

- b result_fs_s

-1:

- and t0, t0, ta0 # compute result sign

- b result_fs_s

-2:

- sltu t9, zero, t8 # compute t2:zero - ta2:t8

- subu t8, zero, t8

- subu t2, t2, ta2 # subtract fractions

- subu t2, t2, t9 # subtract barrow

- b norm_s

-3:

- move t0, ta0 # sign of result = FTs

- sltu t9, zero, t8 # compute ta2:zero - t2:t8

- subu t8, zero, t8

- subu t2, ta2, t2 # subtract fractions

- subu t2, t2, t9 # subtract barrow

- b norm_s

-/*

- * Double precision subtract.

- */

-sub_d:

- jal get_ft_fs_d

- xor ta0, ta0, 1 # negate sign bit

- b add_sub_d

-/*

- * Double precision add.

- */

-add_d:

- jal get_ft_fs_d

-add_sub_d:

- bne t1, DEXP_INF, 1f # is FS an infinity?

- bne ta1, DEXP_INF, result_fs_d # if FT is not inf, result=FS

- bne t2, zero, result_fs_d # if FS is NAN, result is FS

- bne ta2, zero, result_ft_d # if FT is NAN, result is FT

- bne t0, ta0, invalid_d # both infinities same sign?

- b result_fs_d # result is in FS

-1:

- beq ta1, DEXP_INF, result_ft_d # if FT is inf, result=FT

- bne t1, zero, 4f # is FS a denormalized num?

- beq t2, zero, 3f # is FS zero?

- bne ta1, zero, 2f # is FT a denormalized num?

- beq ta2, zero, result_fs_d # FT is zero, result=FS

- jal renorm_fs_d

- jal renorm_ft_d

- b 5f

-2:

- jal renorm_fs_d

- subu ta1, ta1, DEXP_BIAS # unbias FT exponent

- or ta2, ta2, DIMPL_ONE # set implied one bit

- b 5f

-3:

- bne ta1, zero, result_ft_d # if FT != 0, result=FT

- bne ta2, zero, result_ft_d

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- bne v0, FPC_ROUND_RM, 1f # round to -infinity?

- or t0, t0, ta0 # compute result sign

- b result_fs_d

-1:

- and t0, t0, ta0 # compute result sign

- b result_fs_d

-4:

- bne ta1, zero, 2f # is FT a denormalized num?

- beq ta2, zero, result_fs_d # FT is zero, result=FS

- subu t1, t1, DEXP_BIAS # unbias FS exponent

- or t2, t2, DIMPL_ONE # set implied one bit

- jal renorm_ft_d

- b 5f

-2:

- subu t1, t1, DEXP_BIAS # unbias FS exponent

- or t2, t2, DIMPL_ONE # set implied one bit

- subu ta1, ta1, DEXP_BIAS # unbias FT exponent

- or ta2, ta2, DIMPL_ONE # set implied one bit

-/*

- * Perform the addition.

- */

-5:

- move t8, zero # no shifted bits (sticky reg)

- beq t1, ta1, 4f # no shift needed

- subu v0, t1, ta1 # v0 = difference of exponents

- move v1, v0 # v1 = abs(difference)

- bge v0, zero, 1f

- negu v1

-1:

- ble v1, DFRAC_BITS+2, 2f # is difference too great?

- li t8, STICKYBIT # set the sticky bit

- bge v0, zero, 1f # check which exp is larger

- move t1, ta1 # result exp is FTs

- move t2, zero # FSs fraction shifted is zero

- b 4f

-1:

- move ta2, zero # FTs fraction shifted is zero

- b 4f

-2:

- li t9, 64

- subu t9, t9, v1

- bge v0, zero, 3f # if FS > FT, shift FTs frac

- move t1, ta1 # FT > FS, result exp is FTs

- dsll t8, t2, t9 # save bits shifted out

- dsrl t2, t2, v1

- b 4f

-3:

- dsll t8, ta2, t9 # save bits shifted out

- dsrl ta2, ta2, v1

-4:

- bne t0, ta0, 1f # if signs differ, subtract

- daddu t2, ta2 # add fractions

- b norm_d

-1:

- blt t2, ta2, 3f # subtract larger from smaller

- bne t2, ta2, 2f

- move t1, zero # result=0

- move t2, zero

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- bne v0, FPC_ROUND_RM, 1f # round to -infinity?

- or t0, t0, ta0 # compute result sign

- b result_fs_d

-1:

- and t0, t0, ta0 # compute result sign

- b result_fs_d

-2:

- sltu t9, zero, t8 # compute t2:zero - ta2:t8

- dsubu t8, zero, t8

- dsubu t2, t2, ta2 # subtract fractions

- dsubu t2, t2, t9 # subtract barrow

- b norm_d

-3:

- move t0, ta0 # sign of result = FTs

- sltu t9, zero, t8

- dsubu t2, ta2, t2 # subtract fractions

- dsubu t2, t2, t9 # subtract barrow

- b norm_d

-/*

- * Single precision multiply.

- */

-mul_s:

- jal get_ft_fs_s

- xor t0, t0, ta0 # compute sign of result

- move ta0, t0

- bne t1, SEXP_INF, 2f # is FS an infinity?

- bne t2, zero, result_fs_s # if FS is a NAN, result=FS

- bne ta1, SEXP_INF, 1f # FS is inf, is FT an infinity?

- bne ta2, zero, result_ft_s # if FT is a NAN, result=FT

- b result_fs_s # result is infinity

-1:

- bne ta1, zero, result_fs_s # inf * zero? if no, result=FS

- bne ta2, zero, result_fs_s

- b invalid_s # infinity * zero is invalid

-2:

- bne ta1, SEXP_INF, 1f # FS != inf, is FT an infinity?

- bne t1, zero, result_ft_s # zero * inf? if no, result=FT

- bne t2, zero, result_ft_s

- bne ta2, zero, result_ft_s # if FT is a NAN, result=FT

- b invalid_s # zero * infinity is invalid

-1:

- bne t1, zero, 1f # is FS zero?

- beq t2, zero, result_fs_s # result is zero

- jal renorm_fs_s

- b 2f

-1:

- subu t1, t1, SEXP_BIAS # unbias FS exponent

- or t2, t2, SIMPL_ONE # set implied one bit

-2:

- bne ta1, zero, 1f # is FT zero?

- beq ta2, zero, result_ft_s # result is zero

- jal renorm_ft_s

- b 2f

-1:

- subu ta1, ta1, SEXP_BIAS # unbias FT exponent

- or ta2, ta2, SIMPL_ONE # set implied one bit

-2:

- addu t1, t1, ta1 # compute result exponent

- addu t1, t1, 9 # account for binary point

- multu t2, ta2 # multiply fractions

- mflo t8

- mfhi t2

- b norm_s

-/*

- * Double precision multiply.

- */

-mul_d:

- jal get_ft_fs_d

- xor t0, t0, ta0 # compute sign of result

- move ta0, t0

- bne t1, DEXP_INF, 2f # is FS an infinity?

- bne t2, zero, result_fs_d # if FS is a NAN, result=FS

- bne ta1, DEXP_INF, 1f # FS is inf, is FT an infinity?

- bne ta2, zero, result_ft_d # if FT is a NAN, result=FT

- b result_fs_d # result is infinity

-1:

- bne ta1, zero, result_fs_d # inf * zero? if no, result=FS

- bne ta2, zero, result_fs_d

- b invalid_d # infinity * zero is invalid

-2:

- bne ta1, DEXP_INF, 1f # FS != inf, is FT an infinity?

- bne t1, zero, result_ft_d # zero * inf? if no, result=FT

- bne t2, zero, result_ft_d # if FS is a NAN, result=FS

- bne ta2, zero, result_ft_d # if FT is a NAN, result=FT

- b invalid_d # zero * infinity is invalid

-1:

- bne t1, zero, 2f # is FS zero?

- beq t2, zero, result_fs_d # result is zero

- jal renorm_fs_d

- b 3f

-2:

- subu t1, t1, DEXP_BIAS # unbias FS exponent

- or t2, t2, DIMPL_ONE # set implied one bit

-3:

- bne ta1, zero, 2f # is FT zero?

- beq ta2, zero, result_ft_d # result is zero

- jal renorm_ft_d

- b 3f

-2:

- subu ta1, ta1, DEXP_BIAS # unbias FT exponent

- or ta2, ta2, DIMPL_ONE # set implied one bit

-3:

- addu t1, t1, ta1 # compute result exponent

- addu t1, t1, 12 # ???

- dmultu t2, ta2 # multiply fractions

- mflo t8

- mfhi t2

- b norm_d

-/*

- * Single precision divide.

- */

-div_s:

- jal get_ft_fs_s

- xor t0, t0, ta0 # compute sign of result

- move ta0, t0

- bne t1, SEXP_INF, 1f # is FS an infinity?

- bne t2, zero, result_fs_s # if FS is NAN, result is FS

- bne ta1, SEXP_INF, result_fs_s # is FT an infinity?

- bne ta2, zero, result_ft_s # if FT is NAN, result is FT

- b invalid_s # infinity/infinity is invalid

-1:

- bne ta1, SEXP_INF, 1f # is FT an infinity?

- bne ta2, zero, result_ft_s # if FT is NAN, result is FT

- move t1, zero # x / infinity is zero

- move t2, zero

- b result_fs_s

-1:

- bne t1, zero, 2f # is FS zero?

- bne t2, zero, 1f

- bne ta1, zero, result_fs_s # FS=zero, is FT zero?

- beq ta2, zero, invalid_s # 0 / 0

- b result_fs_s # result = zero

-1:

- jal renorm_fs_s

- b 3f

-2:

- subu t1, t1, SEXP_BIAS # unbias FS exponent

- or t2, t2, SIMPL_ONE # set implied one bit

-3:

- bne ta1, zero, 2f # is FT zero?

- bne ta2, zero, 1f

- or a1, a1, FPC_EXCEPTION_DIV0 | FPC_STICKY_DIV0

- and v0, a1, FPC_ENABLE_DIV0 # trap enabled?

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # save exceptions

- li t1, SEXP_INF # result is infinity

- move t2, zero

- b result_fs_s

-1:

- jal renorm_ft_s

- b 3f

-2:

- subu ta1, ta1, SEXP_BIAS # unbias FT exponent

- or ta2, ta2, SIMPL_ONE # set implied one bit

-3:

- subu t1, t1, ta1 # compute exponent

- subu t1, t1, 3 # compensate for result position

- li v0, SFRAC_BITS+3 # number of bits to divide

- move t8, t2 # init dividend

- move t2, zero # init result

-1:

- bltu t8, ta2, 3f # is dividend >= divisor?

-2:

- subu t8, t8, ta2 # subtract divisor from dividend

- or t2, t2, 1 # remember that we did

- bne t8, zero, 3f # if not done, continue

- sll t2, t2, v0 # shift result to final position

- b norm_s

-3:

- sll t8, t8, 1 # shift dividend

- sll t2, t2, 1 # shift result

- subu v0, v0, 1 # are we done?

- bne v0, zero, 1b # no, continue

- b norm_s

-/*

- * Double precision divide.

- */

-div_d:

- jal get_ft_fs_d

- xor t0, t0, ta0 # compute sign of result

- move ta0, t0

- bne t1, DEXP_INF, 1f # is FS an infinity?

- bne t2, zero, result_fs_d # if FS is NAN, result is FS

- bne ta1, DEXP_INF, result_fs_d # is FT an infinity?

- bne ta2, zero, result_ft_d # if FT is NAN, result is FT

- b invalid_d # infinity/infinity is invalid

-1:

- bne ta1, DEXP_INF, 1f # is FT an infinity?

- bne ta2, zero, result_ft_d # if FT is NAN, result is FT

- move t1, zero # x / infinity is zero

- move t2, zero

- b result_fs_d

-1:

- bne t1, zero, 2f # is FS zero?

- bne t2, zero, 1f

- bne ta1, zero, result_fs_d # FS=zero, is FT zero?

- beq ta2, zero, invalid_d # 0 / 0

- b result_fs_d # result = zero

-1:

- jal renorm_fs_d

- b 3f

-2:

- subu t1, t1, DEXP_BIAS # unbias FS exponent

- or t2, t2, DIMPL_ONE # set implied one bit

-3:

- bne ta1, zero, 2f # is FT zero?

- bne ta2, zero, 1f

- or a1, a1, FPC_EXCEPTION_DIV0 | FPC_STICKY_DIV0

- and v0, a1, FPC_ENABLE_DIV0 # trap enabled?

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # Save exceptions

- li t1, DEXP_INF # result is infinity

- move t2, zero

- b result_fs_d

-1:

- jal renorm_ft_d

- b 3f

-2:

- subu ta1, ta1, DEXP_BIAS # unbias FT exponent

- or ta2, ta2, DIMPL_ONE # set implied one bit

-3:

- subu t1, t1, ta1 # compute exponent

- subu t1, t1, 3 # compensate for result position

- li v0, DFRAC_BITS+3 # number of bits to divide

- move t8, t2 # init dividend

- move t2, zero # init result

-1:

- bltu t8, ta2, 3f # is dividend >= divisor?

-2:

- dsubu t8, t8, ta2 # subtract divisor from dividend

- or t2, t2, 1 # remember that we did

- bne t8, zero, 3f # if not done, continue

- dsll t2, t2, v0 # shift upper part

- b norm_d

-3:

- dsll t8, t8, 1 # shift dividend

- dsll t2, t2, 1 # shift result

- subu v0, v0, 1 # are we done?

- bne v0, zero, 1b # no, continue

- b norm_d

-/*

- * Single precision absolute value.

- */

-abs_s:

- jal get_fs_s

- move t0, zero # set sign positive

- b result_fs_s

-/*

- * Double precision absolute value.

- */

-abs_d:

- jal get_fs_d

- move t0, zero # set sign positive

- b result_fs_d

-/*

- * Single precision move.

- */

-mov_s:

- jal get_fs_s

- b result_fs_s

-/*

- * Double precision move.

- */

-mov_d:

- jal get_fs_d

- b result_fs_d

-/*

- * Single precision negate.

- */

-neg_s:

- jal get_fs_s

- xor t0, t0, 1 # reverse sign

- b result_fs_s

-/*

- * Double precision negate.

- */

-neg_d:

- jal get_fs_d

- xor t0, t0, 1 # reverse sign

- b result_fs_d

-/*

- * Convert double to single.

- */

-cvt_s_d:

- jal get_fs_d

- bne t1, DEXP_INF, 1f # is FS an infinity?

- li t1, SEXP_INF # convert to single

- dsll t2, t2, 3 # convert D fraction to S

- b result_fs_s

-1:

- bne t1, zero, 2f # is FS zero?

- beq t2, zero, result_fs_s # result=0

- jal renorm_fs_d

- subu t1, t1, 3 # correct exp for shift below

- b 3f

-2:

- subu t1, t1, DEXP_BIAS # unbias exponent

- or t2, t2, DIMPL_ONE # add implied one bit

-3:

- dsll t2, t2, 3 # convert D fraction to S

- b norm_noshift_s

-/*

- * Convert long integer to single.

- */

-cvt_s_l:

- jal get_fs_long

- b cvt_s_int

-/*

- * Convert integer to single.

- */

-cvt_s_w:

- jal get_fs_int

-cvt_s_int:

- bne t2, zero, 1f # check for zero

- move t1, zero

- b result_fs_s

-/*

- * Find out how many leading zero bits are in t2 and put in t9.

- */

-1:

- move v0, t2

- move t9, zero

- dsrl v1, v0, 32

- bne v1, zero, 1f

- addu t9, 32

- dsll v0, 32

-1:

- dsrl v1, v0, 16

- bne v1, zero, 1f

- addu t9, 16

- dsll v0, 16

-1:

- dsrl v1, v0, 24

- bne v1, zero, 1f

- addu t9, 8

- dsll v0, 8

-1:

- dsrl v1, v0, 28

- bne v1, zero, 1f

- addu t9, 4

- dsll v0, 4

-1:

- dsrl v1, v0, 30

- bne v1, zero, 1f

- addu t9, 2

- dsll v0, 2

-1:

- dsrl v1, v0, 31

- bne v1, zero, 1f

- addu t9, 1

-/*

- * Now shift t2 the correct number of bits.

- */

-1:

- subu t9, SLEAD_ZEROS # dont count leading zeros

- li t1, 23+32 # init exponent

- subu t1, t1, t9 # compute exponent

- beq t9, zero, 1f

- li v0, 32

- blt t9, zero, 2f # if shift < 0, shift right

- subu v0, v0, t9

- sll t2, t2, t9 # shift left

-1:

- add t1, t1, SEXP_BIAS # bias exponent

- and t2, t2, ~SIMPL_ONE # clear implied one bit

- b result_fs_s

-2:

- negu t9 # shift right by t9

- subu v0, v0, t9

- sll t8, t2, v0 # save bits shifted out

- srl t2, t2, t9

- b norm_noshift_s

-/*

- * Convert single to double.

- */

-cvt_d_s:

- jal get_fs_s

- dsll t2, 32

- bne t1, SEXP_INF, 1f # is FS an infinity?

- li t1, DEXP_INF # convert to double

- b result_fs_d

-1:

- bne t1, zero, 2f # is FS denormalized or zero?

- beq t2, zero, result_fs_d # is FS zero?

- jal renorm_fs_s

- move t8, zero

- b norm_d

-2:

- addu t1, t1, DEXP_BIAS - SEXP_BIAS # bias exponent correctly

- dsrl t2, t2, 3

- b result_fs_d

-/*

- * Convert long integer to double.

- */

-cvt_d_l:

- jal get_fs_long

- b cvt_d_int

-/*

- * Convert integer to double.

- */

-cvt_d_w:

- jal get_fs_int

-cvt_d_int:

- bne t2, zero, 1f # check for zero

- move t1, zero # result=0

- b result_fs_d

-/*

- * Find out how many leading zero bits are in t2 and put in t9.

- */

-1:

- move v0, t2

- move t9, zero

- dsrl v1, v0, 32

- bne v1, zero, 1f

- addu t9, 32

- dsll v0, 32

-1:

- dsrl v1, v0, 16

- bne v1, zero, 1f

- addu t9, 16

- dsll v0, 16

-1:

- dsrl v1, v0, 24

- bne v1, zero, 1f

- addu t9, 8

- dsll v0, 8

-1:

- dsrl v1, v0, 28

- bne v1, zero, 1f

- addu t9, 4

- dsll v0, 4

-1:

- dsrl v1, v0, 30

- bne v1, zero, 1f

- addu t9, 2

- dsll v0, 2

-1:

- dsrl v1, v0, 31

- bne v1, zero, 1f

- addu t9, 1

-/*

- * Now shift t2 the correct number of bits.

- */

-1:

- subu t9, t9, DLEAD_ZEROS # dont count leading zeros

- li t1, DEXP_BIAS + 20 # init exponent

- subu t1, t1, t9 # compute exponent

- beq t9, zero, 1f

- li v0, 64

- blt t9, zero, 2f # if shift < 0, shift right

- subu v0, v0, t9

- dsll t2, t2, t9 # shift left

-1:

- and t2, t2, ~DIMPL_ONE # clear implied one bit

- b result_fs_d

-2:

- negu t9 # shift right by t9

- subu v0, v0, t9

- dsrl t2, t2, t9

- and t2, t2, ~DIMPL_ONE # clear implied one bit

- b result_fs_d

-/*

- * Convert single to integer with specific rounding.

- */

-round_w_s:

- li t3, FPC_ROUND_RN

- b do_cvt_w_s

-trunc_w_s:

- li t3, FPC_ROUND_RZ

- b do_cvt_w_s

-ceil_w_s:

- li t3, FPC_ROUND_RP

- b do_cvt_w_s

-floor_w_s:

- li t3, FPC_ROUND_RM

- b do_cvt_w_s

-/*

- * Convert single to integer.

- */

-cvt_w_s:

- and t3, a1, FPC_ROUNDING_BITS # get rounding mode

-do_cvt_w_s:

- jal get_fs_s

- bne t1, SEXP_INF, 1f # is FS an infinity?

- bne t2, zero, invalid_w # invalid conversion

-1:

- bne t1, zero, 1f # is FS zero?

- beq t2, zero, result_fs_w # result is zero

- move t2, zero # result is an inexact zero

- b inexact_w

-1:

- subu t1, t1, SEXP_BIAS # unbias exponent

- or t2, t2, SIMPL_ONE # add implied one bit

- dsll t2, t2, DFRAC_BITS - SFRAC_BITS # convert S fraction to D

- b cvt_w

-/*

- * Convert double to integer with specific rounding.

- */

-round_w_d:

- li t3, FPC_ROUND_RN

- b do_cvt_w_d

-trunc_w_d:

- li t3, FPC_ROUND_RZ

- b do_cvt_w_d

-ceil_w_d:

- li t3, FPC_ROUND_RP

- b do_cvt_w_d

-floor_w_d:

- li t3, FPC_ROUND_RM

- b do_cvt_w_d

-/*

- * Convert double to integer.

- */

-cvt_w_d:

- and t3, a1, FPC_ROUNDING_BITS # get rounding mode

-do_cvt_w_d:

- jal get_fs_d

- bne t1, DEXP_INF, 1f # is FS an infinity?

- bne t2, zero, invalid_w # invalid conversion

-1:

- bne t1, zero, 2f # is FS zero?

- beq t2, zero, result_fs_w # result is zero

- move t2, zero # result is an inexact zero

- b inexact_w

-2:

- subu t1, t1, DEXP_BIAS # unbias exponent

- or t2, t2, DIMPL_ONE # add implied one bit

-cvt_w:

- blt t1, WEXP_MIN, underflow_w # is exponent too small?

- li v0, WEXP_MAX+1

- bgt t1, v0, overflow_w # is exponent too large?

- bne t1, v0, 1f # special check for INT_MIN

- beq t0, zero, overflow_w # if positive, overflow

- bne t2, DIMPL_ONE, overflow_w

- li t2, INT_MIN # result is INT_MIN

- b result_fs_w

-1:

- subu v0, t1, 20 # compute amount to shift

- beq v0, zero, 2f # is shift needed?

- li v1, 64

- blt v0, zero, 1f # if shift < 0, shift right

- subu v1, v1, v0 # shift left

- dsll t2, t2, v0

- b 2f

-1:

- negu v0 # shift right by v0

- subu v1, v1, v0

- dsll t8, t2, v1 # save bits shifted out

- sltu t8, zero, t8 # dont lose any ones

- dsrl t2, t2, v0

-/*

- * round (t0 is sign, t2:63-32 is integer part, t2:31-0 is fractional part).

- */

-2:

- beq t3, FPC_ROUND_RN, 3f # round to nearest

- beq t3, FPC_ROUND_RZ, 5f # round to zero (truncate)

- beq t3, FPC_ROUND_RP, 1f # round to +infinity

- beq t0, zero, 5f # if sign is positive, truncate

- b 2f

-1:

- bne t0, zero, 5f # if sign is negative, truncate

-2:

- daddu t2, t2, GUARDBIT # add in fractional

- blt t2, zero, overflow_w # overflow?

- b 5f

-3:

- daddu t2, t2, GUARDBIT # add in fractional

- blt t2, zero, overflow_w # overflow?

-4:

- bne v0, zero, 5f # if rounded remainder is zero

- and t2, 0xfffffffe00000000 # clear LSB (round to nearest)

-5:

- beq t0, zero, 1f # result positive?

- negu t2 # convert to negative integer

-1:

- dsll v0, 32 # save fraction

- dsrl t2, 32 # shift out fractional part

- beq v0, zero, result_fs_w # is result exact?

-/*

- * Handle inexact exception.

- */

-inexact_w:

- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT

- and v0, a1, FPC_ENABLE_INEXACT

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # save exceptions

- b result_fs_w

-/*

- * Conversions to integer which overflow will trap (if enabled),

- * or generate an inexact trap (if enabled),

- * or generate an invalid exception.

- */

-overflow_w:

- or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW

- and v0, a1, FPC_ENABLE_OVERFLOW

- bne v0, zero, fpe_trap

- and v0, a1, FPC_ENABLE_INEXACT

- bne v0, zero, inexact_w # inexact traps enabled?

- b invalid_w

-/*

- * Conversions to integer which underflow will trap (if enabled),

- * or generate an inexact trap (if enabled),

- * or generate an invalid exception.

- */

-underflow_w:

- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW

- and v0, a1, FPC_ENABLE_UNDERFLOW

- bne v0, zero, fpe_trap

- and v0, a1, FPC_ENABLE_INEXACT

- bne v0, zero, inexact_w # inexact traps enabled?

- b invalid_w

-/*

- * Convert single to long integer with specific rounding.

- */

-round_l_s:

- li t3, FPC_ROUND_RN

- b do_cvt_l_s

-trunc_l_s:

- li t3, FPC_ROUND_RZ

- b do_cvt_l_s

-ceil_l_s:

- li t3, FPC_ROUND_RP

- b do_cvt_l_s

-floor_l_s:

- li t3, FPC_ROUND_RM

- b do_cvt_l_s

-/*

- * Convert single to long integer.

- */

-cvt_l_s:

- and t3, a1, FPC_ROUNDING_BITS # get rounding mode

-do_cvt_l_s:

- jal get_fs_s

- bne t1, SEXP_INF, 1f # is FS an infinity?

- bne t2, zero, invalid_l # invalid conversion

-1:

- bne t1, zero, 1f # is FS zero?

- beq t2, zero, result_fs_l # result is zero

- move t2, zero # result is an inexact zero

- b inexact_l

-1:

- subu t1, t1, SEXP_BIAS # unbias exponent

- or t2, t2, SIMPL_ONE # add implied one bit

- dsll t2, t2, DFRAC_BITS - SFRAC_BITS # convert S fraction to D

- b cvt_l

-/*

- * Convert double to long integer with specific rounding.

- */

-round_l_d:

- li t3, FPC_ROUND_RN

- b do_cvt_l_d

-trunc_l_d:

- li t3, FPC_ROUND_RZ

- b do_cvt_l_d

-ceil_l_d:

- li t3, FPC_ROUND_RP

- b do_cvt_l_d

-floor_l_d:

- li t3, FPC_ROUND_RM

- b do_cvt_l_d

-/*

- * Convert double to long integer.

- */

-cvt_l_d:

- and t3, a1, FPC_ROUNDING_BITS # get rounding mode

-do_cvt_l_d:

- jal get_fs_d

- bne t1, DEXP_INF, 1f # is FS an infinity?

- bne t2, zero, invalid_l # invalid conversion

-1:

- bne t1, zero, 2f # is FS zero?

- beq t2, zero, result_fs_l # result is zero

- move t2, zero # result is an inexact zero

- b inexact_l

-2:

- subu t1, t1, DEXP_BIAS # unbias exponent

- or t2, t2, DIMPL_ONE # add implied one bit

-cvt_l:

- blt t1, LEXP_MIN, underflow_l # is exponent too small?

- li v0, LEXP_MAX+1

- bgt t1, v0, overflow_l # is exponent too large?

- bne t1, v0, 1f # special check for LONG_MIN

- beq t0, zero, overflow_l # if positive, overflow

- bne t2, DIMPL_ONE, overflow_l

- dli t2, LONG_MIN # result is LONG_MIN

- b result_fs_l

-1:

- subu v0, t1, DFRAC_BITS # compute amount to shift

- beq v0, zero, 2f # is shift needed?

- li v1, 64

- blt v0, zero, 1f # if shift < 0, shift right

- subu v1, v1, v0 # shift left

- dsll t2, t2, v0

- b 2f

-1:

- negu v0 # shift right by v0

- subu v1, v1, v0

- dsll t8, t2, v1 # save bits shifted out

- sltu t8, zero, t8 # dont lose any ones

- dsrl t2, t2, v0

-/*

- * round (t0 is sign, t2 is integer part).

- */

-2:

- beq t3, FPC_ROUND_RN, 3f # round to nearest

- beq t3, FPC_ROUND_RZ, 5f # round to zero (truncate)

- beq t3, FPC_ROUND_RP, 1f # round to +infinity

- beq t0, zero, 5f # if sign is positive, truncate

- b 2f

-1:

- bne t0, zero, 5f # if sign is negative, truncate

-2:

- daddu t2, t2, DGUARDBIT # add in fractional

- blt t2, zero, overflow_l # overflow?

- b 5f

-3:

- daddu t2, t2, DGUARDBIT # add in fractional

- blt t2, zero, overflow_l # overflow?

-4:

- bne v0, zero, 5f # if rounded remainder is zero

- and t2, 0xe000000000000000 # clear LSB (round to nearest)

-5:

- beq t0, zero, 1f # result positive?

- negu t2 # convert to negative integer

-1:

- b result_fs_l

- nop

-/*

- * Handle inexact exception.

- */

-inexact_l:

- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT

- and v0, a1, FPC_ENABLE_INEXACT

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # save exceptions

- b result_fs_l

-/*

- * Conversions to integer which overflow will trap (if enabled),

- * or generate an inexact trap (if enabled),

- * or generate an invalid exception.

- */

-overflow_l:

- or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW

- and v0, a1, FPC_ENABLE_OVERFLOW

- bne v0, zero, fpe_trap

- and v0, a1, FPC_ENABLE_INEXACT

- bne v0, zero, inexact_l # inexact traps enabled?

- b invalid_l

-/*

- * Conversions to integer which underflow will trap (if enabled),

- * or generate an inexact trap (if enabled),

- * or generate an invalid exception.

- */

-underflow_l:

- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW

- and v0, a1, FPC_ENABLE_UNDERFLOW

- bne v0, zero, fpe_trap

- and v0, a1, FPC_ENABLE_INEXACT

- bne v0, zero, inexact_l # inexact traps enabled?

- b invalid_l

-/*

- * Compare single.

- */

-cmp_s:

- jal get_cmp_s

- bne t1, SEXP_INF, 1f # is FS an infinity?

- bne t2, zero, unordered # FS is a NAN

-1:

- bne ta1, SEXP_INF, 2f # is FT an infinity?

- bne ta2, zero, unordered # FT is a NAN

-2:

- sll t1, t1, SFRAC_BITS # reassemble exp & frac

- or t1, t1, t2

- sll ta1, ta1, SFRAC_BITS # reassemble exp & frac

- or ta1, ta1, ta2

- beq t0, zero, 1f # is FS positive?

- negu t1

-1:

- beq ta0, zero, 1f # is FT positive?

- negu ta1

-1:

- li v0, COND_LESS

- blt t1, ta1, test_cond # is FS < FT?

- li v0, COND_EQUAL

- beq t1, ta1, test_cond # is FS == FT?

- move v0, zero # FS > FT

- b test_cond

-/*

- * Compare double.

- */

-cmp_d:

- jal get_cmp_d

- bne t1, DEXP_INF, 1f # is FS an infinity?

- bne t2, zero, unordered # FS is a NAN

-1:

- bne ta1, DEXP_INF, 2f # is FT an infinity?

- bne ta2, zero, unordered # FT is a NAN

-2:

- dsll t1, t1, DFRAC_BITS # reassemble exp & frac

- or t1, t1, t2

- dsll ta1, ta1, DFRAC_BITS # reassemble exp & frac

- or ta1, ta1, ta2

- beq t0, zero, 1f # is FS positive?

- dnegu t1 # negate t1

-1:

- beq ta0, zero, 1f # is FT positive?

- dnegu ta1

-1:

- li v0, COND_LESS

- blt t1, ta1, test_cond # is FS(MSW) < FT(MSW)?

- li v0, COND_EQUAL

- beq t1, ta1, test_cond # is FS(LSW) == FT(LSW)?

- move v0, zero # FS > FT

-test_cond:

- and v0, v0, a0 # condition match instruction?

-set_cond:

- bne v0, zero, 1f

- and a1, a1, ~FPC_COND_BIT # clear condition bit

- b 2f

-1:

- or a1, a1, FPC_COND_BIT # set condition bit

-2:

- ctc1 a1, FPC_CSR # save condition bit

- b done

-unordered:

- and v0, a0, COND_UNORDERED # this cmp match unordered?

- bne v0, zero, 1f

- and a1, a1, ~FPC_COND_BIT # clear condition bit

- b 2f

-1:

- or a1, a1, FPC_COND_BIT # set condition bit

-2:

- and v0, a0, COND_SIGNAL

- beq v0, zero, 1f # is this a signaling cmp?

- or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID

- and v0, a1, FPC_ENABLE_INVALID

- bne v0, zero, fpe_trap

-1:

- ctc1 a1, FPC_CSR # save condition bit

- b done

-/*

- * Determine the amount to shift the fraction in order to restore the

- * normalized position. After that, round and handle exceptions.

- */

-norm_s:

- move v0, t2

- move t9, zero # t9 = num of leading zeros

- bne t2, zero, 1f

- move v0, t8

- addu t9, 32

-1:

- srl v1, v0, 16

- bne v1, zero, 1f

- addu t9, 16

- sll v0, 16

-1:

- srl v1, v0, 24

- bne v1, zero, 1f

- addu t9, 8

- sll v0, 8

-1:

- srl v1, v0, 28

- bne v1, zero, 1f

- addu t9, 4

- sll v0, 4

-1:

- srl v1, v0, 30

- bne v1, zero, 1f

- addu t9, 2

- sll v0, 2

-1:

- srl v1, v0, 31

- bne v1, zero, 1f

- addu t9, 1

-/*

- * Now shift t2,t8 the correct number of bits.

- */

-1:

- subu t9, t9, SLEAD_ZEROS # dont count leading zeros

- subu t1, t1, t9 # adjust the exponent

- beq t9, zero, norm_noshift_s

- li v1, 32

- blt t9, zero, 1f # if shift < 0, shift right

- subu v1, v1, t9

- sll t2, t2, t9 # shift t2,t8 left

- srl v0, t8, v1 # save bits shifted out

- or t2, t2, v0

- sll t8, t8, t9

- b norm_noshift_s

-1:

- negu t9 # shift t2,t8 right by t9

- subu v1, v1, t9

- sll v0, t8, v1 # save bits shifted out

- sltu v0, zero, v0 # be sure to save any one bits

- srl t8, t8, t9

- or t8, t8, v0

- sll v0, t2, v1 # save bits shifted out

- or t8, t8, v0

- srl t2, t2, t9

-norm_noshift_s:

- move ta1, t1 # save unrounded exponent

- move ta2, t2 # save unrounded fraction

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- beq v0, FPC_ROUND_RN, 3f # round to nearest

- beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate)

- beq v0, FPC_ROUND_RP, 1f # round to +infinity

- beq t0, zero, 5f # if sign is positive, truncate

- b 2f

-1:

- bne t0, zero, 5f # if sign is negative, truncate

-2:

- beq t8, zero, 5f # if exact, continue

- addu t2, t2, 1 # add rounding bit

- bne t2, SIMPL_ONE<<1, 5f # need to adjust exponent?

- addu t1, t1, 1 # adjust exponent

- srl t2, t2, 1 # renormalize fraction

- b 5f

-3:

- li v0, GUARDBIT # load guard bit for rounding

- addu v0, v0, t8 # add remainder

- sltu v1, v0, t8 # compute carry out

- beq v1, zero, 4f # if no carry, continue

- addu t2, t2, 1 # add carry to result

- bne t2, SIMPL_ONE<<1, 4f # need to adjust exponent?

- addu t1, t1, 1 # adjust exponent

- srl t2, t2, 1 # renormalize fraction

-4:

- bne v0, zero, 5f # if rounded remainder is zero

- and t2, t2, ~1 # clear LSB (round to nearest)

-5:

- bgt t1, SEXP_MAX, overflow_s # overflow?

- blt t1, SEXP_MIN, underflow_s # underflow?

- bne t8, zero, inexact_s # is result inexact?

- addu t1, t1, SEXP_BIAS # bias exponent

- and t2, t2, ~SIMPL_ONE # clear implied one bit

- b result_fs_s

-/*

- * Handle inexact exception.

- */

-inexact_s:

- addu t1, t1, SEXP_BIAS # bias exponent

- and t2, t2, ~SIMPL_ONE # clear implied one bit

-inexact_nobias_s:

- jal set_fd_s # save result

- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT

- and v0, a1, FPC_ENABLE_INEXACT

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # save exceptions

- b done

-/*

- * Overflow will trap (if enabled),

- * or generate an inexact trap (if enabled),

- * or generate an infinity.

- */

-overflow_s:

- or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW

- and v0, a1, FPC_ENABLE_OVERFLOW

- beq v0, zero, 1f

- subu t1, t1, 192 # bias exponent

- and t2, t2, ~SIMPL_ONE # clear implied one bit

- jal set_fd_s # save result

- b fpe_trap

-1:

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- beq v0, FPC_ROUND_RN, 3f # round to nearest

- beq v0, FPC_ROUND_RZ, 1f # round to zero (truncate)

- beq v0, FPC_ROUND_RP, 2f # round to +infinity

- bne t0, zero, 3f

-1:

- li t1, SEXP_MAX # result is max finite

- li t2, 0x007fffff

- b inexact_s

-2:

- bne t0, zero, 1b

-3:

- li t1, SEXP_MAX + 1 # result is infinity

- move t2, zero

- b inexact_s

-/*

- * In this implementation, "tininess" is detected "after rounding" and

- * "loss of accuracy" is detected as "an inexact result".

- */

-underflow_s:

- and v0, a1, FPC_ENABLE_UNDERFLOW

- beq v0, zero, 1f

-/*

- * Underflow is enabled so compute the result and trap.

- */

- addu t1, t1, 192 # bias exponent

- and t2, t2, ~SIMPL_ONE # clear implied one bit

- jal set_fd_s # save result

- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW

- b fpe_trap

-/*

- * Underflow is not enabled so compute the result,

- * signal inexact result (if it is) and trap (if enabled).

- */

-1:

- move t1, ta1 # get unrounded exponent

- move t2, ta2 # get unrounded fraction

- li t9, SEXP_MIN # compute shift amount

- subu t9, t9, t1 # shift t2,t8 right by t9

- blt t9, SFRAC_BITS+2, 3f # shift all the bits out?

- move t1, zero # result is inexact zero

- move t2, zero

- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW

-/*

- * Now round the zero result.

- * Only need to worry about rounding to +- infinity when the sign matches.

- */

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- beq v0, FPC_ROUND_RN, inexact_nobias_s # round to nearest

- beq v0, FPC_ROUND_RZ, inexact_nobias_s # round to zero

- beq v0, FPC_ROUND_RP, 1f # round to +infinity

- beq t0, zero, inexact_nobias_s # if sign is positive, truncate

- b 2f

-1:

- bne t0, zero, inexact_nobias_s # if sign is negative, truncate

-2:

- addu t2, t2, 1 # add rounding bit

- b inexact_nobias_s

-3:

- li v1, 32

- subu v1, v1, t9

- sltu v0, zero, t8 # be sure to save any one bits

- sll t8, t2, v1 # save bits shifted out

- or t8, t8, v0 # include sticky bits

- srl t2, t2, t9

-/*

- * Now round the denormalized result.

- */

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- beq v0, FPC_ROUND_RN, 3f # round to nearest

- beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate)

- beq v0, FPC_ROUND_RP, 1f # round to +infinity

- beq t0, zero, 5f # if sign is positive, truncate

- b 2f

-1:

- bne t0, zero, 5f # if sign is negative, truncate

-2:

- beq t8, zero, 5f # if exact, continue

- addu t2, t2, 1 # add rounding bit

- b 5f

-3:

- li v0, GUARDBIT # load guard bit for rounding

- addu v0, v0, t8 # add remainder

- sltu v1, v0, t8 # compute carry out

- beq v1, zero, 4f # if no carry, continue

- addu t2, t2, 1 # add carry to result

-4:

- bne v0, zero, 5f # if rounded remainder is zero

- and t2, t2, ~1 # clear LSB (round to nearest)

-5:

- move t1, zero # denorm or zero exponent

- jal set_fd_s # save result

- beq t8, zero, done # check for exact result

- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW

- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT

- and v0, a1, FPC_ENABLE_INEXACT

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # save exceptions

- b done

-/*

- * Determine the amount to shift the fraction in order to restore the

- * normalized position. After that, round and handle exceptions.

- */

-norm_d:

- move v0, t2

- move t9, zero # t9 = num of leading zeros

- dsrl v1, v0, 32

- bne v1, zero, 1f

- addu t9, 32

- dsll v0, 32

-1:

- dsrl v1, v0, 16

- bne v1, zero, 1f

- addu t9, 16

- dsll v0, 16

-1:

- dsrl v1, v0, 24

- bne v1, zero, 1f

- addu t9, 8

- dsll v0, 8

-1:

- dsrl v1, v0, 28

- bne v1, zero, 1f

- addu t9, 4

- dsll v0, 4

-1:

- dsrl v1, v0, 30

- bne v1, zero, 1f

- addu t9, 2

- dsll v0, 2

-1:

- dsrl v1, v0, 31

- bne v1, zero, 1f

- addu t9, 1

-/*

- * Now shift t2,t8 the correct number of bits.

- */

-1:

- subu t9, t9, DLEAD_ZEROS # dont count leading zeros

- subu t1, t1, t9 # adjust the exponent

- beq t9, zero, norm_noshift_d

- li v1, 64

- blt t9, zero, 2f # if shift < 0, shift right

- subu v1, v1, t9

- dsll t2, t2, t9 # shift left by t9

- dsrl v0, t8, v1 # save bits shifted out

- or t2, t2, v0

- dsll t8, t8, t9

- b norm_noshift_d

-2:

- negu t9 # shift right by t9

- subu v1, v1, t9 # (known to be < 32 bits)

- dsll v0, t8, v1 # save bits shifted out

- sltu v0, zero, v0 # be sure to save any one bits

- dsrl t8, t8, t9

- or t8, t8, v0

- dsll v0, t2, v1 # save bits shifted out

- or t8, t8, v0

- dsrl t2, t2, t9

-norm_noshift_d:

- move ta1, t1 # save unrounded exponent

- move ta2, t2 # save unrounded fraction (MS)

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- beq v0, FPC_ROUND_RN, 3f # round to nearest

- beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate)

- beq v0, FPC_ROUND_RP, 1f # round to +infinity

- beq t0, zero, 5f # if sign is positive, truncate

- b 2f

-1:

- bne t0, zero, 5f # if sign is negative, truncate

-2:

- beq t8, zero, 5f # if exact, continue

- daddu t2, t2, 1 # add rounding bit

- bne t2, DIMPL_ONE<<1, 5f # need to adjust exponent?

- addu t1, t1, 1 # adjust exponent

- dsrl t2, t2, 1 # renormalize fraction

- b 5f

-3:

- dli v0, DGUARDBIT # load guard bit for rounding

- addu v0, v0, t8 # add remainder

- sltu v1, v0, t8 # compute carry out

- beq v1, zero, 4f # branch if no carry

- daddu t2, t2, 1 # add carry to result

- bne t2, DIMPL_ONE<<1, 4f # need to adjust exponent?

- addu t1, t1, 1 # adjust exponent

- srl t2, t2, 1 # renormalize fraction

-4:

- bne v0, zero, 5f # if rounded remainder is zero

- and t2, t2, ~1 # clear LSB (round to nearest)

-5:

- bgt t1, DEXP_MAX, overflow_d # overflow?

- blt t1, DEXP_MIN, underflow_d # underflow?

- bne t8, zero, inexact_d # is result inexact?

- addu t1, t1, DEXP_BIAS # bias exponent

- and t2, t2, ~DIMPL_ONE # clear implied one bit

- b result_fs_d

-/*

- * Handle inexact exception.

- */

-inexact_d:

- addu t1, t1, DEXP_BIAS # bias exponent

- and t2, t2, ~DIMPL_ONE # clear implied one bit

-inexact_nobias_d:

- jal set_fd_d # save result

- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT

- and v0, a1, FPC_ENABLE_INEXACT

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # save exceptions

- b done

-/*

- * Overflow will trap (if enabled),

- * or generate an inexact trap (if enabled),

- * or generate an infinity.

- */

-overflow_d:

- or a1, a1, FPC_EXCEPTION_OVERFLOW | FPC_STICKY_OVERFLOW

- and v0, a1, FPC_ENABLE_OVERFLOW

- beq v0, zero, 1f

- subu t1, t1, 1536 # bias exponent

- and t2, t2, ~DIMPL_ONE # clear implied one bit

- jal set_fd_d # save result

- b fpe_trap

-1:

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- beq v0, FPC_ROUND_RN, 3f # round to nearest

- beq v0, FPC_ROUND_RZ, 1f # round to zero (truncate)

- beq v0, FPC_ROUND_RP, 2f # round to +infinity

- bne t0, zero, 3f

-1:

- li t1, DEXP_MAX # result is max finite

- dli t2, 0x000fffffffffffff

- b inexact_d

-2:

- bne t0, zero, 1b

-3:

- li t1, DEXP_MAX + 1 # result is infinity

- move t2, zero

- b inexact_d

-/*

- * In this implementation, "tininess" is detected "after rounding" and

- * "loss of accuracy" is detected as "an inexact result".

- */

-underflow_d:

- and v0, a1, FPC_ENABLE_UNDERFLOW

- beq v0, zero, 1f

-/*

- * Underflow is enabled so compute the result and trap.

- */

- addu t1, t1, 1536 # bias exponent

- and t2, t2, ~DIMPL_ONE # clear implied one bit

- jal set_fd_d # save result

- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW

- b fpe_trap

-/*

- * Underflow is not enabled so compute the result,

- * signal inexact result (if it is) and trap (if enabled).

- */

-1:

- move t1, ta1 # get unrounded exponent

- move t2, ta2 # get unrounded fraction (MS)

- li t9, DEXP_MIN # compute shift amount

- subu t9, t9, t1 # shift t2,t8 right by t9

- blt t9, DFRAC_BITS+2, 3f # shift all the bits out?

- move t1, zero # result is inexact zero

- move t2, zero

- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW

-/*

- * Now round the zero result.

- * Only need to worry about rounding to +- infinity when the sign matches.

- */

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- beq v0, FPC_ROUND_RN, inexact_nobias_d # round to nearest

- beq v0, FPC_ROUND_RZ, inexact_nobias_d # round to zero

- beq v0, FPC_ROUND_RP, 1f # round to +infinity

- beq t0, zero, inexact_nobias_d # if sign is positive, truncate

- b 2f

-1:

- bne t0, zero, inexact_nobias_d # if sign is negative, truncate

-2:

- daddu t2, t2, 1 # add rounding bit

- b inexact_nobias_d

-3:

- li v1, 64

- subu v1, v1, t9

- sltu v0, zero, t8 # be sure to save any one bits

- dsll t8, t2, v1 # save bits shifted out

- or t8, t8, v0 # include sticky bits

- dsrl t2, t2, t9

-/*

- * Now round the denormalized result.

- */

- and v0, a1, FPC_ROUNDING_BITS # get rounding mode

- beq v0, FPC_ROUND_RN, 3f # round to nearest

- beq v0, FPC_ROUND_RZ, 5f # round to zero (truncate)

- beq v0, FPC_ROUND_RP, 1f # round to +infinity

- beq t0, zero, 5f # if sign is positive, truncate

- b 2f

-1:

- bne t0, zero, 5f # if sign is negative, truncate

-2:

- beq t8, zero, 5f # if exact, continue

- daddu t2, t2, 1 # add rounding bit

- b 5f

-3:

- dli v0, DGUARDBIT # load guard bit for rounding

- daddu v0, v0, t8 # add remainder

- sltu v1, v0, t8 # compute carry out

- beq v1, zero, 4f # if no carry, continue

- daddu t2, t2, 1 # add carry

-4:

- bne v0, zero, 5f # if rounded remainder is zero

- and t2, t2, ~1 # clear LSB (round to nearest)

-5:

- move t1, zero # denorm or zero exponent

- jal set_fd_d # save result

- beq t8, zero, done # check for exact result

- or a1, a1, FPC_EXCEPTION_UNDERFLOW | FPC_STICKY_UNDERFLOW

- or a1, a1, FPC_EXCEPTION_INEXACT | FPC_STICKY_INEXACT

- and v0, a1, FPC_ENABLE_INEXACT

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # save exceptions

- b done

-/*

- * Signal an invalid operation if the trap is enabled; otherwise,

- * the result is a quiet NAN.

- */

-invalid_s: # trap invalid operation

- or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID

- and v0, a1, FPC_ENABLE_INVALID

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # save exceptions

- move t0, zero # result is a quiet NAN

- li t1, SEXP_INF

- li t2, SQUIET_NAN

- jal set_fd_s # save result (in t0,t1,t2)

- b done

-/*

- * Signal an invalid operation if the trap is enabled; otherwise,

- * the result is a quiet NAN.

- */

-invalid_d: # trap invalid operation

- or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID

- and v0, a1, FPC_ENABLE_INVALID

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # save exceptions

- move t0, zero # result is a quiet NAN

- li t1, DEXP_INF

- dli t2, DQUIET_NAN

- jal set_fd_d # save result (in t0,t1,t2)

- b done

-/*

- * Signal an invalid operation if the trap is enabled; otherwise,

- * the result is INT_MAX or INT_MIN.

- */

-invalid_w: # trap invalid operation

- or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID

- and v0, a1, FPC_ENABLE_INVALID

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # save exceptions

- bne t0, zero, 1f

- li t2, INT_MAX # result is INT_MAX

- b result_fs_w

-1:

- li t2, INT_MIN # result is INT_MIN

- b result_fs_w

-/*

- * Signal an invalid operation if the trap is enabled; otherwise,

- * the result is LONG_MAX or LONG_MIN.

- *

- * The invalid exception and sticky bits are always set in a1. When the

- * invalid trap is not enabled, t0 (the operand sign) picks the result:

- * zero gives LONG_MAX, nonzero gives LONG_MIN.

- */

-invalid_l: # trap invalid operation

- or a1, a1, FPC_EXCEPTION_INVALID | FPC_STICKY_INVALID

- and v0, a1, FPC_ENABLE_INVALID

- bne v0, zero, fpe_trap

- ctc1 a1, FPC_CSR # save exceptions

- bne t0, zero, 1f

- dli t2, LONG_MAX # result is LONG_MAX

- b result_fs_l

-1:

- dli t2, LONG_MIN # result is LONG_MIN

- b result_fs_l

-/*

- * Trap if the hardware should have handled this case.

- */

-fpe_trap:

- move a2, a1 # code = FP CSR

- ctc1 a1, FPC_CSR # save exceptions

- li v0, 1

- b done_err

-/*

- * Send an illegal instruction signal to the current process.

- */

-ill:

- ctc1 a1, FPC_CSR # save exceptions

- move a2, a0 # code = FP instruction

- li v0, 1

- b done_err

-result_ft_s:

- move t0, ta0 # result is FT

- move t1, ta1

- move t2, ta2

-result_fs_s: # result is FS

- jal set_fd_s # save result (in t0,t1,t2)

- b done

-result_fs_w:

- jal set_fd_word # save result (in t2)

- b done

-result_fs_l:

- move t0, t2

- jal set_fd_dword # save result (in t0)

- b done

-result_ft_d:

- move t0, ta0 # result is FT

- move t1, ta1

- move t2, ta2

-result_fs_d: # result is FS

- jal set_fd_d # save result (in t0,t1,t2)

-done:

- li v0, 0

-done_err:

- PTR_L ra, CF_RA_OFFS(sp)

- PTR_ADD sp, sp, FRAMESZ(CF_SZ)

- j ra

-END(MipsEmulateFP)

-/*----------------------------------------------------------------------------

- * get_fs_int --

- *

- * Read (integer) the FS register (bits 15-11).

- * This is an internal routine used by MipsEmulateFP only.

- *

- * Arguments:

- * a0 contains the instruction word; only its FS field is examined

- *

- * The FS field is scaled by 8 to form a byte offset into get_fs_int_tbl.

- * Each GET_FS_INT(n) expansion adds one .dword entry to that table (in

- * .rdata) and one stub (in .text) which copies FP register n into t2

- * with mfc1 and then branches to get_fs_int_done.

- *

- * Results:

- * t0 contains the sign

- * t2 contains the fraction

- *

- * t0 is taken from bit 31 of the value read. When the value is negative,

- * t2 is negated and its upper 33 bits are cleared, so t2 ends up holding

- * a magnitude. a3 is used as scratch.

- *

- * NOTE(review): clearing 33 bits also clears bit 31, so an input of

- * INT_MIN appears to come back with t2 == 0 -- confirm whether a shift

- * count of 32 was intended.

- *

- *----------------------------------------------------------------------------

- */

-#define GET_FS_INT(n) \

- .rdata; \

- .dword get_fs_int_ ## n; \

- .text; \

-get_fs_int_ ## n: \

- mfc1 t2, $ ## n; \

- b get_fs_int_done

-LEAF(get_fs_int, 0)

- srl a3, a0, 11 - 3 # get FS field

- and a3, a3, 0x1f << 3 # mask FS field

- ld a3, get_fs_int_tbl(a3) # switch on register number

- j a3

- .rdata

-get_fs_int_tbl:

- .text

- GET_FS_INT(f0)

- GET_FS_INT(f1)

- GET_FS_INT(f2)

- GET_FS_INT(f3)

- GET_FS_INT(f4)

- GET_FS_INT(f5)

- GET_FS_INT(f6)

- GET_FS_INT(f7)

- GET_FS_INT(f8)

- GET_FS_INT(f9)

- GET_FS_INT(f10)

- GET_FS_INT(f11)

- GET_FS_INT(f12)

- GET_FS_INT(f13)

- GET_FS_INT(f14)

- GET_FS_INT(f15)

- GET_FS_INT(f16)

- GET_FS_INT(f17)

- GET_FS_INT(f18)

- GET_FS_INT(f19)

- GET_FS_INT(f20)

- GET_FS_INT(f21)

- GET_FS_INT(f22)

- GET_FS_INT(f23)

- GET_FS_INT(f24)

- GET_FS_INT(f25)

- GET_FS_INT(f26)

- GET_FS_INT(f27)

- GET_FS_INT(f28)

- GET_FS_INT(f29)

- GET_FS_INT(f30)

- GET_FS_INT(f31)

-get_fs_int_done:

- srl t0, t2, 31 # init the sign bit

- bge t2, zero, 1f

- negu t2

- dsll t2, 33

- dsrl t2, 33

-1:

- j ra

-END(get_fs_int)

-/*----------------------------------------------------------------------------

- * get_fs_long --

- *

- * Read (long integer) the FS register (bits 15-11).

- * This is an internal routine used by MipsEmulateFP only.

- *

- * Arguments:

- * a0 contains the instruction word; only its FS field is examined

- *

- * The FS field is scaled by 8 to form a byte offset into get_fs_long_tbl.

- * Each GET_FS_LONG(n) expansion adds one .dword entry to that table (in

- * .rdata) and one stub (in .text) which copies FP register n into t2

- * with dmfc1 and then branches to get_fs_long_done.

- *

- * Results:

- * t0 contains the sign

- * t2 contains the fraction

- *

- * t0 is taken from bit 63 of the value read. When the value is negative,

- * t2 is replaced by its 64-bit negation, so t2 ends up holding a

- * magnitude. a3 is used as scratch.

- *

- *----------------------------------------------------------------------------

- */

-#define GET_FS_LONG(n) \

- .rdata; \

- .dword get_fs_long_ ## n; \

- .text; \

-get_fs_long_ ## n: \

- dmfc1 t2, $ ## n; \

- b get_fs_long_done

-LEAF(get_fs_long, 0)

- srl a3, a0, 11 - 3 # get FS field

- and a3, a3, 0x1f << 3 # mask FS field

- ld a3, get_fs_long_tbl(a3) # switch on register number

- j a3

- .rdata

-get_fs_long_tbl:

- .text

- GET_FS_LONG(f0)

- GET_FS_LONG(f1)

- GET_FS_LONG(f2)

- GET_FS_LONG(f3)

- GET_FS_LONG(f4)

- GET_FS_LONG(f5)

- GET_FS_LONG(f6)

- GET_FS_LONG(f7)

- GET_FS_LONG(f8)

- GET_FS_LONG(f9)

- GET_FS_LONG(f10)

- GET_FS_LONG(f11)

- GET_FS_LONG(f12)

- GET_FS_LONG(f13)

- GET_FS_LONG(f14)

- GET_FS_LONG(f15)

- GET_FS_LONG(f16)

- GET_FS_LONG(f17)

- GET_FS_LONG(f18)

- GET_FS_LONG(f19)

- GET_FS_LONG(f20)

- GET_FS_LONG(f21)

- GET_FS_LONG(f22)

- GET_FS_LONG(f23)

- GET_FS_LONG(f24)

- GET_FS_LONG(f25)

- GET_FS_LONG(f26)

- GET_FS_LONG(f27)

- GET_FS_LONG(f28)

- GET_FS_LONG(f29)

- GET_FS_LONG(f30)

- GET_FS_LONG(f31)

-get_fs_long_done:

- dsrl t0, t2, 63 # init the sign bit

- bge t2, zero, 1f

- dnegu t2

-1:

- j ra

-END(get_fs_long)

-/*----------------------------------------------------------------------------

- * get_ft_fs_s --

- *

- * Read (single precision) the FT register (bits 20-16) and

- * the FS register (bits 15-11) and break up into fields.

- * This is an internal routine used by MipsEmulateFP only.

- *

- * Arguments:

- * a0 contains the instruction word; its FT and FS fields are examined

- *

- * The FT field is scaled by 8 to form a byte offset into get_ft_s_tbl.

- * Each GET_FT_S(n) expansion adds one .dword entry to that table (in

- * .rdata) and one stub (in .text) which copies FP register n into ta0

- * with mfc1 and then branches to get_ft_s_done. Once FT has been split

- * into fields, execution falls through into get_fs_s to fetch FS.

- *

- * Results:

- * t0 contains the FS sign

- * t1 contains the FS (biased) exponent

- * t2 contains the FS fraction

- * ta0 contains the FT sign

- * ta1 contains the FT (biased) exponent

- * ta2 contains the FT fraction

- *

- * If the FT exponent equals SEXP_INF and the fraction has the

- * SSIGNAL_NAN bit set, control goes to invalid_s instead of returning

- * to the caller. a3 and v0 are used as scratch.

- *

- *----------------------------------------------------------------------------

- */

-#define GET_FT_S(n) \

- .rdata; \

- .dword get_ft_s_ ## n; \

- .text; \

-get_ft_s_ ## n: \

- mfc1 ta0, $ ## n; \

- b get_ft_s_done

-LEAF(get_ft_fs_s, 0)

- srl a3, a0, 16 - 3 # get FT field

- and a3, a3, 0x1f << 3 # mask FT field

- ld a3, get_ft_s_tbl(a3) # switch on register number

- j a3

- .rdata

-get_ft_s_tbl:

- .text

- GET_FT_S(f0)

- GET_FT_S(f1)

- GET_FT_S(f2)

- GET_FT_S(f3)

- GET_FT_S(f4)

- GET_FT_S(f5)

- GET_FT_S(f6)

- GET_FT_S(f7)

- GET_FT_S(f8)

- GET_FT_S(f9)

- GET_FT_S(f10)

- GET_FT_S(f11)

- GET_FT_S(f12)

- GET_FT_S(f13)

- GET_FT_S(f14)

- GET_FT_S(f15)

- GET_FT_S(f16)

- GET_FT_S(f17)

- GET_FT_S(f18)

- GET_FT_S(f19)

- GET_FT_S(f20)

- GET_FT_S(f21)

- GET_FT_S(f22)

- GET_FT_S(f23)

- GET_FT_S(f24)

- GET_FT_S(f25)

- GET_FT_S(f26)

- GET_FT_S(f27)

- GET_FT_S(f28)

- GET_FT_S(f29)

- GET_FT_S(f30)

- GET_FT_S(f31)

-get_ft_s_done:

- srl ta1, ta0, SFRAC_BITS # get exponent

- and ta1, ta1, 0xFF

- and ta2, ta0, 0x7FFFFF # get fraction

- srl ta0, ta0, 31 # get sign

- bne ta1, SEXP_INF, 1f # is it a signaling NAN?

- and v0, ta2, SSIGNAL_NAN

- bne v0, zero, invalid_s

-1:

- /* fall through to get FS */

-/*----------------------------------------------------------------------------

- * get_fs_s --

- *

- * Read (single precision) the FS register (bits 15-11) and

- * break up into fields.

- * This is an internal routine used by MipsEmulateFP only.

- *

- * Arguments:

- * a0 contains the instruction word; only its FS field is examined

- *

- * This is an alternate entry point (ALEAF) inside get_ft_fs_s. It is

- * reached either by a direct call or by falling through after FT has

- * been read. The FS field is scaled by 8 to index get_fs_s_tbl, whose

- * GET_FS_S(n) stubs copy FP register n into t0 with mfc1.

- *

- * Results:

- * t0 contains the sign

- * t1 contains the (biased) exponent

- * t2 contains the fraction

- *

- * If the exponent equals SEXP_INF and the fraction has the SSIGNAL_NAN

- * bit set, control goes to invalid_s instead of returning to the

- * caller. a3 and v0 are used as scratch.

- *

- *----------------------------------------------------------------------------

- */

-#define GET_FS_S(n) \

- .rdata; \

- .dword get_fs_s_ ## n; \

- .text; \

-get_fs_s_ ## n: \

- mfc1 t0, $ ## n; \

- b get_fs_s_done

-ALEAF(get_fs_s)

- srl a3, a0, 11 - 3 # get FS field

- and a3, a3, 0x1f << 3 # mask FS field

- ld a3, get_fs_s_tbl(a3) # switch on register number

- j a3

- .rdata

-get_fs_s_tbl:

- .text

- GET_FS_S(f0)

- GET_FS_S(f1)

- GET_FS_S(f2)

- GET_FS_S(f3)

- GET_FS_S(f4)

- GET_FS_S(f5)

- GET_FS_S(f6)

- GET_FS_S(f7)

- GET_FS_S(f8)

- GET_FS_S(f9)

- GET_FS_S(f10)

- GET_FS_S(f11)

- GET_FS_S(f12)

- GET_FS_S(f13)

- GET_FS_S(f14)

- GET_FS_S(f15)

- GET_FS_S(f16)

- GET_FS_S(f17)

- GET_FS_S(f18)

- GET_FS_S(f19)

- GET_FS_S(f20)

- GET_FS_S(f21)

- GET_FS_S(f22)

- GET_FS_S(f23)

- GET_FS_S(f24)

- GET_FS_S(f25)

- GET_FS_S(f26)

- GET_FS_S(f27)

- GET_FS_S(f28)

- GET_FS_S(f29)

- GET_FS_S(f30)

- GET_FS_S(f31)

-get_fs_s_done:

- srl t1, t0, SFRAC_BITS # get exponent

- and t1, t1, 0xFF

- and t2, t0, 0x7FFFFF # get fraction

- srl t0, t0, 31 # get sign

- bne t1, SEXP_INF, 1f # is it a signaling NAN?

- and v0, t2, SSIGNAL_NAN

- bne v0, zero, invalid_s

-1:

- j ra

-END(get_ft_fs_s)

-/*----------------------------------------------------------------------------

- * get_ft_fs_d --

- *

- * Read (double precision) the FT register (bits 20-16) and

- * the FS register (bits 15-11) and break up into fields.

- * This is an internal routine used by MipsEmulateFP only.

- *

- * Results:

- * t0 contains the FS sign

- * t1 contains the FS (biased) exponent

- * t2 contains the FS fraction

- * ta0 contains the FT sign

- * ta1 contains the FT (biased) exponent

- * ta2 contains the FT fraction

- *

- *----------------------------------------------------------------------------

- */

-#define GET_FT_FS_D(n) \

- .rdata; \

- .dword get_ft_fs_d_ ## n; \

- .text; \

-get_ft_fs_d_ ## n: \

- dmfc1 ta2, $ ## n; \

- b get_ft_d_done

-LEAF(get_ft_fs_d, 0)

- srl a3, a0, 16 - 3 # get FT field

- and a3, a3, 0x1f << 3 # mask FT field

- ld a3, get_ft_d_tbl(a3) # switch on register number

- j a3

- .rdata

-get_ft_d_tbl:

- .text

- GET_FT_FS_D(f0)

- GET_FT_FS_D(f1)

- GET_FT_FS_D(f2)

- GET_FT_FS_D(f3)

- GET_FT_FS_D(f4)

- GET_FT_FS_D(f5)

- GET_FT_FS_D(f6)

- GET_FT_FS_D(f7)

- GET_FT_FS_D(f8)

- GET_FT_FS_D(f9)

- GET_FT_FS_D(f10)

- GET_FT_FS_D(f11)

- GET_FT_FS_D(f12)

- GET_FT_FS_D(f13)

- GET_FT_FS_D(f14)

- GET_FT_FS_D(f15)

- GET_FT_FS_D(f16)

- GET_FT_FS_D(f17)

- GET_FT_FS_D(f18)

- GET_FT_FS_D(f19)

- GET_FT_FS_D(f20)

- GET_FT_FS_D(f21)

- GET_FT_FS_D(f22)

- GET_FT_FS_D(f23)

- GET_FT_FS_D(f24)

- GET_FT_FS_D(f25)

- GET_FT_FS_D(f26)

- GET_FT_FS_D(f27)

- GET_FT_FS_D(f28)

- GET_FT_FS_D(f29)

- GET_FT_FS_D(f30)

- GET_FT_FS_D(f31)

-get_ft_d_done:

- dsrl ta0, ta2, 63 # get sign

- dsrl ta1, ta2, DFRAC_BITS # get exponent

- and ta1, ta1, 0x7FF

- dsll ta2, 12

- dsrl ta2, 12 # get fraction

- bne ta1, DEXP_INF, 1f # is it a signaling NAN?

- and v0, ta2, DSIGNAL_NAN

- bne v0, zero, invalid_d

-1:

- /* fall through to get FS */

-/*----------------------------------------------------------------------------

- * get_fs_d --

- *

- * Read (double precision) the FS register (bits 15-11) and

- * break up into fields.

- * This is an internal routine used by MipsEmulateFP only.

- *

- * Results:

- * t0 contains the sign

- * t1 contains the (biased) exponent

- * t2 contains the fraction

- *

- *----------------------------------------------------------------------------

- */

-#define GET_FS_D(n) \

- .rdata; \

- .dword get_fs_d_ ## n; \

- .text; \

-get_fs_d_ ## n: \

- dmfc1 t2, $ ## n; \

- b get_fs_d_done

-ALEAF(get_fs_d)

- srl a3, a0, 11 - 3 # get FS field

- and a3, a3, 0x1f << 3 # mask FS field

- ld a3, get_fs_d_tbl(a3) # switch on register number

- j a3

- .rdata

-get_fs_d_tbl:

- .text

- GET_FS_D(f0)

- GET_FS_D(f1)

- GET_FS_D(f2)

- GET_FS_D(f3)

- GET_FS_D(f4)

- GET_FS_D(f5)

- GET_FS_D(f6)

- GET_FS_D(f7)

- GET_FS_D(f8)

- GET_FS_D(f9)

- GET_FS_D(f10)

- GET_FS_D(f11)

- GET_FS_D(f12)

- GET_FS_D(f13)

- GET_FS_D(f14)

- GET_FS_D(f15)

- GET_FS_D(f16)

- GET_FS_D(f17)

- GET_FS_D(f18)

- GET_FS_D(f19)

- GET_FS_D(f20)

- GET_FS_D(f21)

- GET_FS_D(f22)

- GET_FS_D(f23)

- GET_FS_D(f24)

- GET_FS_D(f25)

- GET_FS_D(f26)

- GET_FS_D(f27)

- GET_FS_D(f28)

- GET_FS_D(f29)

- GET_FS_D(f30)

- GET_FS_D(f31)

-get_fs_d_done:

- dsrl t0, t2, 63 # get sign

- dsrl t1, t2, DFRAC_BITS # get exponent

- and t1, t1, 0x7FF

- dsll t2, 12

- dsrl t2, 12 # get fraction

- bne t1, DEXP_INF, 1f # is it a signaling NAN?

- and v0, t2, DSIGNAL_NAN

- bne v0, zero, invalid_d

-1:

- j ra

-END(get_ft_fs_d)

-/*----------------------------------------------------------------------------

- * get_cmp_s --

- *

- * Read (single precision) the FS register (bits 15-11) and

- * the FT register (bits 20-16) and break up into fields.

- * This is an internal routine used by MipsEmulateFP only.

- *

- * Results:

- * t0 contains the sign

- * t1 contains the (biased) exponent

- * t2 contains the fraction

- * ta0 contains the sign

- * ta1 contains the (biased) exponent

- * ta2 contains the fraction

- *

- *----------------------------------------------------------------------------

- */

-#define CMP_FS_S(n) \

- .rdata; \

- .dword cmp_fs_s_ ## n; \

- .text; \

-cmp_fs_s_ ## n: \

- mfc1 t0, $ ## n; \

- b cmp_fs_s_done

-LEAF(get_cmp_s, 0)

- srl a3, a0, 11 - 3 # get FS field

- and a3, a3, 0x1f << 3 # mask FS field

- ld a3, cmp_fs_s_tbl(a3) # switch on register number

- j a3

- .rdata

-cmp_fs_s_tbl:

- .text

- CMP_FS_S(f0)

- CMP_FS_S(f1)

- CMP_FS_S(f2)

- CMP_FS_S(f3)

- CMP_FS_S(f4)

- CMP_FS_S(f5)

- CMP_FS_S(f6)

- CMP_FS_S(f7)

- CMP_FS_S(f8)

- CMP_FS_S(f9)

- CMP_FS_S(f10)

- CMP_FS_S(f11)

- CMP_FS_S(f12)

- CMP_FS_S(f13)

- CMP_FS_S(f14)

- CMP_FS_S(f15)

- CMP_FS_S(f16)

- CMP_FS_S(f17)

- CMP_FS_S(f18)

- CMP_FS_S(f19)

- CMP_FS_S(f20)

- CMP_FS_S(f21)

- CMP_FS_S(f22)

- CMP_FS_S(f23)

- CMP_FS_S(f24)

- CMP_FS_S(f25)

- CMP_FS_S(f26)

- CMP_FS_S(f27)

- CMP_FS_S(f28)

- CMP_FS_S(f29)

- CMP_FS_S(f30)

- CMP_FS_S(f31)

-cmp_fs_s_done:

- srl t1, t0, SFRAC_BITS # get exponent

- and t1, t1, 0xFF

- and t2, t0, 0x7FFFFF # get fraction

- srl t0, t0, 31 # get sign

-#define CMP_FT_S(n) \

- .rdata; \

- .dword cmp_ft_s_ ## n; \

- .text; \

-cmp_ft_s_ ## n: \

- mfc1 ta0, $ ## n; \

- b cmp_ft_s_done

- srl a3, a0, 16 - 3 # get FT field

- and a3, a3, 0x1f << 3 # mask FT field

- ld a3, cmp_ft_s_tbl(a3) # switch on register number

- j a3

- .rdata

-cmp_ft_s_tbl:

- .text

- CMP_FT_S(f0)

- CMP_FT_S(f1)

- CMP_FT_S(f2)

- CMP_FT_S(f3)

- CMP_FT_S(f4)

- CMP_FT_S(f5)

- CMP_FT_S(f6)

- CMP_FT_S(f7)

- CMP_FT_S(f8)

- CMP_FT_S(f9)

- CMP_FT_S(f10)

- CMP_FT_S(f11)

- CMP_FT_S(f12)

- CMP_FT_S(f13)

- CMP_FT_S(f14)

- CMP_FT_S(f15)

- CMP_FT_S(f16)

- CMP_FT_S(f17)

- CMP_FT_S(f18)

- CMP_FT_S(f19)

- CMP_FT_S(f20)

- CMP_FT_S(f21)

- CMP_FT_S(f22)

- CMP_FT_S(f23)

- CMP_FT_S(f24)

- CMP_FT_S(f25)

- CMP_FT_S(f26)

- CMP_FT_S(f27)

- CMP_FT_S(f28)

- CMP_FT_S(f29)

- CMP_FT_S(f30)

- CMP_FT_S(f31)

-cmp_ft_s_done:

- srl ta1, ta0, SFRAC_BITS # get exponent

- and ta1, ta1, 0xFF

- and ta2, ta0, 0x7FFFFF # get fraction

- srl ta0, ta0, 31 # get sign

- j ra

-END(get_cmp_s)

-/*----------------------------------------------------------------------------

- * get_cmp_d --

- *

- * Read (double precision) the FS register (bits 15-11) and

- * the FT register (bits 20-16) and break up into fields.

- * This is an internal routine used by MipsEmulateFP only.

- *

- * Results:

- * t0 contains the sign

- * t1 contains the (biased) exponent

- * t2 contains the fraction

- * ta0 contains the sign

- * ta1 contains the (biased) exponent

- * ta2 contains the fraction

- *

- *----------------------------------------------------------------------------

- */

-#define CMP_FS_D(n) \

- .rdata; \

- .dword cmp_fs_d_ ## n; \

- .text; \

-cmp_fs_d_ ## n: \

- dmfc1 t2, $ ## n; \

- b cmp_fs_d_done

-LEAF(get_cmp_d, 0)

- srl a3, a0, 11 - 3 # get FS field

- and a3, a3, 0x1f << 3 # mask FS field

- ld a3, cmp_fs_d_tbl(a3) # switch on register number

- j a3

- .rdata

-cmp_fs_d_tbl:

- .text

- CMP_FS_D(f0)

- CMP_FS_D(f1)

- CMP_FS_D(f2)

- CMP_FS_D(f3)

- CMP_FS_D(f4)

- CMP_FS_D(f5)

- CMP_FS_D(f6)

- CMP_FS_D(f7)

- CMP_FS_D(f8)

- CMP_FS_D(f9)

- CMP_FS_D(f10)

- CMP_FS_D(f11)

- CMP_FS_D(f12)

- CMP_FS_D(f13)

- CMP_FS_D(f14)

- CMP_FS_D(f15)

- CMP_FS_D(f16)

- CMP_FS_D(f17)

- CMP_FS_D(f18)

- CMP_FS_D(f19)

- CMP_FS_D(f20)

- CMP_FS_D(f21)

- CMP_FS_D(f22)

- CMP_FS_D(f23)

- CMP_FS_D(f24)

- CMP_FS_D(f25)

- CMP_FS_D(f26)

- CMP_FS_D(f27)

- CMP_FS_D(f28)

- CMP_FS_D(f29)

- CMP_FS_D(f30)

- CMP_FS_D(f31)

-cmp_fs_d_done:

- dsrl t0, t2, 63 # get sign

- dsrl t1, t2, DFRAC_BITS # get exponent

- and t1, t1, 0x7FF

- dsll t2, 12

- dsrl t2, 12 # get fraction

-#define CMP_FT_D(n) \

- .rdata; \

- .dword cmp_ft_d_ ## n; \

- .text; \

-cmp_ft_d_ ## n: \

- dmfc1 ta2, $ ## n; \

- b cmp_ft_d_done

- srl a3, a0, 16 - 3 # get FT field

- and a3, a3, 0x1f << 3 # mask FT field

- ld a3, cmp_ft_d_tbl(a3) # switch on register number

- j a3

- .rdata

-cmp_ft_d_tbl:

- .text

- CMP_FT_D(f0)

- CMP_FT_D(f1)

- CMP_FT_D(f2)

- CMP_FT_D(f3)

- CMP_FT_D(f4)

- CMP_FT_D(f5)

- CMP_FT_D(f6)

- CMP_FT_D(f7)

- CMP_FT_D(f8)

- CMP_FT_D(f9)

- CMP_FT_D(f10)

- CMP_FT_D(f11)

- CMP_FT_D(f12)

- CMP_FT_D(f13)

- CMP_FT_D(f14)

- CMP_FT_D(f15)

- CMP_FT_D(f16)

- CMP_FT_D(f17)

- CMP_FT_D(f18)

- CMP_FT_D(f19)

- CMP_FT_D(f20)

- CMP_FT_D(f21)

- CMP_FT_D(f22)

- CMP_FT_D(f23)

- CMP_FT_D(f24)

- CMP_FT_D(f25)

- CMP_FT_D(f26)

- CMP_FT_D(f27)

- CMP_FT_D(f28)

- CMP_FT_D(f29)

- CMP_FT_D(f30)

- CMP_FT_D(f31)

-cmp_ft_d_done:

- dsrl ta0, ta2, 63 # get sign

- dsrl ta1, ta2, DFRAC_BITS # get exponent

- and ta1, ta1, 0x7FF

- dsll ta2, 12

- dsrl ta2, 12 # get fraction

- j ra

-END(get_cmp_d)

-/*----------------------------------------------------------------------------

- * set_fd_s --

- *

- * Write (single precision) the FD register (bits 10-6).

- * This is an internal routine used by MipsEmulateFP only.

- *

- * Arguments:

- * a0 contains the FP instruction

- * t0 contains the sign

- * t1 contains the (biased) exponent

- * t2 contains the fraction

- *

- * set_fd_word --

- *

- * Write (integer) the FD register (bits 10-6).

- * This is an internal routine used by MipsEmulateFP only.

- *

- * Arguments:

- * a0 contains the FP instruction

- * t2 contains the integer

- *

- *----------------------------------------------------------------------------

- */

-#define SET_FD_S(n) \

- .rdata; \

- .dword set_fd_s_ ## n; \

- .text; \

-set_fd_s_ ## n: \

- mtc1 t2, $ ## n; \

- j ra

-LEAF(set_fd_s, 0)

- sll t0, t0, 31 # position sign

- sll t1, t1, SFRAC_BITS # position exponent

- or t2, t2, t0

- or t2, t2, t1

-ALEAF(set_fd_word)

- srl a3, a0, 6 - 3 # get FD field

- and a3, a3, 0x1f << 3 # mask FT field

- ld a3, set_fd_s_tbl(a3) # switch on register number

- j a3

- .rdata

-set_fd_s_tbl:

- .text

- SET_FD_S(f0)

- SET_FD_S(f1)

- SET_FD_S(f2)

- SET_FD_S(f3)

- SET_FD_S(f4)

- SET_FD_S(f5)

- SET_FD_S(f6)

- SET_FD_S(f7)

- SET_FD_S(f8)

- SET_FD_S(f9)

- SET_FD_S(f10)

- SET_FD_S(f11)

- SET_FD_S(f12)

- SET_FD_S(f13)

- SET_FD_S(f14)

- SET_FD_S(f15)

- SET_FD_S(f16)

- SET_FD_S(f17)

- SET_FD_S(f18)

- SET_FD_S(f19)

- SET_FD_S(f20)

- SET_FD_S(f21)

- SET_FD_S(f22)

- SET_FD_S(f23)

- SET_FD_S(f24)

- SET_FD_S(f25)

- SET_FD_S(f26)

- SET_FD_S(f27)

- SET_FD_S(f28)

- SET_FD_S(f29)

- SET_FD_S(f30)

- SET_FD_S(f31)

-END(set_fd_s)

-/*----------------------------------------------------------------------------

- * set_fd_d --

- *

- * Write (double precision) the FT register (bits 10-6).

- * This is an internal routine used by MipsEmulateFP only.

- *

- * Arguments:

- * a0 contains the FP instruction

- * t0 contains the sign

- * t1 contains the (biased) exponent

- * t2 contains the fraction

- *

- *----------------------------------------------------------------------------

- */

-#define SET_FD_D(n) \

- .rdata; \

- .dword set_fd_d_ ## n; \

- .text; \

-set_fd_d_ ## n: \

- dmtc1 t0, $ ## n; \

- j ra

-LEAF(set_fd_d, 0)

- dsll t0, 63 # set sign

- dsll t1, t1, DFRAC_BITS # set exponent

- or t0, t0, t1

- or t0, t0, t2 # set fraction

-ALEAF(set_fd_dword)

- srl a3, a0, 6 - 3 # get FD field

- and a3, a3, 0x1f << 3 # mask FD field

- ld a3, set_fd_d_tbl(a3) # switch on register number

- j a3

- .rdata

-set_fd_d_tbl:

- .text

- SET_FD_D(f0)

- SET_FD_D(f1)

- SET_FD_D(f2)

- SET_FD_D(f3)

- SET_FD_D(f4)

- SET_FD_D(f5)

- SET_FD_D(f6)

- SET_FD_D(f7)

- SET_FD_D(f8)

- SET_FD_D(f9)

- SET_FD_D(f10)

- SET_FD_D(f11)

- SET_FD_D(f12)

- SET_FD_D(f13)

- SET_FD_D(f14)

- SET_FD_D(f15)

- SET_FD_D(f16)

- SET_FD_D(f17)

- SET_FD_D(f18)

- SET_FD_D(f19)

- SET_FD_D(f20)

- SET_FD_D(f21)

- SET_FD_D(f22)

- SET_FD_D(f23)

- SET_FD_D(f24)

- SET_FD_D(f25)

- SET_FD_D(f26)

- SET_FD_D(f27)

- SET_FD_D(f28)

- SET_FD_D(f29)

- SET_FD_D(f30)

- SET_FD_D(f31)

-END(set_fd_d)

-/*----------------------------------------------------------------------------

- * renorm_fs_s --

- *

- * Results:

- * t1 unbiased exponent

- * t2 normalized fraction

- *

- *----------------------------------------------------------------------------

- */

-LEAF(renorm_fs_s, 0)

-/*

- * Find out how many leading zero bits are in t2 and put in t9.

- */

- move v0, t2

- move t9, zero

- srl v1, v0, 16

- bne v1, zero, 1f

- addu t9, 16

- sll v0, 16

-1:

- srl v1, v0, 24

- bne v1, zero, 1f

- addu t9, 8

- sll v0, 8

-1:

- srl v1, v0, 28

- bne v1, zero, 1f

- addu t9, 4

- sll v0, 4

-1:

- srl v1, v0, 30

- bne v1, zero, 1f

- addu t9, 2

- sll v0, 2

-1:

- srl v1, v0, 31

- bne v1, zero, 1f

- addu t9, 1

-/*

- * Now shift t2 the correct number of bits.

- */

-1:

- subu t9, t9, SLEAD_ZEROS # dont count normal leading zeros

- li t1, SEXP_MIN

- subu t1, t1, t9 # adjust exponent

- sll t2, t2, t9

- j ra

-END(renorm_fs_s)

-/*----------------------------------------------------------------------------

- * renorm_fs_d --

- *

- * Results:

- * t1 unbiased exponent

- * t2 normalized fraction

- *

- *----------------------------------------------------------------------------

- */

-LEAF(renorm_fs_d, 0)

-/*

- * Find out how many leading zero bits are in t2 and put in t9.

- */

- move v0, t2

- move t9, zero

- dsrl v1, v0, 32

- bne v1, zero, 1f

- addu t9, 32

- dsll v0, 32

-1:

- dsrl v1, v0, 16

- bne v1, zero, 1f

- addu t9, 16

- dsll v0, 16

-1:

- dsrl v1, v0, 24

- bne v1, zero, 1f

- addu t9, 8

- dsll v0, 8

-1:

- dsrl v1, v0, 28

- bne v1, zero, 1f

- addu t9, 4

- dsll v0, 4

-1:

- dsrl v1, v0, 30

- bne v1, zero, 1f

- addu t9, 2

- dsll v0, 2

-1:

- dsrl v1, v0, 31

- bne v1, zero, 1f

- addu t9, 1

-/*

- * Now shift t2 the correct number of bits.

- */

-1:

- subu t9, t9, DLEAD_ZEROS # dont count normal leading zeros

- li t1, DEXP_MIN

- subu t1, t9 # adjust exponent

- dsll t2, t9

- j ra

-END(renorm_fs_d)

-/*----------------------------------------------------------------------------

- * renorm_ft_s --

- *

- * Results:

- * ta1 unbiased exponent

- * ta2 normalized fraction

- *

- *----------------------------------------------------------------------------

- */

-LEAF(renorm_ft_s, 0)

-/*

- * Find out how many leading zero bits are in ta2 and put in t9.

- */

- move v0, ta2

- move t9, zero

- srl v1, v0, 16

- bne v1, zero, 1f

- addu t9, 16

- sll v0, 16

-1:

- srl v1, v0, 24

- bne v1, zero, 1f

- addu t9, 8

- sll v0, 8

-1:

- srl v1, v0, 28

- bne v1, zero, 1f

- addu t9, 4

- sll v0, 4

-1:

- srl v1, v0, 30

- bne v1, zero, 1f

- addu t9, 2

- sll v0, 2

-1:

- srl v1, v0, 31

- bne v1, zero, 1f

- addu t9, 1

-/*

- * Now shift ta2 the correct number of bits.

- */

-1:

- subu t9, t9, SLEAD_ZEROS # dont count normal leading zeros

- li ta1, SEXP_MIN

- subu ta1, t9 # adjust exponent

- sll ta2, t9

- j ra

-END(renorm_ft_s)

-/*----------------------------------------------------------------------------

- * renorm_ft_d --

- *

- * Results:

- * ta1 unbiased exponent

- * ta2 normalized fraction

- *

- *----------------------------------------------------------------------------

- */

-LEAF(renorm_ft_d, 0)

-/*

- * Find out how many leading zero bits are in ta2 and put in t9.

- */

- move v0, ta2

- move t9, zero

- dsrl v1, v0, 32

- bne v1, zero, 1f

- addu t9, 32

- dsll v0, 32

-1:

- dsrl v1, v0, 16

- bne v1, zero, 1f

- addu t9, 16

- dsll v0, 16

-1:

- dsrl v1, v0, 24

- bne v1, zero, 1f

- addu t9, 8

- dsll v0, 8

-1:

- dsrl v1, v0, 28

- bne v1, zero, 1f

- addu t9, 4

- dsll v0, 4

-1:

- dsrl v1, v0, 30

- bne v1, zero, 1f

- addu t9, 2

- dsll v0, 2

-1:

- dsrl v1, v0, 31

- bne v1, zero, 1f

- addu t9, 1

-/*

- * Now shift ta2 the correct number of bits.

- */

-1:

- subu t9, t9, DLEAD_ZEROS # dont count normal leading zeros

- li ta1, DEXP_MIN

- subu ta1, t9 # adjust exponent

- dsll ta2, t9

- j ra

-END(renorm_ft_d)

diff --git a/sys/arch/mips64/mips64/fp_emulate.c b/sys/arch/mips64/mips64/fp_emulate.c
new file mode 100644
index 00000000000..d392b8d1564
--- /dev/null
+++ b/sys/arch/mips64/mips64/fp_emulate.c

@@ -0,0 +1,1310 @@

+/* $OpenBSD: fp_emulate.c,v 1.1 2010/09/21 20:29:17 miod Exp $ */

+/*

+ *

+ * Permission to use, copy, modify, and distribute this software for any

+ * purpose with or without fee is hereby granted, provided that the above

+ * copyright notice and this permission notice appear in all copies.

+ *

+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES

+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF

+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR

+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES

+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN

+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF

+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

+ */

+/*

+ * Floating Point completion code (MI softfloat code control engine).

+ *

+ * Supports all MIPS IV COP1 and COP1X floating-point instructions.

+ * Floating-point load and store instructions, as well as branch instructions,

+ * are not handled, as they should not require completion code.

+ */

+#include <sys/param.h>

+#include <sys/systm.h>

+#include <sys/kernel.h>

+#include <sys/signalvar.h>

+#include <machine/cpu.h>

+#include <machine/fpu.h>

+#include <machine/frame.h>

+#include <machine/ieee.h>

+#include <machine/ieeefp.h>

+#include <machine/mips_opcode.h>

+#include <machine/regnum.h>

+#include <lib/libkern/softfloat.h>

+#if defined(DEBUG) && defined(DDB)

+#include <machine/db_machdep.h>

+#endif

+int fpu_emulate(struct trap_frame *, uint32_t, union sigval *);

+int fpu_emulate_cop1(struct trap_frame *, uint32_t);

+int fpu_emulate_cop1x(struct trap_frame *, uint32_t);

+uint64_t

+ fpu_load(struct trap_frame *, uint, uint);

+void fpu_store(struct trap_frame *, uint, uint, uint64_t);

+typedef int (fpu_fn3)(struct trap_frame *, uint, uint, uint, uint);

+typedef int (fpu_fn4)(struct trap_frame *, uint, uint, uint, uint, uint);

+fpu_fn3 fpu_abs;

+fpu_fn3 fpu_add;

+int fpu_c(struct trap_frame *, uint, uint, uint, uint, uint);

+fpu_fn3 fpu_ceil_l;

+fpu_fn3 fpu_ceil_w;

+fpu_fn3 fpu_cvt_d;

+fpu_fn3 fpu_cvt_l;

+fpu_fn3 fpu_cvt_s;

+fpu_fn3 fpu_cvt_w;

+fpu_fn3 fpu_div;

+fpu_fn3 fpu_floor_l;

+fpu_fn3 fpu_floor_w;

+fpu_fn4 fpu_madd;

+fpu_fn4 fpu_msub;

+fpu_fn3 fpu_mov;

+fpu_fn3 fpu_movcf;

+fpu_fn3 fpu_movn;

+fpu_fn3 fpu_movz;

+fpu_fn3 fpu_mul;

+fpu_fn3 fpu_neg;

+fpu_fn4 fpu_nmadd;

+fpu_fn4 fpu_nmsub;

+fpu_fn3 fpu_recip;

+fpu_fn3 fpu_round_l;

+fpu_fn3 fpu_round_w;

+fpu_fn3 fpu_rsqrt;

+fpu_fn3 fpu_sqrt;

+fpu_fn3 fpu_sub;

+fpu_fn3 fpu_trunc_l;

+fpu_fn3 fpu_trunc_w;

+int fpu_int_l(struct trap_frame *, uint, uint, uint, uint, uint);

+int fpu_int_w(struct trap_frame *, uint, uint, uint, uint, uint);

+/*

+ * Encoding of operand format within opcodes `fmt' and `fmt3' fields.

+ */

+#define FMT_S 0x00

+#define FMT_D 0x01

+#define FMT_W 0x04

+#define FMT_L 0x05

+/*
+ * NaN predicates from softfloat-specialize.h, which are not made public.
+ * They are used by fpu_abs() and fpu_c() on raw single-precision bit
+ * patterns.
+ *
+ * The argument is parenthesized and narrowed to 32 bits, so the result is
+ * the same whether the caller passes a float32 variable, an expression, or
+ * a wider integer carrying the pattern in its low word.
+ */
+#define	float32_is_nan(a) \
+	(0xff000000U < (uint32_t)((uint32_t)(a) << 1))
+#define	float32_is_signaling_nan(a) \
+	(((((uint32_t)(a) >> 22) & 0x1ff) == 0x1fe) && \
+	    ((uint32_t)(a) & 0x003fffff))

+/*

+ * Precomputed results of intXX_to_floatXX(1)

+ */

+#define ONE_F32 (float32)(SNG_EXP_BIAS << SNG_FRACBITS)

+#define ONE_F64 (float64)((uint64_t)DBL_EXP_BIAS << DBL_FRACBITS)

+/*

+ * Handle a floating-point exception.

+ */

+void
+MipsFPTrap(struct trap_frame *tf)
+{
+	struct cpu_info *ci = curcpu();
+	struct proc *p = ci->ci_curproc;
+	union sigval sv;
+	vaddr_t pc;
+	uint32_t fsr, excbits;
+	uint32_t insn;
+	InstFmt inst;
+	int sig = 0;
+	int fault_type = SI_NOINFO;
+	int update_pcb = 0;
+	int emulate = 0;
+	uint32_t sr;
+
+	KDASSERT(tf == p->p_md.md_regs);
+
+	/*
+	 * Locate the faulting instruction up front. Its address is handed
+	 * to the process in the signal value on every signal path below,
+	 * including the early exit taken for genuine FPU exceptions, so it
+	 * must be valid before the first `goto deliver'.
+	 */
+	pc = (vaddr_t)tf->pc;
+	if (tf->cause & CR_BR_DELAY)
+		pc += 4;
+
+	/*
+	 * Enable FPU, and read its status register.
+	 */
+
+	sr = getsr();
+	setsr(sr | SR_COP_1_BIT);
+
+	__asm__ __volatile__ ("cfc1 %0, $31" : "=r" (fsr));
+
+	/*
+	 * If this is not an unimplemented operation, but a genuine
+	 * FPU exception, signal the process.
+	 */
+
+	if ((fsr & FPCSR_C_E) == 0) {
+		sig = SIGFPE;
+		goto deliver;
+	}
+
+	/*
+	 * Get the faulting instruction. This should not fail, and
+	 * if it does, it's probably not your lucky day.
+	 */
+
+	if (copyin((void *)pc, &insn, sizeof insn) != 0) {
+		sig = SIGBUS;
+		fault_type = BUS_OBJERR;
+		goto deliver;
+	}
+	inst = *(InstFmt *)&insn;
+
+	/*
+	 * Emulate the instruction.
+	 */
+
+#ifdef DEBUG
+#ifdef DDB
+	printf("%s: unimplemented FPU completion, fsr 0x%08x\n%p: ",
+	    p->p_comm, fsr, pc);
+	dbmd_print_insn(insn, pc, printf);
+#else
+	printf("%s: unimplemented FPU completion, insn 0x%08x fsr 0x%08x\n",
+	    p->p_comm, insn, fsr);
+#endif
+#endif
+
+	switch (inst.FRType.op) {
+	default:
+		/*
+		 * Not a FPU instruction.
+		 */
+		break;
+	case OP_COP1:
+		switch (inst.RType.rs) {
+		case OP_BC:
+		case OP_MF:
+		case OP_DMF:
+		case OP_CF:
+		case OP_MT:
+		case OP_DMT:
+		case OP_CT:
+			/*
+			 * These instructions should not require emulation,
+			 * unless there is no FPU.
+			 */
+			break;
+		default:
+			emulate = 1;
+			break;
+		}
+		break;
+	case OP_COP1X:
+		switch (inst.FQType.op4) {
+		default:
+			break;
+		case OP_MADD:
+		case OP_MSUB:
+		case OP_NMADD:
+		case OP_NMSUB:
+			emulate = 1;
+			break;
+		}
+		break;
+	}
+
+	if (emulate) {
+		KASSERT(p == ci->ci_fpuproc);
+		save_fpu();
+		update_pcb = 1;
+
+		sig = fpu_emulate(tf, insn, &sv);
+		/* reload fsr, possibly modified by softfloat code */
+		fsr = tf->fsr;
+		if (sig == 0) {
+			/* raise SIGFPE if necessary */
+			excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
+			excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
+			if (excbits != 0)
+				sig = SIGFPE;
+		}
+	} else {
+		sig = SIGILL;
+		fault_type = ILL_ILLOPC;
+	}
+
+deliver:
+	switch (sig) {
+	case SIGFPE:
+		/*
+		 * Report the highest-priority exception which is both
+		 * raised (cause bits) and enabled (enable bits), using the
+		 * floating-point si_code matching each condition.
+		 */
+		excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
+		excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
+		if (excbits & FP_X_INV)
+			fault_type = FPE_FLTINV;
+		else if (excbits & FP_X_DZ)
+			fault_type = FPE_FLTDIV;
+		else if (excbits & FP_X_OFL)
+			fault_type = FPE_FLTOVF;
+		else if (excbits & FP_X_UFL)
+			fault_type = FPE_FLTUND;
+		else /* if (excbits & FP_X_IMP) */
+			fault_type = FPE_FLTRES;
+
+		break;
+	}
+
+	/*
+	 * Skip the instruction, unless we are delivering SIGILL.
+	 */
+	if (sig != SIGILL) {
+		if (tf->cause & CR_BR_DELAY) {
+			/*
+			 * Note that it doesn't matter, at this point,
+			 * that we pass the updated FSR value, as it is
+			 * only used to decide whether to branch or not
+			 * if the faulting instruction was BC1[FT].
+			 */
+			tf->pc = MipsEmulateBranch(tf, tf->pc, fsr, 0);
+		} else
+			tf->pc += 4;
+	}
+
+	/*
+	 * Update the FPU status register.
+	 * We need to make sure that this will not cause an exception
+	 * in kernel mode.
+	 */
+
+	/* propagate raised exceptions to the sticky bits */
+	fsr &= ~FPCSR_C_E;
+	excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
+	fsr |= excbits << FPCSR_F_SHIFT;
+	/* clear all exception sources */
+	fsr &= ~FPCSR_C_MASK;
+	if (update_pcb)
+		tf->fsr = fsr;
+
+	__asm__ __volatile__ ("ctc1 %0, $31" :: "r" (fsr));
+	/* disable fpu before returning to trap() */
+	setsr(sr);
+
+	if (sig != 0) {
+		sv.sival_ptr = (void *)pc;
+		KERNEL_PROC_LOCK(p);
+		trapsignal(p, sig, 0, fault_type, sv);
+		KERNEL_PROC_UNLOCK(p);
+	}
+}

+/*

+ * Emulate an FPU instruction. The FPU register set has been saved in the

+ * current PCB, and is pointed to by the trap frame.

+ */

+int
+fpu_emulate(struct trap_frame *tf, uint32_t insn, union sigval *sv)
+{
+	InstFmt decoded;
+
+	/* the trap code never stores this slot, so make it read as zero */
+	tf->zero = 0;
+
+	/* route on the major opcode; the sub-decoders do the real checking */
+	decoded = *(InstFmt *)&insn;
+	if (decoded.FRType.op == OP_COP1)
+		return fpu_emulate_cop1(tf, insn);
+	if (decoded.FRType.op == OP_COP1X)
+		return fpu_emulate_cop1x(tf, insn);
+
+	/* any other major opcode is not ours to emulate */
+	return SIGILL;
+}

+/*

+ * Emulate a COP1 FPU instruction.

+ */

+int
+fpu_emulate_cop1(struct trap_frame *tf, uint32_t insn)
+{
+	InstFmt inst;
+	uint ft, fs, fd;
+	fpu_fn3 *fpu_op;
+	/*
+	 * Handlers indexed by the 6-bit function code. Every slot is given
+	 * an explicit index so that a handler can never slide to another
+	 * function code; slots not listed are NULL (invalid function code).
+	 * All sixteen c.cond codes (0x30-0x3f) share fpu_c(), which takes
+	 * one more argument and is special-cased at dispatch time.
+	 */
+	static fpu_fn3 *const fpu_ops1[1 << 6] = {
+		[0x00] = fpu_add,
+		[0x01] = fpu_sub,
+		[0x02] = fpu_mul,
+		[0x03] = fpu_div,
+		[0x04] = fpu_sqrt,
+		[0x05] = fpu_abs,
+		[0x06] = fpu_mov,
+		[0x07] = fpu_neg,
+		[0x08] = fpu_round_l,
+		[0x09] = fpu_trunc_l,
+		[0x0a] = fpu_ceil_l,
+		[0x0b] = fpu_floor_l,
+		[0x0c] = fpu_round_w,
+		[0x0d] = fpu_trunc_w,
+		[0x0e] = fpu_ceil_w,
+		[0x0f] = fpu_floor_w,
+		[0x11] = fpu_movcf,
+		[0x12] = fpu_movz,
+		[0x13] = fpu_movn,
+		[0x15] = fpu_recip,
+		[0x16] = fpu_rsqrt,
+		[0x20] = fpu_cvt_s,
+		[0x21] = fpu_cvt_d,
+		[0x24] = fpu_cvt_w,
+		[0x25] = fpu_cvt_l,
+		[0x30] = (fpu_fn3 *)fpu_c,
+		[0x31] = (fpu_fn3 *)fpu_c,
+		[0x32] = (fpu_fn3 *)fpu_c,
+		[0x33] = (fpu_fn3 *)fpu_c,
+		[0x34] = (fpu_fn3 *)fpu_c,
+		[0x35] = (fpu_fn3 *)fpu_c,
+		[0x36] = (fpu_fn3 *)fpu_c,
+		[0x37] = (fpu_fn3 *)fpu_c,
+		[0x38] = (fpu_fn3 *)fpu_c,
+		[0x39] = (fpu_fn3 *)fpu_c,
+		[0x3a] = (fpu_fn3 *)fpu_c,
+		[0x3b] = (fpu_fn3 *)fpu_c,
+		[0x3c] = (fpu_fn3 *)fpu_c,
+		[0x3d] = (fpu_fn3 *)fpu_c,
+		[0x3e] = (fpu_fn3 *)fpu_c,
+		[0x3f] = (fpu_fn3 *)fpu_c
+	};
+
+	inst = *(InstFmt *)&insn;
+
+	/*
+	 * Check for valid function code.
+	 */
+
+	fpu_op = fpu_ops1[inst.FRType.func];
+	if (fpu_op == NULL)
+		return SIGILL;
+
+	/*
+	 * Check for valid format. FRType assumes bit 25 is always set,
+	 * so we need to check for it explicitly.
+	 */
+
+	if ((insn & (1 << 25)) == 0)
+		return SIGILL;
+	switch (inst.FRType.fmt) {
+	default:
+		return SIGILL;
+	case FMT_S:
+	case FMT_D:
+	case FMT_W:
+	case FMT_L:
+		break;
+	}
+
+	/*
+	 * Check for valid register values. Only even-numbered registers
+	 * can be used if the FR bit is clear in coprocessor 0 status
+	 * register.
+	 *
+	 * Note that c.cond does not specify a register number in the fd
+	 * field, but the fd field must have zero in its low two bits, so
+	 * the test will not reject valid c.cond instructions.
+	 */
+
+	ft = inst.FRType.ft;
+	fs = inst.FRType.fs;
+	fd = inst.FRType.fd;
+	if ((tf->sr & SR_FR_32) == 0) {
+		if ((ft | fs | fd) & 1)
+			return SIGILL;
+	}
+
+	/*
+	 * Finally dispatch to the proper routine.
+	 */
+
+	if (fpu_op == (fpu_fn3 *)&fpu_c)
+		return fpu_c(tf, inst.FRType.fmt, ft, fs, fd, inst.FRType.func);
+	else
+		return (*fpu_op)(tf, inst.FRType.fmt, ft, fs, fd);
+}

+/*

+ * Emulate a COP1X FPU instruction.

+ */

+int
+fpu_emulate_cop1x(struct trap_frame *tf, uint32_t insn)
+{
+	InstFmt inst;
+	uint fr, ft, fs, fd;
+	fpu_fn4 *fpu_op;
+	/*
+	 * Handlers indexed by the 3-bit op4 field, keyed by the same OP_*
+	 * constants MipsFPTrap() matches op4 against. Slots not listed are
+	 * NULL (not an emulated COP1X operation).
+	 */
+	static fpu_fn4 *const fpu_ops1x[1 << 3] = {
+		[OP_MADD] = fpu_madd,
+		[OP_MSUB] = fpu_msub,
+		[OP_NMADD] = fpu_nmadd,
+		[OP_NMSUB] = fpu_nmsub
+	};
+
+	inst = *(InstFmt *)&insn;
+
+	/*
+	 * Check for valid function code.
+	 */
+
+	fpu_op = fpu_ops1x[inst.FQType.op4];
+	if (fpu_op == NULL)
+		return SIGILL;
+
+	/*
+	 * Check for valid format.
+	 */
+
+	switch (inst.FQType.fmt3) {
+	default:
+		return SIGILL;
+	case FMT_S:
+	case FMT_D:
+	case FMT_W:
+	case FMT_L:
+		break;
+	}
+
+	/*
+	 * Check for valid register values. Only even-numbered registers
+	 * can be used if the FR bit is clear in coprocessor 0 status
+	 * register.
+	 */
+
+	fr = inst.FQType.fr;
+	ft = inst.FQType.ft;
+	fs = inst.FQType.fs;
+	fd = inst.FQType.fd;
+	if ((tf->sr & SR_FR_32) == 0) {
+		if ((fr | ft | fs | fd) & 1)
+			return SIGILL;
+	}
+
+	/*
+	 * Finally dispatch to the proper routine, handing it the format
+	 * which was validated above (fmt3). The FRType view of the word
+	 * must not be used here: its fmt bits overlap the fr register
+	 * number in this encoding.
+	 */
+
+	return (*fpu_op)(tf, inst.FQType.fmt3, fr, ft, fs, fd);
+}

+/*

+ * Load a floating-point argument according to the specified format.

+ */

+uint64_t
+fpu_load(struct trap_frame *tf, uint fmt, uint regno)
+{
+	register_t *regs = (register_t *)tf;
+	int is32 = (fmt == FMT_S || fmt == FMT_W);
+	int is64 = (fmt == FMT_D || fmt == FMT_L);
+	uint64_t val, upper;
+
+	val = (uint64_t)regs[FPBASE + regno];
+
+	if (tf->sr & SR_FR_32) {
+		/* one register holds the whole value; trim 32-bit formats */
+		if (is32)
+			val &= 0xffffffff;
+		return val;
+	}
+
+	/* FR clear: only the low word of each register is significant */
+	val &= 0xffffffff;
+	if (is64) {
+		/*
+		 * The high word comes from the following register.
+		 * Caller has enforced regno is even.
+		 */
+		upper = (uint64_t)regs[FPBASE + regno + 1];
+		val |= upper << 32;
+	}
+
+	return val;
+}

+/*

+ * Store a floating-point result according to the specified format.

+ */

+void
+fpu_store(struct trap_frame *tf, uint fmt, uint regno, uint64_t rslt)
+{
+	register_t *regs = (register_t *)tf;
+
+	if ((tf->sr & SR_FR_32) == 0) {
+		/*
+		 * FR clear: split the value over the register and the one
+		 * following it. Caller has enforced regno is even.
+		 */
+		regs[FPBASE + regno] = rslt & 0xffffffff;
+		regs[FPBASE + regno + 1] = (rslt >> 32) & 0xffffffff;
+		return;
+	}
+
+	/* FR set: the register takes the whole value */
+	regs[FPBASE + regno] = rslt;
+}

+/*

+ * Integer conversion

+ */

+int
+fpu_int_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint rm)
+{
+	const uint32_t inv_trap = FPCSR_C_V | FPCSR_E_V;
+	uint32_t saved_rm;
+	uint64_t val;
+
+	/* ft must be zero, and the source must be single or double */
+	if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
+		return SIGILL;
+
+	val = fpu_load(tf, fmt, fs);
+
+	/* switch the saved FSR to the rounding mode asked for by the caller */
+	saved_rm = tf->fsr & FPCSR_RM_MASK;
+	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
+
+	val = (fmt == FMT_S) ?
+	    float32_to_int64((float32)val) : float64_to_int64((float64)val);
+
+	/* put the previous rounding mode back */
+	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | saved_rm;
+
+	/* no write-back when both the V cause and V enable bits are set */
+	if ((tf->fsr & inv_trap) != inv_trap)
+		fpu_store(tf, fmt, fd, val);
+
+	return 0;
+}

+int
+fpu_int_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint rm)
+{
+	const uint32_t inv_trap = FPCSR_C_V | FPCSR_E_V;
+	uint32_t saved_rm;
+	uint64_t val;
+
+	/* ft must be zero, and the source must be single or double */
+	if (ft != 0 || (fmt != FMT_S && fmt != FMT_D))
+		return SIGILL;
+
+	val = fpu_load(tf, fmt, fs);
+
+	/* switch the saved FSR to the rounding mode asked for by the caller */
+	saved_rm = tf->fsr & FPCSR_RM_MASK;
+	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
+
+	val = (fmt == FMT_S) ?
+	    float32_to_int32((float32)val) : float64_to_int32((float64)val);
+
+	/* put the previous rounding mode back */
+	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | saved_rm;
+
+	/* no write-back when both the V cause and V enable bits are set */
+	if ((tf->fsr & inv_trap) != inv_trap)
+		fpu_store(tf, fmt, fd, val);
+
+	return 0;
+}

+/*

+ * FPU Instruction emulation

+ */

+int
+fpu_abs(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t raw;
+
+	if (ft != 0)
+		return SIGILL;
+	if (fmt != FMT_S && fmt != FMT_D)
+		return SIGILL;
+
+	raw = fpu_load(tf, fmt, fs);
+	/*
+	 * Clear sign bit unless NaN. A NaN operand raises the invalid
+	 * condition instead and is stored back unchanged.
+	 *
+	 * The masks are spelled as unsigned constants: shifting a signed
+	 * 1L into (or past) the sign bit is undefined behaviour.
+	 */
+	if (fmt == FMT_S) {
+		float32 f32 = (float32)raw;
+		if (float32_is_nan(f32)) {
+			float_set_invalid();
+		} else {
+			f32 &= 0x7fffffffU;
+			raw = (uint64_t)f32;
+		}
+	} else {
+		float64 f64 = (float64)raw;
+		if (float64_is_nan(f64)) {
+			float_set_invalid();
+		} else {
+			f64 &= 0x7fffffffffffffffULL;
+			raw = (uint64_t)f64;
+		}
+	}
+	fpu_store(tf, fmt, fd, raw);
+
+	return 0;
+}

+int
+fpu_add(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
+{
+	uint64_t lhs, rhs, sum;
+
+	/* only the single and double formats are accepted */
+	switch (fmt) {
+	case FMT_S:
+	case FMT_D:
+		break;
+	default:
+		return SIGILL;
+	}
+
+	lhs = fpu_load(tf, fmt, fs);
+	rhs = fpu_load(tf, fmt, ft);
+
+	/* fd = fs + ft, computed by softfloat in the operand format */
+	if (fmt == FMT_D)
+		sum = (uint64_t)float64_add((float64)lhs, (float64)rhs);
+	else
+		sum = (uint64_t)float32_add((float32)lhs, (float32)rhs);
+
+	fpu_store(tf, fmt, fd, sum);
+
+	return 0;
+}

+/*
+ * c.cond.fmt: compare fs against ft and record the outcome in condition
+ * code `cc' (the upper bits of the fd field) of the saved FSR.
+ *
+ * `op' is the instruction function code; its bit 0 selects "unordered",
+ * bit 1 "equal", bit 2 "less than", and bit 3 requests an invalid
+ * operation condition whenever the operands are unordered.
+ *
+ * Returns 0, or SIGILL if the encoding is not valid.
+ */
+int
+fpu_c(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint op)
+{
+	uint64_t raw1, raw2;
+	uint cc, lt, eq, uo;
+
+	if ((fd & 0x03) != 0)
+		return SIGILL;
+	if (fmt != FMT_S && fmt != FMT_D)
+		return SIGILL;
+
+	lt = eq = uo = 0;
+	cc = fd >> 2;
+
+	/* fs is the left-hand operand: "less than" means fs < ft */
+	raw1 = fpu_load(tf, fmt, fs);
+	raw2 = fpu_load(tf, fmt, ft);
+
+	if (fmt == FMT_S) {
+		float32 f32a = (float32)raw1;
+		float32 f32b = (float32)raw2;
+		if (float32_is_nan(f32a) || float32_is_nan(f32b)) {
+			uo = 1 << 0;
+			/* a signaling NaN in either operand is always invalid */
+			if (float32_is_signaling_nan(f32a) ||
+			    float32_is_signaling_nan(f32b))
+				op |= 0x08;	/* force invalid exception */
+		} else {
+			if (float32_eq(f32a, f32b))
+				eq = 1 << 1;
+			else if (float32_lt(f32a, f32b))
+				lt = 1 << 2;
+		}
+	} else {
+		float64 f64a = (float64)raw1;
+		float64 f64b = (float64)raw2;
+		if (float64_is_nan(f64a) || float64_is_nan(f64b)) {
+			uo = 1 << 0;
+			/* a signaling NaN in either operand is always invalid */
+			if (float64_is_signaling_nan(f64a) ||
+			    float64_is_signaling_nan(f64b))
+				op |= 0x08;	/* force invalid exception */
+		} else {
+			if (float64_eq(f64a, f64b))
+				eq = 1 << 1;
+			else if (float64_lt(f64a, f64b))
+				lt = 1 << 2;
+		}
+	}
+
+	if (uo && (op & 0x08)) {
+		float_set_invalid();
+		if (tf->fsr & FPCSR_E_V) {
+			/* comparison result intentionally not written */
+			return 0;
+		}
+	}
+
+	/*
+	 * Either the comparison was clean, or the invalid condition is not
+	 * enabled as a trap: in both cases the condition code is updated.
+	 */
+	if ((uo | eq | lt) & op)
+		tf->fsr |= FPCSR_CONDVAL(cc);
+	else
+		tf->fsr &= ~FPCSR_CONDVAL(cc);
+
+	return 0;
+}

+int

+fpu_ceil_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ /* round towards positive infinity */

+ return fpu_int_l(tf, fmt, ft, fs, fd, FP_RP);

+int

+fpu_ceil_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ /* round towards positive infinity */

+ return fpu_int_w(tf, fmt, ft, fs, fd, FP_RP);

+int

+fpu_cvt_d(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw;

+ if (ft != 0)

+ return SIGILL;

+ if (fmt == FMT_D)

+ return SIGILL;

+ raw = fpu_load(tf, fmt, fs);

+ switch (fmt) {

+ case FMT_L:

+ raw = int64_to_float64((int64_t)raw);

+ break;

+ case FMT_S:

+ raw = float32_to_float64((float32)raw);

+ break;

+ case FMT_W:

+ raw = int32_to_float64((int32_t)raw);

+ break;

+ }

+ fpu_store(tf, fmt, fd, raw);

+ return 0;

+int

+fpu_cvt_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw;

+ uint32_t rm;

+ if (ft != 0)

+ return SIGILL;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ rm = tf->fsr & FPCSR_RM_MASK;

+ raw = fpu_load(tf, fmt, fs);

+ if (fmt == FMT_D) {

+ if (rm == FP_RZ)

+ raw = float64_to_int64_round_to_zero((float64)raw);

+ else

+ raw = float64_to_int64((float64)raw);

+ } else {

+ if (rm == FP_RZ)

+ raw = float32_to_int64_round_to_zero((float32)raw);

+ else

+ raw = float32_to_int64((float32)raw);

+ }

+ if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))

+ fpu_store(tf, fmt, fd, raw);

+ return 0;

+int

+fpu_cvt_s(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw;

+ if (ft != 0)

+ return SIGILL;

+ if (fmt == FMT_S)

+ return SIGILL;

+ raw = fpu_load(tf, fmt, fs);

+ switch (fmt) {

+ case FMT_D:

+ raw = float64_to_float32((float64)raw);

+ break;

+ case FMT_L:

+ raw = int64_to_float32((int64_t)raw);

+ break;

+ case FMT_W:

+ raw = int32_to_float32((int32_t)raw);

+ break;

+ }

+ fpu_store(tf, fmt, fd, raw);

+ return 0;

+int

+fpu_cvt_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw;

+ uint32_t rm;

+ if (ft != 0)

+ return SIGILL;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ rm = tf->fsr & FPCSR_RM_MASK;

+ raw = fpu_load(tf, fmt, fs);

+ if (fmt == FMT_D) {

+ if (rm == FP_RZ)

+ raw = float64_to_int32_round_to_zero((float64)raw);

+ else

+ raw = float64_to_int32((float64)raw);

+ } else {

+ if (rm == FP_RZ)

+ raw = float32_to_int32_round_to_zero((float32)raw);

+ else

+ raw = float32_to_int32((float32)raw);

+ }

+ if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))

+ fpu_store(tf, fmt, fd, raw);

+ return 0;

+int

+fpu_div(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw1, raw2, rslt;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw1 = fpu_load(tf, fmt, fs);

+ raw2 = fpu_load(tf, fmt, ft);

+ if (fmt == FMT_S) {

+ float32 f32 = float32_div((float32)raw1, (float32)raw2);

+ rslt = (uint64_t)f32;

+ } else {

+ float64 f64 = float64_div((float64)raw1, (float64)raw2);

+ rslt = (uint64_t)f64;

+ }

+ fpu_store(tf, fmt, fd, rslt);

+ return 0;

+int

+fpu_floor_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ /* round towards negative infinity */

+ return fpu_int_l(tf, fmt, ft, fs, fd, FP_RM);

+int

+fpu_floor_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ /* round towards negative infinity */

+ return fpu_int_w(tf, fmt, ft, fs, fd, FP_RM);

+int

+fpu_madd(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)

+ uint64_t raw1, raw2, raw3, rslt;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw1 = fpu_load(tf, fmt, fs);

+ raw2 = fpu_load(tf, fmt, ft);

+ raw3 = fpu_load(tf, fmt, fr);

+ if (fmt == FMT_S) {

+ float32 f32 = float32_add(

+ float32_mul((float32)raw1, (float32)raw2),

+ (float32)raw3);

+ rslt = (uint64_t)f32;

+ } else {

+ float64 f64 = float64_add(

+ float64_mul((float64)raw1, (float64)raw2),

+ (float64)raw3);

+ rslt = (uint64_t)f64;

+ }

+ fpu_store(tf, fmt, fd, rslt);

+ return 0;

+int

+fpu_mov(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw;

+ if (ft != 0)

+ return SIGILL;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw = fpu_load(tf, fmt, fs);

+ fpu_store(tf, fmt, fd, raw);

+ return 0;

+int

+fpu_movcf(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw;

+ uint cc, istf;

+ int condition;

+ if ((ft & 0x02) != 0)

+ return SIGILL;

+ cc = ft >> 2;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ condition = tf->fsr & FPCSR_CONDVAL(cc);

+ istf = ft & COPz_BC_TF_MASK;

+ if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) {

+ raw = fpu_load(tf, fmt, fs);

+ fpu_store(tf, fmt, fd, raw);

+ }

+ return 0;

+int

+fpu_movn(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ register_t *regs = (register_t *)tf;

+ uint64_t raw;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ if (ft != ZERO && regs[ft] != 0) {

+ raw = fpu_load(tf, fmt, fs);

+ fpu_store(tf, fmt, fd, raw);

+ }

+ return 0;

+int

+fpu_movz(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ register_t *regs = (register_t *)tf;

+ uint64_t raw;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ if (ft == ZERO || regs[ft] == 0) {

+ raw = fpu_load(tf, fmt, fs);

+ fpu_store(tf, fmt, fd, raw);

+ }

+ return 0;

+int

+fpu_msub(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)

+ uint64_t raw1, raw2, raw3, rslt;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw1 = fpu_load(tf, fmt, fs);

+ raw2 = fpu_load(tf, fmt, ft);

+ raw3 = fpu_load(tf, fmt, fr);

+ if (fmt == FMT_S) {

+ float32 f32 = float32_sub(

+ float32_mul((float32)raw1, (float32)raw2),

+ (float32)raw3);

+ rslt = (uint64_t)f32;

+ } else {

+ float64 f64 = float64_sub(

+ float64_mul((float64)raw1, (float64)raw2),

+ (float64)raw3);

+ rslt = (uint64_t)f64;

+ }

+ fpu_store(tf, fmt, fd, rslt);

+ return 0;

+int

+fpu_mul(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw1, raw2, rslt;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw1 = fpu_load(tf, fmt, fs);

+ raw2 = fpu_load(tf, fmt, ft);

+ if (fmt == FMT_S) {

+ float32 f32 = float32_mul((float32)raw1, (float32)raw2);

+ rslt = (uint64_t)f32;

+ } else {

+ float64 f64 = float64_mul((float64)raw1, (float64)raw2);

+ rslt = (uint64_t)f64;

+ }

+ fpu_store(tf, fmt, fd, rslt);

+ return 0;

+int

+fpu_neg(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw;

+ if (ft != 0)

+ return SIGILL;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw = fpu_load(tf, fmt, fs);

+ /* flip sign bit unless NaN */

+ if (fmt == FMT_S) {

+ float32 f32 = (float32)raw;

+ if (float32_is_nan(f32)) {

+ float_set_invalid();

+ } else {

+ f32 ^= 1L << 31;

+ raw = (uint64_t)f32;

+ }

+ } else {

+ float64 f64 = (float64)raw;

+ if (float64_is_nan(f64)) {

+ float_set_invalid();

+ } else {

+ f64 ^= 1L << 63;

+ raw = (uint64_t)f64;

+ }

+ fpu_store(tf, fmt, fd, raw);

+ return 0;

+int

+fpu_nmadd(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)

+ uint64_t raw1, raw2, raw3, rslt;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw1 = fpu_load(tf, fmt, fs);

+ raw2 = fpu_load(tf, fmt, ft);

+ raw3 = fpu_load(tf, fmt, fr);

+ if (fmt == FMT_S) {

+ float32 f32 = float32_add(

+ float32_mul((float32)raw1, (float32)raw2),

+ (float32)raw3);

+ if (float32_is_nan(f32))

+ float_set_invalid();

+ else

+ f32 ^= 1L << 31;

+ rslt = (uint64_t)f32;

+ } else {

+ float64 f64 = float64_add(

+ float64_mul((float64)raw1, (float64)raw2),

+ (float64)raw3);

+ if (float64_is_nan(f64))

+ float_set_invalid();

+ else

+ f64 ^= 1L << 63;

+ rslt = (uint64_t)f64;

+ }

+ fpu_store(tf, fmt, fd, rslt);

+ return 0;

+int

+fpu_nmsub(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)

+ uint64_t raw1, raw2, raw3, rslt;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw1 = fpu_load(tf, fmt, fs);

+ raw2 = fpu_load(tf, fmt, ft);

+ raw3 = fpu_load(tf, fmt, fr);

+ if (fmt == FMT_S) {

+ float32 f32 = float32_sub(

+ float32_mul((float32)raw1, (float32)raw2),

+ (float32)raw3);

+ if (float32_is_nan(f32))

+ float_set_invalid();

+ else

+ f32 ^= 1L << 31;

+ rslt = (uint64_t)f32;

+ } else {

+ float64 f64 = float64_sub(

+ float64_mul((float64)raw1, (float64)raw2),

+ (float64)raw3);

+ if (float64_is_nan(f64))

+ float_set_invalid();

+ else

+ f64 ^= 1L << 63;

+ rslt = (uint64_t)f64;

+ }

+ fpu_store(tf, fmt, fd, rslt);

+ return 0;

+int

+fpu_recip(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw;

+ if (ft != 0)

+ return SIGILL;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw = fpu_load(tf, fmt, fs);

+ if (fmt == FMT_S) {

+ float32 f32 = float32_div(ONE_F32, (float32)raw);

+ raw = (uint64_t)f32;

+ } else {

+ float64 f64 = float64_div(ONE_F64, (float64)raw);

+ raw = (uint64_t)f64;

+ }

+ fpu_store(tf, fmt, fd, raw);

+ return 0;

+int

+fpu_round_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ /* round towards nearest */

+ return fpu_int_l(tf, fmt, ft, fs, fd, FP_RN);

+int

+fpu_round_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ /* round towards nearest */

+ return fpu_int_w(tf, fmt, ft, fs, fd, FP_RN);

+int

+fpu_rsqrt(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw;

+ if (ft != 0)

+ return SIGILL;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw = fpu_load(tf, fmt, fs);

+ if (fmt == FMT_S) {

+ float32 f32 = float32_sqrt((float32)raw);

+ if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=

+ (FPCSR_C_V | FPCSR_E_V))

+ f32 = float32_div(ONE_F32, f32);

+ raw = (uint64_t)f32;

+ } else {

+ float64 f64 = float64_sqrt((float64)raw);

+ if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=

+ (FPCSR_C_V | FPCSR_E_V))

+ f64 = float64_div(ONE_F64, f64);

+ raw = (uint64_t)f64;

+ }

+ fpu_store(tf, fmt, fd, raw);

+ return 0;

+int

+fpu_sqrt(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw;

+ if (ft != 0)

+ return SIGILL;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw = fpu_load(tf, fmt, fs);

+ if (fmt == FMT_S) {

+ float32 f32 = float32_sqrt((float32)raw);

+ raw = (uint64_t)f32;

+ } else {

+ float64 f64 = float64_sqrt((float64)raw);

+ raw = (uint64_t)f64;

+ }

+ fpu_store(tf, fmt, fd, raw);

+ return 0;

+int

+fpu_sub(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ uint64_t raw1, raw2, rslt;

+ if (fmt != FMT_S && fmt != FMT_D)

+ return SIGILL;

+ raw1 = fpu_load(tf, fmt, fs);

+ raw2 = fpu_load(tf, fmt, ft);

+ if (fmt == FMT_S) {

+ float32 f32 = float32_sub((float32)raw1, (float32)raw2);

+ rslt = (uint64_t)f32;

+ } else {

+ float64 f64 = float64_sub((float64)raw1, (float64)raw2);

+ rslt = (uint64_t)f64;

+ }

+ fpu_store(tf, fmt, fd, rslt);

+ return 0;

+int

+fpu_trunc_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ /* round towards zero */

+ return fpu_int_l(tf, fmt, ft, fs, fd, FP_RZ);

+int

+fpu_trunc_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)

+ /* round towards zero */

+ return fpu_int_w(tf, fmt, ft, fs, fd, FP_RZ);

diff --git a/sys/arch/mips64/mips64/lcore_float.S b/sys/arch/mips64/mips64/lcore_float.S
index c15db784cc6..b89837fe2f3 100644
--- a/sys/arch/mips64/mips64/lcore_float.S
+++ b/sys/arch/mips64/mips64/lcore_float.S

@@ -1,4 +1,4 @@

-/* $OpenBSD: lcore_float.S,v 1.19 2010/01/08 01:35:52 syuu Exp $ */

+/* $OpenBSD: lcore_float.S,v 1.20 2010/09/21 20:29:17 miod Exp $ */

@@ -152,7 +152,6 @@ LEAF(MipsSwitchFPState, 0)

ldc1 $f30, PCB_FPREGS+(30 * REGSZ)(a1)

ldc1 $f31, PCB_FPREGS+(31 * REGSZ)(a1)

- and t0, t0, ~FPC_EXCEPTION_BITS

ctc1 t0, FPC_CSR

nop

@@ -256,7 +255,6 @@ LEAF(MipsSwitchFPState16, 0)

lwc1 $f30, PCB_FPREGS+(30 * REGSZ)(a1)

lwc1 $f31, PCB_FPREGS+(31 * REGSZ)(a1)

- and t0, t0, ~FPC_EXCEPTION_BITS

ctc1 t0, FPC_CSR

nop

@@ -407,134 +405,6 @@ END(MipsSaveCurFPState16)

/*----------------------------------------------------------------------------

- * MipsFPTrap --

- *

- * Handle a floating point Trap.

- *

- * MipsFPTrap(statusReg, causeReg, pc)

- * unsigned statusReg;

- * unsigned causeReg;

- * unsigned pc;

- *

- * Results:

- * None.

- *

- * Side effects:

- * None.

- *

- *----------------------------------------------------------------------------

- */

-NON_LEAF(MipsFPTrap, FRAMESZ(CF_SZ), ra)

- PTR_SUBU sp, sp, FRAMESZ(CF_SZ)

- mfc0 t0, COP_0_STATUS_REG

- PTR_S ra, CF_RA_OFFS(sp)

- .mask 0x80000000, (CF_RA_OFFS - FRAMESZ(CF_SZ))

- PTR_S a2, 2*REGSZ(sp)

- PTR_S a3, 3*REGSZ(sp)

- or t1, t0, SR_COP_1_BIT

- mtc0 t1, COP_0_STATUS_REG

- ITLBNOPFIX

- cfc1 t1, FPC_CSR # stall til FP done

- cfc1 t1, FPC_CSR # now get status

- nop

- sll t2, t1, (31-17) # unimplemented operation?

- bgez t2, 3f # no, normal trap

- nop

-/*

- * We got an unimplemented operation trap so fetch the instruction,

- * compute the next PC and emulate the instruction.

- */

- bgez a1, 1f # Check the branch delay bit.

- nop

-/*

- * The instruction is in the branch delay slot so the branch will have to

- * be emulated to get the resulting PC.

- */

- GET_CPU_INFO(t2, t3)

- PTR_L a0, CI_CURPROCPADDR(t2) # first arg is ptr to CPU regs

- move a1, a2 # second arg is instruction PC

- move a2, t1 # third arg is the FP CSR

- jal MipsEmulateBranch # compute PC after branch

- move a3, zero # fourth arg is FALSE

-/*

- * Now load the floating-point instruction in the branch delay slot

- * to be emulated.

- */

- PTR_L a2, 2*REGSZ(sp) # restore EXC pc

- b 2f

- lw a0, 4(a2) # a0 = coproc instruction

-/*

- * This is not in the branch delay slot so calculate the resulting

- * PC (epc + 4) into v0 and continue to MipsEmulateFP().

- */

-1:

- lw a0, 0(a2) # a0 = coproc instruction

- PTR_ADDU v0, a2, 4 # v0 = next pc

-2:

- GET_CPU_INFO(t2, t3)

- PTR_L a3, CI_CURPROCPADDR(t2) # first arg is ptr to CPU regs

- PTR_S v0, PCB_REGS+(PC * REGSZ)(a3) # save new pc

-/*

- * Check to see if the instruction to be emulated is a floating-point

- * instruction.

- */

- srl a3, a0, OPCODE_SHIFT

- beq a3, OPCODE_C1, 5f # this should never fail

- nop

-/*

- * Send a floating point exception signal to the current process.

- */

-3:

- cfc1 a1, FPC_CSR # code = FP exceptions

- GET_CPU_INFO(t2, t3)

- PTR_L a0, CI_CURPROC(t2) # get current process

- PTR_L a3, 3*REGSZ(sp)

- and v0, a1, FPC_EXCEPTION_INEXACT

- bnez v0, 4f

- li a2, FPE_FLTRES

- and v0, a1, FPC_EXCEPTION_UNDERFLOW

- bnez v0, 4f

- li a2, FPE_FLTUND

- and v0, a1, FPC_EXCEPTION_OVERFLOW

- bnez v0, 4f

- li a2, FPE_FLTOVF

- and v0, a1, FPC_EXCEPTION_DIV0

- bnez v0, 4f

- li a2, FPE_FLTDIV

- li a2, FPE_FLTINV

-4:

- ctc1 zero, FPC_CSR # Clear exceptions

- jal fpu_trapsignal

- nop

- b FPReturn

- nop

-/*

- * Finally, we can call MipsEmulateFP() where a0 is the instruction to emulate.

- */

-5:

- jal MipsEmulateFP

- nop

- bnez v0, 3b # Emulation failed.

- nop

-/*

- * Turn off the floating point coprocessor and return.

- */

-FPReturn:

- mfc0 t0, COP_0_STATUS_REG

- PTR_L ra, CF_RA_OFFS(sp)

- and t0, t0, ~SR_COP_1_BIT

- mtc0 t0, COP_0_STATUS_REG

- ITLBNOPFIX

- j ra

- PTR_ADDU sp, sp, FRAMESZ(CF_SZ)

-END(MipsFPTrap)

-/*----------------------------------------------------------------------------

- *

* cp1_get_prid

* Get the floating point co-processor id.

@@ -562,4 +432,3 @@ LEAF(cp1_get_prid, 0)

jr ra

nop

END(cp1_get_prid)

diff --git a/sys/arch/mips64/mips64/process_machdep.c b/sys/arch/mips64/mips64/process_machdep.c
index bfe1b2948b3..748c3ca7af2 100644
--- a/sys/arch/mips64/mips64/process_machdep.c
+++ b/sys/arch/mips64/mips64/process_machdep.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: process_machdep.c,v 1.14 2010/06/26 23:24:43 guenther Exp $ */

+/* $OpenBSD: process_machdep.c,v 1.15 2010/09/21 20:29:17 miod Exp $ */

@@ -40,7 +40,7 @@

* From:

* Id: procfs_i386.c,v 4.1 1993/12/17 10:47:45 jsp Rel

- * $Id: process_machdep.c,v 1.14 2010/06/26 23:24:43 guenther Exp $

+ * $Id: process_machdep.c,v 1.15 2010/09/21 20:29:17 miod Exp $

@@ -72,6 +72,7 @@

#include <sys/proc.h>

#include <sys/vnode.h>

#include <sys/ptrace.h>

+#include <machine/fpu.h>

#include <machine/frame.h>

#include <machine/reg.h>

@@ -111,6 +112,7 @@ process_write_regs(p, regs)

ic = p->p_md.md_regs->ic;

ipl = p->p_md.md_regs->ipl;

bcopy(&regs->r_regs[AST], &p->p_md.md_regs->ast, REGSIZE);

+ p->p_md.md_regs->fsr &= ~FPCSR_C_MASK;

p->p_md.md_regs->sr = sr;

p->p_md.md_regs->ic = ic;

p->p_md.md_regs->ipl = ipl;

diff --git a/sys/arch/mips64/mips64/trap.c b/sys/arch/mips64/mips64/trap.c
index c2a534f9fb8..378bd911409 100644
--- a/sys/arch/mips64/mips64/trap.c
+++ b/sys/arch/mips64/mips64/trap.c

@@ -1,4 +1,4 @@

-/* $OpenBSD: trap.c,v 1.67 2010/09/17 00:36:32 miod Exp $ */

+/* $OpenBSD: trap.c,v 1.68 2010/09/21 20:29:17 miod Exp $ */

@@ -133,10 +133,7 @@ uint64_t kdbpeekd(vaddr_t);

extern int kdb_trap(int, db_regs_t *);

#endif

-extern void MipsFPTrap(u_int, u_int, u_int, union sigval);

void ast(void);

-void fpu_trapsignal(struct proc *, u_long, int, union sigval);

void trap(struct trap_frame *);

#ifdef PTRACE

int cpu_singlestep(struct proc *);

@@ -746,6 +743,11 @@ printf("SIG-BUSB @%p pc %p, ra %p\n", trapframe->badvaddr, trapframe->pc, trapfr

break;

case T_COP_UNUSABLE+T_USER:

+ /*

+ * Note MIPS IV COP1X instructions issued with FPU

+ * disabled correctly report coprocessor 1 as the

+ * unusable coprocessor number.

+ */

if ((trapframe->cause & CR_COP_ERR) != 0x10000000) {

i = SIGILL; /* only FPU instructions allowed */

typ = ILL_ILLOPC;

@@ -761,8 +763,7 @@ printf("SIG-BUSB @%p pc %p, ra %p\n", trapframe->badvaddr, trapframe->pc, trapfr

goto err;

case T_FPE+T_USER:

- sv.sival_ptr = (void *)trapframe->pc;

- MipsFPTrap(trapframe->sr, trapframe->cause, trapframe->pc, sv);

+ MipsFPTrap(trapframe);

goto out;

case T_OVFLOW+T_USER:

@@ -835,17 +836,6 @@ child_return(arg)

#endif

}

-/*

- * Wrapper around trapsignal() for use by the floating point code.

- */

-void

-fpu_trapsignal(struct proc *p, u_long ucode, int typ, union sigval sv)

- KERNEL_PROC_LOCK(p);

- trapsignal(p, SIGFPE, ucode, typ, sv);

- KERNEL_PROC_UNLOCK(p);

#if defined(DDB) || defined(DEBUG)

void

trapDump(char *msg)