summaryrefslogtreecommitdiff
path: root/sys/arch/m68k/fpsp
diff options
context:
space:
mode:
Diffstat (limited to 'sys/arch/m68k/fpsp')
-rw-r--r--sys/arch/m68k/fpsp/DYADIC.CI577
-rw-r--r--sys/arch/m68k/fpsp/DYADIC.GCC160
-rw-r--r--sys/arch/m68k/fpsp/DYADIC.GEN179
-rw-r--r--sys/arch/m68k/fpsp/DYADIC.R3V672
-rw-r--r--sys/arch/m68k/fpsp/FPSP.sa79
-rw-r--r--sys/arch/m68k/fpsp/L_ENTRY.AWK84
-rw-r--r--sys/arch/m68k/fpsp/L_LIST81
-rw-r--r--sys/arch/m68k/fpsp/MONADIC.CI593
-rw-r--r--sys/arch/m68k/fpsp/MONADIC.GCC203
-rw-r--r--sys/arch/m68k/fpsp/MONADIC.GEN230
-rw-r--r--sys/arch/m68k/fpsp/MONADIC.R3V691
-rw-r--r--sys/arch/m68k/fpsp/Makefile338
-rw-r--r--sys/arch/m68k/fpsp/Makefile.inc18
-rw-r--r--sys/arch/m68k/fpsp/asm2gas163
-rw-r--r--sys/arch/m68k/fpsp/bindec.sa946
-rw-r--r--sys/arch/m68k/fpsp/binstr.sa165
-rw-r--r--sys/arch/m68k/fpsp/bugfix.sa520
-rw-r--r--sys/arch/m68k/fpsp/copyright.s32
-rw-r--r--sys/arch/m68k/fpsp/decbin.sa531
-rw-r--r--sys/arch/m68k/fpsp/do_func.sa584
-rw-r--r--sys/arch/m68k/fpsp/fpsp.h373
-rw-r--r--sys/arch/m68k/fpsp/gen_except.sa493
-rw-r--r--sys/arch/m68k/fpsp/get_op.sa701
-rw-r--r--sys/arch/m68k/fpsp/kernel_ex.sa519
-rw-r--r--sys/arch/m68k/fpsp/l_fpsp.h280
-rw-r--r--sys/arch/m68k/fpsp/l_support.sa388
-rw-r--r--sys/arch/m68k/fpsp/netbsd.sa442
-rw-r--r--sys/arch/m68k/fpsp/res_func.sa2065
-rw-r--r--sys/arch/m68k/fpsp/round.sa673
-rw-r--r--sys/arch/m68k/fpsp/sacos.sa140
-rw-r--r--sys/arch/m68k/fpsp/sasin.sa129
-rw-r--r--sys/arch/m68k/fpsp/satan.sa503
-rw-r--r--sys/arch/m68k/fpsp/satanh.sa129
-rw-r--r--sys/arch/m68k/fpsp/scale.sa397
-rw-r--r--sys/arch/m68k/fpsp/scosh.sa156
-rw-r--r--sys/arch/m68k/fpsp/setox.sa889
-rw-r--r--sys/arch/m68k/fpsp/sgetem.sa166
-rw-r--r--sys/arch/m68k/fpsp/sint.sa272
-rw-r--r--sys/arch/m68k/fpsp/skeleton.sa482
-rw-r--r--sys/arch/m68k/fpsp/slog2.sa213
-rw-r--r--sys/arch/m68k/fpsp/slogn.sa617
-rw-r--r--sys/arch/m68k/fpsp/smovecr.sa187
-rw-r--r--sys/arch/m68k/fpsp/srem_mod.sa446
-rw-r--r--sys/arch/m68k/fpsp/ssin.sa771
-rw-r--r--sys/arch/m68k/fpsp/ssinh.sa160
-rw-r--r--sys/arch/m68k/fpsp/stan.sa480
-rw-r--r--sys/arch/m68k/fpsp/stanh.sa210
-rw-r--r--sys/arch/m68k/fpsp/sto_res.sa123
-rw-r--r--sys/arch/m68k/fpsp/stwotox.sa452
-rw-r--r--sys/arch/m68k/fpsp/tbldo.sa579
-rw-r--r--sys/arch/m68k/fpsp/util.sa773
-rw-r--r--sys/arch/m68k/fpsp/x_bsun.sa72
-rw-r--r--sys/arch/m68k/fpsp/x_fline.sa129
-rw-r--r--sys/arch/m68k/fpsp/x_operr.sa381
-rw-r--r--sys/arch/m68k/fpsp/x_ovfl.sa210
-rw-r--r--sys/arch/m68k/fpsp/x_snan.sa302
-rw-r--r--sys/arch/m68k/fpsp/x_store.sa281
-rw-r--r--sys/arch/m68k/fpsp/x_unfl.sa294
-rw-r--r--sys/arch/m68k/fpsp/x_unimp.sa102
-rw-r--r--sys/arch/m68k/fpsp/x_unsupp.sa107
60 files changed, 20732 insertions, 0 deletions
diff --git a/sys/arch/m68k/fpsp/DYADIC.CI5 b/sys/arch/m68k/fpsp/DYADIC.CI5
new file mode 100644
index 00000000000..43cd547c30b
--- /dev/null
+++ b/sys/arch/m68k/fpsp/DYADIC.CI5
@@ -0,0 +1,77 @@
+* $NetBSD: DYADIC.CI5,v 1.2 1994/10/26 07:48:26 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* DYADIC.CI5 1.2 4/30/91
+*
+* DYADIC.CI5 --- DYADIC template for CI5 compiler
+*
+
+ xref _OPa_
+ xref tag
+
+ xdef _OPd_ ; export the double-precision entry point
+_OPd_: ; dyadic op on two stacked doubles; result is left in FP0
+ link a6,#-LOCAL_SIZE ; build a frame with LOCAL_SIZE bytes of locals
+ fmovem.x fp2-fp3,USER_FP2(a6) ; save fp2-fp3 (restored before return)
+ fmovem.l fpsr/fpcr,USER_FPSR(a6) ; user's rounding mode/precision -- NOTE(review): fmovem.l always stores FPCR before FPSR; confirm the USER_FPSR/USER_FPCR layout
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input arguments
+*
+ fmove.d 8(a6),fp0 ; first stacked argument (double)
+ fmove.x fp0,FPTEMP(a6) ; store it in extended format in FPTEMP
+ lea FPTEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,DTAG(a6) ; low byte of d0 after tag is kept in DTAG
+
+ fmove.d 16(a6),fp0 ; second stacked argument (double)
+ fmove.x fp0,ETEMP(a6) ; store it in extended format in ETEMP
+ lea ETEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,STAG(a6) ; low byte of d0 after tag is kept in STAG
+
+ bsr _OPa_ ; placeholder: the actual dyadic routine
+
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ swap.w d0 ; bring the upper word (holds the quotient byte) into the low word
+ or.b FPSR_QBYTE(a6),d0 ; pickup sign of quotient byte
+ swap.w d0 ; put the halves back in place
+ fmove.l d0,fpsr ; write the merged status back to the FPU
+*
+* Result is now in FP0
+*
+ fmovem.x USER_FP2(a6),fp2-fp3 ; note: FP0/FP1 not restored
+ unlk a6 ; release the frame
+ rts
+
diff --git a/sys/arch/m68k/fpsp/DYADIC.GCC b/sys/arch/m68k/fpsp/DYADIC.GCC
new file mode 100644
index 00000000000..eacfa477ce3
--- /dev/null
+++ b/sys/arch/m68k/fpsp/DYADIC.GCC
@@ -0,0 +1,160 @@
+* $NetBSD: DYADIC.GCC,v 1.2 1994/10/26 07:48:27 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* DYADIC.GCC --- DYADIC template for GCC compiler
+*
+* This is based on the generic template. The only difference is that
+* GCC does not need the d0-d1/a0-a1 registers saved.
+*
+* Customizations:
+* 2. Likewise, don't save FP0/FP1 if they are scratch
+* registers.
+* 3. Delete updating of the fpsr if you only care about
+* the result.
+* 5. Move the result to d0/d1 if the compiler is that old.
+*
+
+ xref _OPa_
+ xref tag
+
+ xdef _OPs_ ; export the single-precision entry point
+_OPs_: ; dyadic op on two stacked singles; result is left in FP0
+ link a6,#-LOCAL_SIZE ; build a frame with LOCAL_SIZE bytes of locals
+ fmovem.x fp0-fp3,USER_FP0(a6) ; save fp0-fp3 (only fp1-fp3 are restored)
+ fmove.l fpsr,USER_FPSR(a6) ; keep the caller's FPSR in the frame
+ fmove.l fpcr,USER_FPCR(a6) ; user's rounding mode/precision
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input arguments
+*
+ fmove.s 8(a6),fp0 ; first stacked argument (single)
+ fmove.x fp0,FPTEMP(a6) ; store it in extended format in FPTEMP
+ lea FPTEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,DTAG(a6) ; low byte of d0 after tag is kept in DTAG
+
+ fmove.s 12(a6),fp0 ; second stacked argument (single, 4 bytes after the first)
+ fmove.x fp0,ETEMP(a6) ; store it in extended format in ETEMP
+ lea ETEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,STAG(a6) ; low byte of d0 after tag is kept in STAG
+
+ bsr _OPa_ ; placeholder: the actual dyadic routine
+
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ swap.w d0 ; bring the upper word (holds the quotient byte) into the low word
+ or.b FPSR_QBYTE(a6),d0 ; pickup sign of quotient byte
+ swap.w d0 ; put the halves back in place
+ fmove.l d0,fpsr ; write the merged status back to the FPU
+*
+* Result is now in FP0
+*
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ unlk a6 ; release the frame
+ rts
+
+ xdef _OPd_ ; export the double-precision entry point
+_OPd_: ; dyadic op on two stacked doubles; result is left in FP0
+ link a6,#-LOCAL_SIZE ; build a frame with LOCAL_SIZE bytes of locals
+ fmovem.x fp0-fp3,USER_FP0(a6) ; save fp0-fp3 (only fp1-fp3 are restored)
+ fmove.l fpsr,USER_FPSR(a6) ; keep the caller's FPSR in the frame
+ fmove.l fpcr,USER_FPCR(a6) ; user's rounding mode/precision
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input arguments
+*
+ fmove.d 8(a6),fp0 ; first stacked argument (double)
+ fmove.x fp0,FPTEMP(a6) ; store it in extended format in FPTEMP
+ lea FPTEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,DTAG(a6) ; low byte of d0 after tag is kept in DTAG
+
+ fmove.d 16(a6),fp0 ; second stacked argument (double, 8 bytes after the first)
+ fmove.x fp0,ETEMP(a6) ; store it in extended format in ETEMP
+ lea ETEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,STAG(a6) ; low byte of d0 after tag is kept in STAG
+
+ bsr _OPa_ ; placeholder: the actual dyadic routine
+
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ swap.w d0 ; bring the upper word (holds the quotient byte) into the low word
+ or.b FPSR_QBYTE(a6),d0 ; pickup sign of quotient byte
+ swap.w d0 ; put the halves back in place
+ fmove.l d0,fpsr ; write the merged status back to the FPU
+*
+* Result is now in FP0
+*
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ unlk a6 ; release the frame
+ rts
+
+ xdef _OPx_ ; export the extended-precision entry point
+_OPx_: ; dyadic op on two stacked extendeds; result is left in FP0
+ link a6,#-LOCAL_SIZE ; build a frame with LOCAL_SIZE bytes of locals
+ fmovem.x fp0-fp3,USER_FP0(a6) ; save fp0-fp3 (only fp1-fp3 are restored)
+ fmove.l fpsr,USER_FPSR(a6) ; keep the caller's FPSR in the frame
+ fmove.l fpcr,USER_FPCR(a6) ; user's rounding mode/precision
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input arguments
+*
+ fmove.x 8(a6),fp0 ; first stacked argument (extended)
+ fmove.x fp0,FPTEMP(a6) ; copy it into FPTEMP
+ lea FPTEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,DTAG(a6) ; low byte of d0 after tag is kept in DTAG
+
+ fmove.x 20(a6),fp0 ; second stacked argument (extended, 12 bytes after the first)
+ fmove.x fp0,ETEMP(a6) ; copy it into ETEMP
+ lea ETEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,STAG(a6) ; low byte of d0 after tag is kept in STAG
+
+ bsr _OPa_ ; placeholder: the actual dyadic routine
+
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ swap.w d0 ; bring the upper word (holds the quotient byte) into the low word
+ or.b FPSR_QBYTE(a6),d0 ; pickup sign of quotient byte
+ swap.w d0 ; put the halves back in place
+ fmove.l d0,fpsr ; write the merged status back to the FPU
+*
+* Result is now in FP0
+*
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ unlk a6 ; release the frame
+ rts
+
diff --git a/sys/arch/m68k/fpsp/DYADIC.GEN b/sys/arch/m68k/fpsp/DYADIC.GEN
new file mode 100644
index 00000000000..fa3797fa6d1
--- /dev/null
+++ b/sys/arch/m68k/fpsp/DYADIC.GEN
@@ -0,0 +1,179 @@
+* $NetBSD: DYADIC.GEN,v 1.2 1994/10/26 07:48:29 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* DYADIC.GEN 1.2 4/30/91
+*
+* DYADIC.GEN --- generic DYADIC template
+*
+* This version saves all registers that will be used by the emulation
+* routines and restores all but FP0 on exit. The FPSR is
+* updated to reflect the result of the operation. Return value
+* is placed in FP0 for single, double and extended results.
+*
+* The package subroutines expect the incoming FPCR to be zeroed
+* since they need extended precision to work properly. The
+* 'final' FPCR is expected in USER_FPCR(a6) so that the calculated result
+* can be properly sized and rounded. Also, if the incoming FPCR
+* has enabled any exceptions, the exception will be taken on the
+* final fmovem in this template.
+*
+* Customizations:
+* 1. Remove the movem.l at the entry and exit of
+* each routine if your compiler treats those
+* registers as scratch.
+* 2. Likewise, don't save FP0/FP1 if they are scratch
+* registers.
+* 3. Delete updating of the fpsr if you only care about
+* the result.
+* 4. Remove the _OPs_ and _OPx_ entry points if your compiler
+* treats everything as doubles.
+* 5. Move the result to d0/d1 if the compiler is that old.
+*
+
+ xref _OPa_
+ xref tag
+
+ xdef _OPs_ ; export the single-precision entry point
+_OPs_: ; dyadic op on two stacked singles; result is left in FP0
+ link a6,#-LOCAL_SIZE ; build a frame with LOCAL_SIZE bytes of locals
+ movem.l d0-d1/a0-a1,USER_DA(a6) ; save the integer registers used below
+ fmovem.x fp0-fp3,USER_FP0(a6) ; save fp0-fp3 (only fp1-fp3 are restored)
+ fmovem.l fpsr/fpcr,USER_FPSR(a6) ; user's rounding mode/precision -- NOTE(review): fmovem.l always stores FPCR before FPSR; confirm the USER_FPSR/USER_FPCR layout
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input arguments
+*
+ fmove.s 8(a6),fp0 ; first stacked argument (single)
+ fmove.x fp0,FPTEMP(a6) ; store it in extended format in FPTEMP
+ lea FPTEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,DTAG(a6) ; low byte of d0 after tag is kept in DTAG
+
+ fmove.s 12(a6),fp0 ; second stacked argument (single, 4 bytes after the first)
+ fmove.x fp0,ETEMP(a6) ; store it in extended format in ETEMP
+ lea ETEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,STAG(a6) ; low byte of d0 after tag is kept in STAG
+
+ bsr _OPa_ ; placeholder: the actual dyadic routine
+
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ swap.w d0 ; bring the upper word (holds the quotient byte) into the low word
+ or.b FPSR_QBYTE(a6),d0 ; pickup sign of quotient byte
+ swap.w d0 ; put the halves back in place
+ fmove.l d0,fpsr ; write the merged status back to the FPU
+*
+* Result is now in FP0
+*
+ movem.l USER_DA(a6),d0-d1/a0-a1 ; restore the saved integer registers
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ unlk a6 ; release the frame
+ rts
+
+ xdef _OPd_ ; export the double-precision entry point
+_OPd_: ; dyadic op on two stacked doubles; result is left in FP0
+ link a6,#-LOCAL_SIZE ; build a frame with LOCAL_SIZE bytes of locals
+ movem.l d0-d1/a0-a1,USER_DA(a6) ; save the integer registers used below
+ fmovem.x fp0-fp3,USER_FP0(a6) ; save fp0-fp3 (only fp1-fp3 are restored)
+ fmovem.l fpsr/fpcr,USER_FPSR(a6) ; user's rounding mode/precision -- NOTE(review): fmovem.l always stores FPCR before FPSR; confirm the USER_FPSR/USER_FPCR layout
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input arguments
+*
+ fmove.d 8(a6),fp0 ; first stacked argument (double)
+ fmove.x fp0,FPTEMP(a6) ; store it in extended format in FPTEMP
+ lea FPTEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,DTAG(a6) ; low byte of d0 after tag is kept in DTAG
+
+ fmove.d 16(a6),fp0 ; second stacked argument (double, 8 bytes after the first)
+ fmove.x fp0,ETEMP(a6) ; store it in extended format in ETEMP
+ lea ETEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,STAG(a6) ; low byte of d0 after tag is kept in STAG
+
+ bsr _OPa_ ; placeholder: the actual dyadic routine
+
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ swap.w d0 ; bring the upper word (holds the quotient byte) into the low word
+ or.b FPSR_QBYTE(a6),d0 ; pickup sign of quotient byte
+ swap.w d0 ; put the halves back in place
+ fmove.l d0,fpsr ; write the merged status back to the FPU
+*
+* Result is now in FP0
+*
+ movem.l USER_DA(a6),d0-d1/a0-a1 ; restore the saved integer registers
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ unlk a6 ; release the frame
+ rts
+
+ xdef _OPx_ ; export the extended-precision entry point
+_OPx_: ; dyadic op on two stacked extendeds; result is left in FP0
+ link a6,#-LOCAL_SIZE ; build a frame with LOCAL_SIZE bytes of locals
+ movem.l d0-d1/a0-a1,USER_DA(a6) ; save the integer registers used below
+ fmovem.x fp0-fp3,USER_FP0(a6) ; save fp0-fp3 (only fp1-fp3 are restored)
+ fmovem.l fpsr/fpcr,USER_FPSR(a6) ; user's rounding mode/precision -- NOTE(review): fmovem.l always stores FPCR before FPSR; confirm the USER_FPSR/USER_FPCR layout
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input arguments
+*
+ fmove.x 8(a6),fp0 ; first stacked argument (extended)
+ fmove.x fp0,FPTEMP(a6) ; copy it into FPTEMP
+ lea FPTEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,DTAG(a6) ; low byte of d0 after tag is kept in DTAG
+
+ fmove.x 20(a6),fp0 ; second stacked argument (extended, 12 bytes after the first)
+ fmove.x fp0,ETEMP(a6) ; copy it into ETEMP
+ lea ETEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,STAG(a6) ; low byte of d0 after tag is kept in STAG
+
+ bsr _OPa_ ; placeholder: the actual dyadic routine
+
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ swap.w d0 ; bring the upper word (holds the quotient byte) into the low word
+ or.b FPSR_QBYTE(a6),d0 ; pickup sign of quotient byte
+ swap.w d0 ; put the halves back in place
+ fmove.l d0,fpsr ; write the merged status back to the FPU
+*
+* Result is now in FP0
+*
+ movem.l USER_DA(a6),d0-d1/a0-a1 ; restore the saved integer registers
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ unlk a6 ; release the frame
+ rts
+
diff --git a/sys/arch/m68k/fpsp/DYADIC.R3V6 b/sys/arch/m68k/fpsp/DYADIC.R3V6
new file mode 100644
index 00000000000..843a44a2386
--- /dev/null
+++ b/sys/arch/m68k/fpsp/DYADIC.R3V6
@@ -0,0 +1,72 @@
+* $NetBSD: DYADIC.R3V6,v 1.2 1994/10/26 07:48:31 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* DYADIC.R3V6 1.2 4/30/91
+*
+* DYADIC.R3V6 --- DYADIC template for MCD R3V6 native C compiler
+*
+* The MCD compiler is old. It returns float and double values
+* as a double stored in d0/d1. There is no support for single or extended
+* precision operations. It's not clear whether the float registers
+* should be preserved, so for speed they're not.
+*
+
+ xref _OPa_
+ xref tag
+
+ xdef _OPd_ ; export the double-precision entry point
+_OPd_: ; dyadic op on two stacked doubles; result is returned as a double in d0/d1
+ link a6,#-LOCAL_SIZE ; build a frame with LOCAL_SIZE bytes of locals
+ fmovem.l fpsr/fpcr,USER_FPSR(a6) ; user's rounding mode/precision -- NOTE(review): fmovem.l always stores FPCR before FPSR; confirm the USER_FPSR/USER_FPCR layout
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy and convert arguments to ETEMP, FPTEMP.
+*
+ fmove.d 8(a6),fp0 ; first stacked argument (double)
+ fmove.x fp0,FPTEMP(a6) ; store it in extended format in FPTEMP
+ lea FPTEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,DTAG(a6) ; low byte of d0 after tag is kept in DTAG
+
+ fmove.d 16(a6),fp0 ; second stacked argument (double, 8 bytes after the first)
+ fmove.x fp0,ETEMP(a6) ; store it in extended format in ETEMP
+ lea ETEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand
+ move.b d0,STAG(a6) ; low byte of d0 after tag is kept in STAG
+
+ bsr _OPa_ ; placeholder: the actual dyadic routine
+
+ fmove.d fp0,USER_D0(a6) ; result goes into d0/d1 pair
+ movem.l USER_D0(a6),d0-d1 ; load the two longwords of the double into d0 and d1
+ unlk a6 ; release the frame (no FP registers or FPSR are restored here)
+ rts
diff --git a/sys/arch/m68k/fpsp/FPSP.sa b/sys/arch/m68k/fpsp/FPSP.sa
new file mode 100644
index 00000000000..1a3692d4667
--- /dev/null
+++ b/sys/arch/m68k/fpsp/FPSP.sa
@@ -0,0 +1,79 @@
+* $NetBSD: FPSP.sa,v 1.2 1994/10/26 07:48:33 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* FPSP.sa 3.1 12/10/90
+*
+* Init file for testing FPSP software package.
+*
+* Takes over the exception vectors that the FPSP handles.
+*
+
+FPSP IDNT 2,1 Motorola 040 Floating Point Software Package
+
+CODE_ST equ $10000 ;address of test code start
+
+FLINE_VEC equ $2c ; byte offset of vector 11 (F-line emulator)
+BSUN_VEC equ $c0 ; byte offset of vector 48 (FP branch/set on unordered)
+INEX2_VEC equ $c4 ; byte offset of vector 49 (FP inexact result)
+DZ_VEC equ $c8 ; byte offset of vector 50 (FP divide by zero)
+UNFL_VEC equ $cc ; byte offset of vector 51 (FP underflow)
+OPERR_VEC equ $d0 ; byte offset of vector 52 (FP operand error)
+OVFL_VEC equ $d4 ; byte offset of vector 53 (FP overflow)
+SNAN_VEC equ $d8 ; byte offset of vector 54 (FP signalling NaN)
+UNSUP_VEC equ $dc ; byte offset of vector 55 (FP unimplemented data type)
+
+ xref fline,unsupp
+ xref bsun,inex,dz,unfl
+ xref operr,ovfl,snan
+
+ section 7
+
+* Load vector table with addresses of FPSP routines and
+* branch to CODE_ST, start address of test code.
+
+ xdef start ; export the test entry point
+start: ; install the FPSP handlers in the vector table, then jump to CODE_ST
+ movec.l VBR,a0 ; a0 = base address of the exception vector table
+ move.l #fline,FLINE_VEC(a0) ; each move.l below stores one handler address in its vector slot
+ move.l #bsun,BSUN_VEC(a0)
+ move.l #inex,INEX2_VEC(a0)
+ move.l #dz,DZ_VEC(a0)
+ move.l #unfl,UNFL_VEC(a0)
+ move.l #operr,OPERR_VEC(a0)
+ move.l #ovfl,OVFL_VEC(a0)
+ move.l #snan,SNAN_VEC(a0)
+ move.l #unsupp,UNSUP_VEC(a0)
+
+ jmp CODE_ST ; hand control to the test code; does not return here
+
+ end
diff --git a/sys/arch/m68k/fpsp/L_ENTRY.AWK b/sys/arch/m68k/fpsp/L_ENTRY.AWK
new file mode 100644
index 00000000000..44cf26a5682
--- /dev/null
+++ b/sys/arch/m68k/fpsp/L_ENTRY.AWK
@@ -0,0 +1,84 @@
+# $NetBSD: L_ENTRY.AWK,v 1.2 1994/10/26 07:48:34 cgd Exp $
+
+# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+# M68000 Hi-Performance Microprocessor Division
+# M68040 Software Package
+#
+# M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+# All rights reserved.
+#
+# THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+# To the maximum extent permitted by applicable law,
+# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+# PARTICULAR PURPOSE and any warranty against infringement with
+# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+# and any accompanying written materials.
+#
+# To the maximum extent permitted by applicable law,
+# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+# PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+# OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+# SOFTWARE. Motorola assumes no responsibility for the maintenance
+# and support of the SOFTWARE.
+#
+# You are hereby granted a copyright license to use, modify, and
+# distribute the SOFTWARE so long as this entire notice is retained
+# without alteration in any modified and/or redistributed versions,
+# and that such modified versions are clearly identified as such.
+# No licenses are granted by implication, estoppel or otherwise
+# under any patents or trademarks of Motorola, Inc.
+
+# L_ENTRY.AWK 1.1 3/27/91
+
+BEGIN{ # emit shell 'echo' commands that will print the fixed file header
+ print "echo \" section 8\""
+ print "echo \" include l_fpsp.h\""
+ print "echo \"\""
+ print "echo \" xref tag\"" # one xref line per external symbol follows
+ print "echo \" xref szero\""
+ print "echo \" xref sinf\""
+ print "echo \" xref sopr_inf\""
+ print "echo \" xref sone\""
+ print "echo \" xref spi_2\""
+ print "echo \" xref szr_inf\""
+ print "echo \" xref src_nan\""
+ print "echo \" xref t_operr\""
+ print "echo \" xref t_dz2\""
+ print "echo \" xref snzrinx\""
+ print "echo \" xref ld_pone\""
+ print "echo \" xref ld_pinf\""
+ print "echo \" xref ld_ppi2\""
+ print "echo \" xref ssincosz\""
+ print "echo \" xref ssincosi\""
+ print "echo \" xref ssincosnan\""
+ print "echo \" xref setoxm1i\""
+ utmp = 100 # counter appended to PREFIX to replace _TMP_; incremented before each use
+ }
+
+$4=="MONADIC"{ # one-operand function: emit a sed pipeline that instantiates MONADIC.<SYS>
+ printf "sed 's/_OPs_/" $1 "/g' MONADIC." SYS " | " # $1 = single-precision entry name
+ printf "sed 's/_OPd_/" $2 "/g' | " # $2 = double-precision entry name
+ printf "sed 's/_OPx_/" $3 "/g' | " # $3 = extended-precision entry name
+ printf "sed 's/_OPr_/" $5 "/g' | " # $5..$9 = handlers substituted for _OPr_/_OPz_/_OPi_/_OPn_/_OPm_
+ printf "sed 's/_OPz_/" $6 "/g' | "
+ printf "sed 's/_OPi_/" $7 "/g' | "
+ printf "sed 's/_OPn_/" $8 "/g' | "
+ printf "sed 's/_OPm_/" $9 "/g' | "
+ utmp += 1 # new number so each instantiation gets distinct _TMP_ labels
+ printf "sed 's/_TMP_/" PREFIX utmp "/g'\n" # end the command line exactly as the DYADIC rule does (no stray blank after the newline)
+ }
+
+$4=="DYADIC"{ # two-operand function: emit a sed pipeline that instantiates DYADIC.<SYS>
+ printf "sed 's/_OPs_/" $1 "/g' DYADIC." SYS " | " # $1 = single-precision entry name
+ printf "sed 's/_OPd_/" $2 "/g' | " # $2 = double-precision entry name
+ printf "sed 's/_OPx_/" $3 "/g' | " # $3 = extended-precision entry name
+ printf "sed 's/_OPa_/" $5 "/g' | " # $5 = routine substituted for _OPa_
+ utmp += 1 # new number for this instantiation's _TMP_ replacement
+ printf "sed 's/_TMP_/" PREFIX utmp "/g'\n"
+ }
+
+END{ # after all input lines: emit the command that prints the closing 'end' directive
+ print "echo \" end\""
+ }
diff --git a/sys/arch/m68k/fpsp/L_LIST b/sys/arch/m68k/fpsp/L_LIST
new file mode 100644
index 00000000000..7eb9b0feb5a
--- /dev/null
+++ b/sys/arch/m68k/fpsp/L_LIST
@@ -0,0 +1,81 @@
+# $NetBSD: L_LIST,v 1.2 1994/10/26 07:48:38 cgd Exp $
+#
+# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+# M68000 Hi-Performance Microprocessor Division
+# M68040 Software Package
+#
+# M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+# All rights reserved.
+#
+# THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+# To the maximum extent permitted by applicable law,
+# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+# PARTICULAR PURPOSE and any warranty against infringement with
+# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+# and any accompanying written materials.
+#
+# To the maximum extent permitted by applicable law,
+# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+# PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+# OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+# SOFTWARE. Motorola assumes no responsibility for the maintenance
+# and support of the SOFTWARE.
+#
+# You are hereby granted a copyright license to use, modify, and
+# distribute the SOFTWARE so long as this entire notice is retained
+# without alteration in any modified and/or redistributed versions,
+# and that such modified versions are clearly identified as such.
+# No licenses are granted by implication, estoppel or otherwise
+# under any patents or trademarks of Motorola, Inc.
+#
+# L_LIST 1.2 4/30/91
+#
+# Each line specifies the entry points for one function. The first
+# 3 items are the library entry point names for the single, double and
+# extended precision versions of the function. Change them to
+# suit your system. The next item is
+# either MONADIC or DYADIC. The remaining 5 items are the labels
+# in the FPSP code that correspond to subroutines to handle Regular,
+# Zero, Infinity, Nan and Denorm input values.
+#
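+# The first 3 items replace _OPs_, _OPd_ and _OPx_ in the templates; DYADIC entries list a single handler label instead of five.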
+#
+#Sgl Dbl. Ext. Type Reg Zero Inf Nan Denorm
+#---------------------- ---- --- ---- --- --- ------
+facoss facosd facosx MONADIC sacos ld_ppi2 t_operr mon_nan sacosd
+fasins fasind fasinx MONADIC sasin szero t_operr mon_nan sasind
+fatans fatand fatanx MONADIC satan szero spi_2 mon_nan satand
+fatanhs fatanhd fatanhx MONADIC satanh szero t_operr mon_nan satanhd
+fcoss fcosd fcosx MONADIC scos ld_pone t_operr mon_nan scosd
+fcoshs fcoshd fcoshx MONADIC scosh ld_pone ld_pinf mon_nan scoshd
+fetoxs fetoxd fetoxx MONADIC setox ld_pone szr_inf mon_nan setoxd
+fetoxm1s fetoxm1d fetoxm1x MONADIC setoxm1 szero setoxm1i mon_nan setoxm1d
+fgetexps fgetexpd fgetexpx MONADIC sgetexp szero t_operr mon_nan sgetexpd
+fsins fsind fsinx MONADIC ssin szero t_operr mon_nan ssind
+fsinhs fsinhd fsinhx MONADIC ssinh szero sinf mon_nan ssinhd
+ftans ftand ftanx MONADIC stan szero t_operr mon_nan stand
+ftanhs ftanhd ftanhx MONADIC stanh szero sone mon_nan stanhd
+ftentoxs ftentoxd ftentoxx MONADIC stentox ld_pone szr_inf mon_nan stentoxd
+ftwotoxs ftwotoxd ftwotoxx MONADIC stwotox ld_pone szr_inf mon_nan stwotoxd
+fgetmans fgetmand fgetmanx MONADIC sgetman szero t_operr mon_nan sgetmand
+flogns flognd flognx MONADIC sslogn t_dz2 sopr_inf mon_nan sslognd
+flog2s flog2d flog2x MONADIC sslog2 t_dz2 sopr_inf mon_nan sslog2d
+flog10s flog10d flog10x MONADIC sslog10 t_dz2 sopr_inf mon_nan sslog10d
+flognp1s flognp1d flognp1x MONADIC sslognp1 szero sopr_inf mon_nan slognp1d
+fints fintd fintx MONADIC l_sint szero sinf mon_nan l_sintd
+fintrzs fintrzd fintrzx MONADIC l_sintrz szero sinf mon_nan snzrinx
+frems fremd fremx DYADIC prem
+fmods fmodd fmodx DYADIC pmod
+fscales fscaled fscalex DYADIC pscale
+#
+# 68040 native instructions added for completeness
+#
+fabss fabsd fabsx MONADIC sabs sabs sabs sabs sabs
+fnegs fnegd fnegx MONADIC sneg sneg sneg sneg sneg
+fsqrts fsqrtd fsqrtx MONADIC ssqrt ssqrt ssqrt ssqrt ssqrt
+fadds faddd faddx DYADIC sadd
+fsubs fsubd fsubx DYADIC ssub
+fmuls fmuld fmulx DYADIC smul
+fdivs fdivd fdivx DYADIC sdiv
diff --git a/sys/arch/m68k/fpsp/MONADIC.CI5 b/sys/arch/m68k/fpsp/MONADIC.CI5
new file mode 100644
index 00000000000..56cc6586347
--- /dev/null
+++ b/sys/arch/m68k/fpsp/MONADIC.CI5
@@ -0,0 +1,93 @@
+* $NetBSD: MONADIC.CI5,v 1.2 1994/10/26 07:48:39 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* MONADIC.CI5 1.3 4/30/91
+*
+* MONADIC.CI5 --- MONADIC template for CI5 compiler
+*
+
+ xref tag
+ xref _OPr_
+ xref _OPz_
+ xref _OPi_
+ xref _OPn_
+ xref _OPm_
+
+
+ xdef _OPd_ ; export the double-precision entry point
+_OPd_: ; monadic op on one stacked double; dispatches on the operand tag, result in FP0
+ link a6,#-LOCAL_SIZE ; build a frame with LOCAL_SIZE bytes of locals
+ fmovem.x fp2-fp3,USER_FP2(a6) ; save fp2-fp3 (restored before return)
+ fmove.l fpsr,USER_FPSR(a6) ; keep the caller's FPSR; its low byte is merged back at _TMP_6
+ fmove.l fpcr,d1 ; user's rounding mode/precision
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input argument
+*
+ fmove.d 8(a6),fp0 ; the stacked argument (double)
+ fmove.x fp0,ETEMP(a6) ; store it in extended format in ETEMP
+ lea ETEMP(a6),a0 ; a0 = pointer to the operand for tag
+ bsr tag ; classify the operand; the tag comes back in d0
+ move.b d0,STAG(a6) ; keep the tag byte in STAG
+ tst.b d0 ; tag 0 selects the regular-number routine
+ bne.b _TMP_2
+ bsr _OPr_ ; normalized (regular) number
+ bra.b _TMP_6
+_TMP_2:
+ cmp.b #$20,d0 ; zero?
+ bne.b _TMP_3
+ bsr _OPz_ ; zero-operand routine
+ bra.b _TMP_6
+_TMP_3:
+ cmp.b #$40,d0 ; infinity?
+ bne.b _TMP_4
+ bsr _OPi_ ; infinity-operand routine
+ bra.b _TMP_6
+_TMP_4:
+ cmp.b #$60,d0 ; NaN?
+ bne.b _TMP_5
+ bsr _OPn_ ; NaN-operand routine
+ bra.b _TMP_6
+_TMP_5:
+ bsr _OPm_ ; assuming a denorm...
+
+_TMP_6:
+ fmove.l fpsr,d0 ; update status register
+ or.b USER_FPSR+3(a6),d0 ;add previously accrued exceptions
+ fmove.l d0,fpsr ; write the merged status to the FPU (a store into the frame would be lost at unlk)
+*
+* Result is now in FP0
+*
+ fmovem.x USER_FP2(a6),fp2-fp3 ; note: FP0 (result) and FP1 are not restored
+ unlk a6 ; release the frame
+ rts
diff --git a/sys/arch/m68k/fpsp/MONADIC.GCC b/sys/arch/m68k/fpsp/MONADIC.GCC
new file mode 100644
index 00000000000..a8b7ce142ae
--- /dev/null
+++ b/sys/arch/m68k/fpsp/MONADIC.GCC
@@ -0,0 +1,203 @@
+* $NetBSD: MONADIC.GCC,v 1.2 1994/10/26 07:48:40 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* MONADIC.GCC --- MONADIC template for GCC compiler
+*
+* This is based on the generic template. The only difference is that
+* GCC does not need the d0-d1/a0-a1 registers saved.
+*
+* Customizations:
+* 2. Likewise, don't save FP0/FP1 if they are scratch
+* registers.
+* 3. Delete handling of the fpsr if you only care about
+* the result.
+* 5. Move the result to d0/d1 if the compiler is that old.
+*
+
+ xref tag
+ xref _OPr_
+ xref _OPz_
+ xref _OPi_
+ xref _OPn_
+ xref _OPm_
+
+ xdef _OPs_
+_OPs_:
+*
+* Single-precision entry point of the GCC template.  Saves fp0-fp3,
+* the fpsr and the fpcr in the frame, copies the caller's fpcr to d1
+* and zeroes the fpcr.  The single argument at 8(a6) is converted to
+* extended in ETEMP, classified by 'tag', and the tag byte in d0
+* selects one of the five handlers.  On exit the FPSR_AEXCEPT(a6)
+* byte is ORed into the new fpsr and fp1-fp3 and the fpcr are
+* restored from the frame.
+*
+ link a6,#-LOCAL_SIZE
+ fmovem.x fp0-fp3,USER_FP0(a6)
+ fmove.l fpsr,USER_FPSR(a6)
+ fmove.l fpcr,USER_FPCR(a6)
+ fmove.l fpcr,d1 ; user's rounding mode/precision
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input argument
+*
+ fmove.s 8(a6),fp0
+ fmove.x fp0,ETEMP(a6)
+ lea ETEMP(a6),a0
+ bsr tag
+ move.b d0,STAG(a6)
+ tst.b d0
+ bne.b _TMP_2
+ bsr _OPr_ ; normalized (regular) number
+ bra.b _TMP_6
+_TMP_2:
+ cmp.b #$20,d0 ; zero?
+ bne.b _TMP_3
+ bsr _OPz_
+ bra.b _TMP_6
+_TMP_3:
+ cmp.b #$40,d0 ; infinity?
+ bne.b _TMP_4
+ bsr _OPi_
+ bra.b _TMP_6
+_TMP_4:
+ cmp.b #$60,d0 ; NaN?
+ bne.b _TMP_5
+ bsr _OPn_
+ bra.b _TMP_6
+_TMP_5:
+ bsr _OPm_ ; assuming a denorm...
+
+_TMP_6:
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ fmove.l d0,fpsr
+*
+* Result is now in FP0
+*
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ fmove.l USER_FPCR(a6),fpcr ; fpcr restored
+ unlk a6
+ rts
+
+ xdef _OPd_
+_OPd_:
+*
+* Double-precision entry point of the GCC template.  Identical in
+* shape to the single-precision entry except that the argument at
+* 8(a6) is loaded with fmove.d.  Saves fp0-fp3, the fpsr and the fpcr
+* in the frame, passes the caller's fpcr in d1, tags the converted
+* argument and dispatches on the tag byte; fp1-fp3 and the fpcr are
+* restored on exit.
+*
+ link a6,#-LOCAL_SIZE
+ fmovem.x fp0-fp3,USER_FP0(a6)
+ fmove.l fpsr,USER_FPSR(a6)
+ fmove.l fpcr,USER_FPCR(a6)
+ fmove.l fpcr,d1 ; user's rounding mode/precision
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input argument
+*
+ fmove.d 8(a6),fp0
+ fmove.x fp0,ETEMP(a6)
+ lea ETEMP(a6),a0
+ bsr tag
+ move.b d0,STAG(a6)
+ tst.b d0
+ bne.b _TMP_7
+ bsr _OPr_ ; normalized (regular) number
+ bra.b _TMP_B
+_TMP_7:
+ cmp.b #$20,d0 ; zero?
+ bne.b _TMP_8
+ bsr _OPz_
+ bra.b _TMP_B
+_TMP_8:
+ cmp.b #$40,d0 ; infinity?
+ bne.b _TMP_9
+ bsr _OPi_
+ bra.b _TMP_B
+_TMP_9:
+ cmp.b #$60,d0 ; NaN?
+ bne.b _TMP_A
+ bsr _OPn_
+ bra.b _TMP_B
+_TMP_A:
+ bsr _OPm_ ; assuming a denorm...
+
+_TMP_B:
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ fmove.l d0,fpsr
+*
+* Result is now in FP0
+*
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ fmove.l USER_FPCR(a6),fpcr ; fpcr restored
+ unlk a6
+ rts
+
+ xdef _OPx_
+_OPx_:
+*
+* Extended-precision entry point of the GCC template.  Identical in
+* shape to the single and double entries except that the argument at
+* 8(a6) is loaded with fmove.x.  Saves fp0-fp3, the fpsr and the fpcr
+* in the frame, passes the caller's fpcr in d1, tags the argument
+* and dispatches on the tag byte; fp1-fp3 and the fpcr are restored
+* on exit.
+*
+ link a6,#-LOCAL_SIZE
+ fmovem.x fp0-fp3,USER_FP0(a6)
+ fmove.l fpsr,USER_FPSR(a6)
+ fmove.l fpcr,USER_FPCR(a6)
+ fmove.l fpcr,d1 ; user's rounding mode/precision
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input argument
+*
+ fmove.x 8(a6),fp0
+ fmove.x fp0,ETEMP(a6)
+ lea ETEMP(a6),a0
+ bsr tag
+ move.b d0,STAG(a6)
+ tst.b d0
+ bne.b _TMP_C
+ bsr _OPr_ ; normalized (regular) number
+ bra.b _TMP_G
+_TMP_C:
+ cmp.b #$20,d0 ; zero?
+ bne.b _TMP_D
+ bsr _OPz_
+ bra.b _TMP_G
+_TMP_D:
+ cmp.b #$40,d0 ; infinity?
+ bne.b _TMP_E
+ bsr _OPi_
+ bra.b _TMP_G
+_TMP_E:
+ cmp.b #$60,d0 ; NaN?
+ bne.b _TMP_F
+ bsr _OPn_
+ bra.b _TMP_G
+_TMP_F:
+ bsr _OPm_ ; assuming a denorm...
+
+_TMP_G:
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ fmove.l d0,fpsr
+*
+* Result is now in FP0
+*
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ fmove.l USER_FPCR(a6),fpcr ; fpcr restored
+ unlk a6
+ rts
+
diff --git a/sys/arch/m68k/fpsp/MONADIC.GEN b/sys/arch/m68k/fpsp/MONADIC.GEN
new file mode 100644
index 00000000000..5e6581b9d99
--- /dev/null
+++ b/sys/arch/m68k/fpsp/MONADIC.GEN
@@ -0,0 +1,230 @@
+* $NetBSD: MONADIC.GEN,v 1.3 1994/10/26 07:48:42 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* MONADIC.GEN 1.5 5/18/92
+*
+* MONADIC.GEN 1.4 1/16/92
+*
+* MONADIC.GEN 1.3 4/30/91
+*
+* MONADIC.GEN --- generic MONADIC template
+*
+* This version saves all registers that will be used by the emulation
+* routines and restores all but FP0 on exit. The FPSR is
+* updated to reflect the result of the operation. Return value
+* is placed in FP0 for single, double and extended results.
+*
+* The package subroutines expect the incoming FPCR to be zeroed
+* since they need extended precision to work properly. The
+* 'final' FPCR is expected in d1 so that the calculated result
+* can be properly sized and rounded. Also, if the incoming FPCR
+* has enabled any exceptions, the exception will be taken on the
+* final fmovem in this template.
+*
+* Customizations:
+* 1. Remove the movem.l at the entry and exit of
+* each routine if your compiler treats those
+* registers as scratch.
+* 2. Likewise, don't save FP0/FP1 if they are scratch
+* registers.
+* 3. Delete handling of the fpsr if you only care about
+* the result.
+* 4. Some (most?) C compilers convert all float arguments
+* to double, and provide no support at all for extended
+* precision so remove the _OPs_ and _OPx_ entry points.
+* 5. Move the result to d0/d1 if the compiler is that old.
+*
+
+ xref tag
+ xref _OPr_
+ xref _OPz_
+ xref _OPi_
+ xref _OPn_
+ xref _OPm_
+
+ xdef _OPs_
+_OPs_:
+*
+* Single-precision entry point of the generic template.  Saves
+* d0-d1/a0-a1, fp0-fp3, the fpsr and the fpcr in the frame, copies
+* the caller's fpcr to d1 and zeroes the fpcr.  The single argument
+* at 8(a6) is converted to extended in ETEMP, classified by 'tag',
+* and the tag byte in d0 selects one of the five handlers.  On exit
+* the FPSR_AEXCEPT(a6) byte is ORed into the new fpsr and every
+* saved register except fp0 is restored.
+*
+ link a6,#-LOCAL_SIZE
+ movem.l d0-d1/a0-a1,USER_DA(a6)
+ fmovem.x fp0-fp3,USER_FP0(a6)
+ fmove.l fpsr,USER_FPSR(a6)
+ fmove.l fpcr,USER_FPCR(a6)
+ fmove.l fpcr,d1 ; user's rounding mode/precision
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input argument
+*
+ fmove.s 8(a6),fp0
+ fmove.x fp0,ETEMP(a6)
+ lea ETEMP(a6),a0
+ bsr tag
+ move.b d0,STAG(a6)
+ tst.b d0
+ bne.b _TMP_2
+ bsr _OPr_ ; normalized (regular) number
+ bra.b _TMP_6
+_TMP_2:
+ cmp.b #$20,d0 ; zero?
+ bne.b _TMP_3
+ bsr _OPz_
+ bra.b _TMP_6
+_TMP_3:
+ cmp.b #$40,d0 ; infinity?
+ bne.b _TMP_4
+ bsr _OPi_
+ bra.b _TMP_6
+_TMP_4:
+ cmp.b #$60,d0 ; NaN?
+ bne.b _TMP_5
+ bsr _OPn_
+ bra.b _TMP_6
+_TMP_5:
+ bsr _OPm_ ; assuming a denorm...
+
+_TMP_6:
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ fmove.l d0,fpsr
+*
+* Result is now in FP0
+*
+ movem.l USER_DA(a6),d0-d1/a0-a1
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ fmove.l USER_FPCR(a6),fpcr ; fpcr restored
+ unlk a6
+ rts
+
+ xdef _OPd_
+_OPd_:
+*
+* Double-precision entry point of the generic template.  Identical
+* in shape to the single-precision entry except that the argument
+* at 8(a6) is loaded with fmove.d.  Saves d0-d1/a0-a1, fp0-fp3, the
+* fpsr and the fpcr in the frame, passes the caller's fpcr in d1,
+* tags the converted argument and dispatches on the tag byte; every
+* saved register except fp0 is restored on exit.
+*
+ link a6,#-LOCAL_SIZE
+ movem.l d0-d1/a0-a1,USER_DA(a6)
+ fmovem.x fp0-fp3,USER_FP0(a6)
+ fmove.l fpsr,USER_FPSR(a6)
+ fmove.l fpcr,USER_FPCR(a6)
+ fmove.l fpcr,d1 ; user's rounding mode/precision
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input argument
+*
+ fmove.d 8(a6),fp0
+ fmove.x fp0,ETEMP(a6)
+ lea ETEMP(a6),a0
+ bsr tag
+ move.b d0,STAG(a6)
+ tst.b d0
+ bne.b _TMP_7
+ bsr _OPr_ ; normalized (regular) number
+ bra.b _TMP_B
+_TMP_7:
+ cmp.b #$20,d0 ; zero?
+ bne.b _TMP_8
+ bsr _OPz_
+ bra.b _TMP_B
+_TMP_8:
+ cmp.b #$40,d0 ; infinity?
+ bne.b _TMP_9
+ bsr _OPi_
+ bra.b _TMP_B
+_TMP_9:
+ cmp.b #$60,d0 ; NaN?
+ bne.b _TMP_A
+ bsr _OPn_
+ bra.b _TMP_B
+_TMP_A:
+ bsr _OPm_ ; assuming a denorm...
+
+_TMP_B:
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ fmove.l d0,fpsr
+*
+* Result is now in FP0
+*
+ movem.l USER_DA(a6),d0-d1/a0-a1
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ fmove.l USER_FPCR(a6),fpcr ; fpcr restored
+ unlk a6
+ rts
+
+ xdef _OPx_
+_OPx_:
+*
+* Extended-precision entry point of the generic template.  Identical
+* in shape to the single and double entries except that the argument
+* at 8(a6) is loaded with fmove.x.  Saves d0-d1/a0-a1, fp0-fp3, the
+* fpsr and the fpcr in the frame, passes the caller's fpcr in d1,
+* tags the argument and dispatches on the tag byte; every saved
+* register except fp0 is restored on exit.
+*
+ link a6,#-LOCAL_SIZE
+ movem.l d0-d1/a0-a1,USER_DA(a6)
+ fmovem.x fp0-fp3,USER_FP0(a6)
+ fmove.l fpsr,USER_FPSR(a6)
+ fmove.l fpcr,USER_FPCR(a6)
+ fmove.l fpcr,d1 ; user's rounding mode/precision
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input argument
+*
+ fmove.x 8(a6),fp0
+ fmove.x fp0,ETEMP(a6)
+ lea ETEMP(a6),a0
+ bsr tag
+ move.b d0,STAG(a6)
+ tst.b d0
+ bne.b _TMP_C
+ bsr _OPr_ ; normalized (regular) number
+ bra.b _TMP_G
+_TMP_C:
+ cmp.b #$20,d0 ; zero?
+ bne.b _TMP_D
+ bsr _OPz_
+ bra.b _TMP_G
+_TMP_D:
+ cmp.b #$40,d0 ; infinity?
+ bne.b _TMP_E
+ bsr _OPi_
+ bra.b _TMP_G
+_TMP_E:
+ cmp.b #$60,d0 ; NaN?
+ bne.b _TMP_F
+ bsr _OPn_
+ bra.b _TMP_G
+_TMP_F:
+ bsr _OPm_ ; assuming a denorm...
+
+_TMP_G:
+ fmove.l fpsr,d0 ; update status register
+ or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions
+ fmove.l d0,fpsr
+*
+* Result is now in FP0
+*
+ movem.l USER_DA(a6),d0-d1/a0-a1
+ fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored
+ fmove.l USER_FPCR(a6),fpcr ; fpcr restored
+ unlk a6
+ rts
+
diff --git a/sys/arch/m68k/fpsp/MONADIC.R3V6 b/sys/arch/m68k/fpsp/MONADIC.R3V6
new file mode 100644
index 00000000000..a4e494acc19
--- /dev/null
+++ b/sys/arch/m68k/fpsp/MONADIC.R3V6
@@ -0,0 +1,91 @@
+* $NetBSD: MONADIC.R3V6,v 1.2 1994/10/26 07:48:44 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* MONADIC.R3V6 1.3 4/30/91
+*
+* MONADIC.R3V6 --- MONADIC template for MCD R3V6 native C compiler
+*
+* The MCD compiler is old. It returns float and double values
+* as a double stored in d0/d1. There is no support for single or extended
+* precision operations. It's not clear whether the float registers
+* should be preserved, so for speed, they're not.
+*
+
+ xref tag
+ xref _OPr_
+ xref _OPz_
+ xref _OPi_
+ xref _OPn_
+ xref _OPm_
+
+ xdef _OPd_
+_OPd_:
+*
+* Double-precision entry point of the R3V6 template.  No integer or
+* floating-point registers are saved.  The caller's fpcr is copied
+* to d1 and the fpcr is zeroed; the double argument at 8(a6) is
+* converted to extended in ETEMP, classified by 'tag', and the tag
+* byte in d0 selects one of the five handlers.  The result in fp0 is
+* then stored as a double at USER_D0(a6) and loaded into d0/d1.
+*
+ link a6,#-LOCAL_SIZE
+ fmove.l fpcr,d1 ; user's rounding mode/precision
+ fmove.l #0,fpcr ; force rounding mode/prec to extended,rn
+*
+* copy, convert and tag input argument
+*
+ fmove.d 8(a6),fp0
+ fmove.x fp0,ETEMP(a6)
+ lea ETEMP(a6),a0
+ bsr tag
+ move.b d0,STAG(a6)
+ tst.b d0
+ bne.b _TMP_2
+ bsr _OPr_ ; normalized (regular) number
+ bra.b _TMP_6
+_TMP_2:
+ cmp.b #$20,d0 ; zero?
+ bne.b _TMP_3
+ bsr _OPz_
+ bra.b _TMP_6
+_TMP_3:
+ cmp.b #$40,d0 ; infinity?
+ bne.b _TMP_4
+ bsr _OPi_
+ bra.b _TMP_6
+_TMP_4:
+ cmp.b #$60,d0 ; NaN?
+ bne.b _TMP_5
+ bsr _OPn_
+ bra.b _TMP_6
+_TMP_5:
+ bsr _OPm_ ; assuming a denorm...
+
+_TMP_6:
+ fmove.d fp0,USER_D0(a6) ; result goes into d0/d1 pair
+ movem.l USER_D0(a6),d0-d1
+ unlk a6
+ rts
+
diff --git a/sys/arch/m68k/fpsp/Makefile b/sys/arch/m68k/fpsp/Makefile
new file mode 100644
index 00000000000..ce9dcca6411
--- /dev/null
+++ b/sys/arch/m68k/fpsp/Makefile
@@ -0,0 +1,338 @@
+# $NetBSD: Makefile,v 1.4 1994/10/26 07:48:46 cgd Exp $
+
+# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+# M68000 Hi-Performance Microprocessor Division
+# M68040 Software Package
+#
+# M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+# All rights reserved.
+#
+# THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+# To the maximum extent permitted by applicable law,
+# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+# PARTICULAR PURPOSE and any warranty against infringement with
+# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+# and any accompanying written materials.
+#
+# To the maximum extent permitted by applicable law,
+# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+# PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+# OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+# SOFTWARE. Motorola assumes no responsibility for the maintenance
+# and support of the SOFTWARE.
+#
+# You are hereby granted a copyright license to use, modify, and
+# distribute the SOFTWARE so long as this entire notice is retained
+# without alteration in any modified and/or redistributed versions,
+# and that such modified versions are clearly identified as such.
+# No licenses are granted by implication, estoppel or otherwise
+# under any patents or trademarks of Motorola, Inc.
+
+#
+# Makefile 3.3 3/27/91
+#
+# Makefile for 68040 Floating Point Software Package
+#
+
+TARGET = fpsp
+
+AS = as -m68040
+LD = ld
+
+#
+# For the Library Version:
+#
+AR = ar
+LIB_FILTER = sed 's/fpsp.defs/l_fpsp.defs/'
+LIB_TARGET = lib$(TARGET).a
+#
+# SYS selects the template set to use
+# templates are supplied for R3V6, CI5 and GEN(generic)
+# PREFIX is a string that begins a temporary label in the assembler
+# R3V6 uses 'L%', CI5 likes '.L'
+#
+#SYS = R3V6
+#PREFIX = L%%
+#
+#SYS = CI5
+#PREFIX = .L
+#
+#SYS = GEN
+#PREFIX = L_
+#
+SYS = GCC
+PREFIX = L_
+
+.SUFFIXES: .o .s .sa .defs .h
+
+# Motorola-syntax source (.sa) -> gas source (.s), converted by asm2gas.
+.sa.s:
+ sh ${.CURDIR}/asm2gas ${.IMPSRC} >${.TARGET}
+# Motorola-syntax header (.h) -> gas include file (.defs), same converter.
+.h.defs:
+ sh ${.CURDIR}/asm2gas ${.IMPSRC} >${.TARGET}
+# Assemble a converted source with $(AS).
+.s.o:
+ $(AS) -o ${.TARGET} ${.IMPSRC}
+
+H_FILES = \
+ fpsp.defs \
+ l_fpsp.defs
+
+O_FILES = \
+ copyright.o \
+ netbsd.o \
+ bindec.o \
+ binstr.o \
+ decbin.o \
+ do_func.o \
+ gen_except.o \
+ get_op.o \
+ kernel_ex.o \
+ res_func.o \
+ round.o \
+ sacos.o \
+ sasin.o \
+ satan.o \
+ satanh.o \
+ scosh.o \
+ setox.o \
+ sgetem.o \
+ sint.o \
+ slogn.o \
+ slog2.o \
+ smovecr.o \
+ srem_mod.o \
+ scale.o \
+ ssin.o \
+ ssinh.o \
+ stan.o \
+ stanh.o \
+ sto_res.o \
+ stwotox.o \
+ tbldo.o \
+ util.o \
+ x_bsun.o \
+ x_fline.o \
+ x_operr.o \
+ x_ovfl.o \
+ x_snan.o \
+ x_store.o \
+ x_unfl.o \
+ x_unimp.o \
+ x_unsupp.o \
+ bugfix.o
+
+LIB_O_FILES = \
+ l_copyright.o \
+ l_entry.o \
+ l_do_func.o \
+ l_round.o \
+ l_sacos.o \
+ l_sasin.o \
+ l_satan.o \
+ l_satanh.o \
+ l_scale.o \
+ l_scosh.o \
+ l_setox.o \
+ l_sgetem.o \
+ l_sint.o \
+ l_slog2.o \
+ l_slogn.o \
+ l_srem_mod.o \
+ l_ssin.o \
+ l_ssinh.o \
+ l_stan.o \
+ l_stanh.o \
+ l_stwotox.o \
+ l_support.o
+
+S_FILES = \
+ netbsd.s \
+ bindec.s \
+ binstr.s \
+ decbin.s \
+ do_func.s \
+ get_op.s \
+ gen_except.s \
+ kernel_ex.s \
+ res_func.s \
+ round.s \
+ sacos.s \
+ sasin.s \
+ satan.s \
+ satanh.s \
+ scosh.s \
+ setox.s \
+ sgetem.s \
+ sint.s \
+ slogn.s \
+ slog2.s \
+ smovecr.s \
+ srem_mod.s \
+ scale.s \
+ ssin.s \
+ ssinh.s \
+ stan.s \
+ stanh.s \
+ sto_res.s \
+ stwotox.s \
+ tbldo.s \
+ util.s \
+ x_bsun.s \
+ x_fline.s \
+ x_operr.s \
+ x_ovfl.s \
+ x_snan.s \
+ x_store.s \
+ x_unfl.s \
+ x_unimp.s \
+ x_unsupp.s \
+ bugfix.s
+
+# Generated sources of the library version; the clean target removes
+# every file listed here.  l_copyright.s is produced by the
+# 'l_copyright.s: copyright.s' rule and must be listed so that clean
+# does not leave it behind.
+LIB_S_FILES = \
+ l_copyright.s \
+ l_entry.sa l_entry.s \
+ l_do_func.s \
+ l_round.s \
+ l_sacos.s \
+ l_sasin.s \
+ l_satan.s \
+ l_satanh.s \
+ l_scale.s \
+ l_scosh.s \
+ l_setox.s \
+ l_sgetem.s \
+ l_sint.s \
+ l_slog2.s \
+ l_slogn.s \
+ l_srem_mod.s \
+ l_ssin.s \
+ l_ssinh.s \
+ l_stan.s \
+ l_stanh.s \
+ l_stwotox.s \
+ l_support.s
+
+#
+# Build the target object. The linkfile is created on the fly.
+# Change the SEG directives to suit your system.
+#
+$(TARGET).o: $(O_FILES)
+ $(LD) -r -o $(TARGET).o $(O_FILES)
+
+#
+# Just about every file needs fpsp.h so:
+#
+$(O_FILES): fpsp.defs
+
+#
+#-----------------------------------------------------------------------
+#
+# For making a library version of the FPSP:
+#
+library: $(LIB_TARGET)
+
+$(LIB_TARGET): $(LIB_O_FILES)
+ rm -f $(LIB_TARGET)
+ $(AR) crv $(LIB_TARGET) $(LIB_O_FILES)
+
+$(LIB_O_FILES): l_fpsp.defs
+
+#
+# The entry points to the library version are created here
+# by using two template files, an awk script, and a list of
+# the entry routines for each function.
+#
+l_entry.sa: L_ENTRY.AWK L_LIST MONADIC.$(SYS) DYADIC.$(SYS) l_fpsp.h
+ awk -f L_ENTRY.AWK SYS=$(SYS) PREFIX=$(PREFIX) - <L_LIST|sh>l_entry.sa
+
+#
+# Do_func.sa and round.sa need special editing to remove references that
+# aren't needed in the library version. Beware that changes in
+# the source code may cause this editing to break....
+#
+# The filtered copy of do_func.s is edited in place by an ed script
+# staged in .SCRIPT (in the current directory, removed afterwards):
+#   - delete from the line matching 'global.*do_func' through the
+#     line matching '^ rts';
+#   - delete every line containing 'smovcr';
+#   - delete every line containing 'tblpre';
+#   - write the file and quit.
+l_do_func.s: do_func.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+ echo '/global.*do_func/,/^ rts/d' >.SCRIPT
+ echo 'g/smovcr/d' >>.SCRIPT
+ echo 'g/tblpre/d' >>.SCRIPT
+ echo 'w' >>.SCRIPT
+ echo 'q' >>.SCRIPT
+ ed - ${.TARGET} <.SCRIPT
+ rm .SCRIPT
+
+# The filtered copy of round.s is edited in place by an ed script
+# staged in .SCRIPT: it deletes the 'not_E3:' label line together
+# with the six lines preceding it, then writes the file and quits.
+l_round.s: round.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+ echo '/^not_E3:/-6,/^not_E3:/d' >.SCRIPT
+ echo 'w' >>.SCRIPT
+ echo 'q' >>.SCRIPT
+ ed - ${.TARGET} <.SCRIPT
+ rm .SCRIPT
+
+l_copyright.s: copyright.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_sacos.s: sacos.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_sasin.s: sasin.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_satan.s: satan.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_satanh.s: satanh.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_scale.s: scale.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_scosh.s: scosh.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_setox.s: setox.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_sgetem.s: sgetem.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_sint.s: sint.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_slog2.s: slog2.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_slogn.s: slogn.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_srem_mod.s: srem_mod.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_ssin.s: ssin.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_ssinh.s: ssinh.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_stan.s: stan.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_stanh.s: stanh.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+l_stwotox.s: stwotox.s
+ $(LIB_FILTER) ${.ALLSRC} >${.TARGET}
+
+#
+# Remove all generated files (converted sources, objects and the library)
+#
+clean:
+ rm -f $(H_FILES)
+ rm -f $(S_FILES)
+ rm -f $(O_FILES)
+ rm -f $(TARGET).o
+ rm -f $(LIB_S_FILES)
+ rm -f $(LIB_O_FILES)
+ rm -f $(LIB_TARGET)
+
+clobber: clean
+
diff --git a/sys/arch/m68k/fpsp/Makefile.inc b/sys/arch/m68k/fpsp/Makefile.inc
new file mode 100644
index 00000000000..4a76434aae3
--- /dev/null
+++ b/sys/arch/m68k/fpsp/Makefile.inc
@@ -0,0 +1,18 @@
+# $NetBSD: Makefile.inc,v 1.2 1994/10/26 07:48:47 cgd Exp $
+#
+# NOTE: $S must correspond to the top of the `sys' tree
+
+# Location of the FPSP sources, relative to the top of the sys tree.
+FPSPSRCDIR= $S/arch/m68k/fpsp
+
+# Ask a sub-make, run inside the FPSP source directory on a one-rule
+# makefile fed on stdin, to echo its ${.OBJDIR}; the output is the
+# directory in which fpsp.o is expected.
+FPSPOBJDIR!= cd $(FPSPSRCDIR); \
+ printf "xxx:\n\techo \$${.OBJDIR}\n" | $(MAKE) -r -s -f - xxx
+
+FPSPOBJ= $(FPSPOBJDIR)/fpsp.o
+
+# Bring fpsp.o up to date by running make in the FPSP source directory.
+# The dependency on __always_make_fpsp (a target with no commands,
+# declared below) presumably forces this recipe to run on every build.
+$(FPSPOBJ): .NOTMAIN __always_make_fpsp
+ @echo making sure the fpsp is up to date...
+ @(cd $(FPSPSRCDIR) ; $(MAKE))
+
+# FPSP is set from the output of cpp run on an '#ifdef FPSP' fragment
+# that contains ${FPSPOBJ}; only the words of COPTS matching -DFPSP
+# are passed to cpp.
+FPSP!= printf "\#ifdef FPSP\n${FPSPOBJ}\n\#endif\n" | cpp -P -undef ${COPTS:M-DFPSP}
+
+__always_make_fpsp: .NOTMAIN
diff --git a/sys/arch/m68k/fpsp/asm2gas b/sys/arch/m68k/fpsp/asm2gas
new file mode 100644
index 00000000000..af3f7702cfe
--- /dev/null
+++ b/sys/arch/m68k/fpsp/asm2gas
@@ -0,0 +1,163 @@
+#!/bin/sh
+# $NetBSD: asm2gas,v 1.3 1994/10/26 07:48:49 cgd Exp $
+
+#
+# Copyright (c) 1994 Charles Hannum. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by Charles Hannum.
+# 4. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+# This ugly script converts assembler code from Motorola's format to a
+# form that gas (MIT syntax) can digest.
+
+cat $1 | sed -e '
+ # format canonicalization
+
+ /[ ]IDNT[ ]/{s/^/|/;p;d;}
+ /^\*/{s//|/;p;d;}
+ s/;/|/
+ /[ ]equ[ ]/{
+ s/\([A-Za-z_][A-Za-z0-9_]*\)[ ]*equ[ ]*/\1,/
+ s/[ ][ ]*\(.*\)$/ |\1/
+ s/ ||/ |/
+ s/^/ .set /
+ p;d
+ }
+ s/^\([A-Za-z_][A-Za-z0-9_]*\)[ ][ ]*/\1: /
+ s/^\([A-Za-z_][A-Za-z0-9_]*\)$/\1:/
+ /^[A-Za-z_][A-Za-z0-9_]*:/{
+ h
+ s/:.*$/:/
+ p
+ g
+ s/^.*:[ ]*/ /
+ /^ $/d
+ }
+ /^[ ][ ]*\([.a-zA-Z][.a-zA-Z0-9]*\)/{
+ h
+ s///
+ s/^[ ][ ]*//
+ s/[ ][ ]*\(.*\)$/ |\1/
+ s/ ||/ |/
+ x
+ s/^[ ][ ]*//
+ s/[ ][ ]*.*$/ /
+ y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/
+ s/^/ /
+ G
+ s/\n//
+ }
+' | sed -e '
+ # operator conversion
+
+ s/^ section 7/ .text/
+ s/^ section 8/ .text/
+ s/^ section 15/ .data/
+ /^ include/{s/include[ ]/.include "/;s/\.h[ ]*$/.defs"/;p;d;}
+ s/^ xref/| xref/
+ s/^ end/| end/
+ s/^ xdef/ .global/
+
+ s/^ dc\.l/ .long/
+ s/^ dc\.w/ .short/
+ s/^ dc\.b/ .byte/
+
+ /^ [aceg-z]/{
+ /^ add[aiqx]*\.[bwl] /{s/\.//;p;d;}
+ /^ andi*\.[bwl] /{s/\.//;p;d;}
+ /^ as[lr]\.[bwl] /{s/\.//;p;d;}
+ /^ clr\.[bwl] /{s/\.//;p;d;}
+ /^ cmp[i2]*\.[bwl] /{s/\.//;p;d;}
+ /^ eori*\.[bwl] /{s/\.//;p;d;}
+ /^ lea\.l /{s/\..//;p;d;}
+ /^ ls[lr]\.[bwl] /{s/\.//;p;d;}
+ /^ move[acmqs]*\.[bwl] /{s/\.//;p;d;}
+ /^ mul[su]\.[wl] /{s/\.//;p;d;}
+ /^ neg\.[bwl] /{s/\.//;p;d;}
+ /^ ori*\.[bwl] /{s/\.//;p;d;}
+ /^ ro[lrx]*\.[bwl] /{s/\.//;p;d;}
+ /^ sub[aiqx]*\.[bwl] /{s/\.//;p;d;}
+ /^ swap\.w /{s/\..//;p;d;}
+ /^ s\([a-tv-z][a-z]*\)\.b /{s/\..//;p;d;}
+ /^ tst\.[bwl] /{s/\.//;p;d;}
+ p;d
+ }
+
+ /^ bchg\.[bl] /{s/\..//;p;d;}
+ /^ bclr\.[bl] /{s/\..//;p;d;}
+ /^ bset\.[bl] /{s/\..//;p;d;}
+ /^ btst\.[bl] /{s/\..//;p;d;}
+ /^ div[sul]*\.[wl] /{s/\.//;p;d;}
+ /^ fabs\.[sdx] /{s/\.//;p;d;}
+ /^ fadd\.[sdxbwl] /{s/\.//;p;d;}
+ /^ fcmp\.[sdxbwl] /{s/\.//;p;d;}
+ /^ fdiv\.[sdx] /{s/\.//;p;d;}
+ /^ fmove[mx]*\.[sdxbwl] /{s/\.//;p;d;}
+ /^ fmul\.[sdx] /{s/\.//;p;d;}
+ /^ fneg\.[sdx] /{s/\.//;p;d;}
+ /^ fsqrt\.[sdx] /{s/\.//;p;d;}
+ /^ fsub\.[sdxbwl] /{s/\.//;p;d;}
+ /^ ftst\.[sdx] /{s/\.//;p;d;}
+
+ /^ b[a-eg-z][a-z]*\.b /{s/\.b/s/;p;d;}
+ /^ b[a-eg-z][a-z]*\.w /{s/\.w//;p;d;}
+ /^ b[a-eg-z][a-z]*\.l /{s/\.l/l/;p;d;}
+ /^ db[a-z][a-z]*\.w /{s/\.w//;p;d;}
+ /^ fb[a-eg-z][a-z]*\.w /{s/\.w//;p;d;}
+ /^ fb[a-eg-z][a-z]*\.l /{s/\.l/l/;p;d;}
+' | sed -e '
+ # operand conversion
+
+ s/\([^_a-zA-Z0-9]\)FPIAR\([^_a-zA-Z0-9]\)/\1FPI\2/g
+ s/\([^_a-zA-Z0-9]\)FPIAR\([^_a-zA-Z0-9]\)/\1FPI\2/g
+ s/\([^_a-zA-Z0-9]\)FPIAR$/\1FPI/g
+ s/\([^_a-zA-Z0-9]\)fpiar\([^_a-zA-Z0-9]\)/\1fpi\2/g
+ s/\([^_a-zA-Z0-9]\)fpiar\([^_a-zA-Z0-9]\)/\1fpi\2/g
+ s/\([^_a-zA-Z0-9]\)fpiar$/\1fpi/g
+
+ s/\$/0x/g
+ s/#:/#:0x/g
+
+ s/-(\([sSpPaA][pPcC0-7]\))/\1@-/g
+ s/(\([sSpPaA][pPcC0-7]\))+/\1@+/g
+ s/\([-+A-Za-z0-9_]*\)(\([sSpPaA][pPcC0-7]\)\([),]\)/\2@(\1\3/g
+
+ s/\.\([bBwWlL])\)/:\1/g
+ s/\.\([bBwWlL]\)\*\([0-9][0-9]*)\)/:\1:\2/g
+ s/\*\([0-9][0-9]*\))/:l:\1)/g
+ s/{\([0-9][0-9]*\):\([0-9][0-9]*\)}/{#\1:#\2}/g
+ s/{\([dD][0-7]\):\([0-9][0-9]*\)}/{\1:#\2}/g
+
+ s/@(0*)/@/g
+ s/(,/(/g;s/:)/)/g
+
+ # make up for a gas bug
+ /^ fmovemx /{
+ s/ \([fF][pP][0-7]\),/ \1-\1,/
+ s/,\([fF][pP][0-7]\) /,\1-\1 /
+ s/,\([fF][pP][0-7]\)$/,\1-\1/
+ }
+'
diff --git a/sys/arch/m68k/fpsp/bindec.sa b/sys/arch/m68k/fpsp/bindec.sa
new file mode 100644
index 00000000000..4e68ade209f
--- /dev/null
+++ b/sys/arch/m68k/fpsp/bindec.sa
@@ -0,0 +1,946 @@
+* $NetBSD: bindec.sa,v 1.3 1994/10/26 07:48:51 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* bindec.sa 3.4 1/3/91
+*
+* bindec
+*
+* Description:
+* Converts an input in extended precision format
+* to bcd format.
+*
+* Input:
+* a0 points to the input extended precision value
+* value in memory; d0 contains the k-factor sign-extended
+* to 32-bits. The input may be either normalized,
+* unnormalized, or denormalized.
+*
+* Output: result in the FP_SCR1 space on the stack.
+*
+* Saves and Modifies: D2-D7,A2,FP2
+*
+* Algorithm:
+*
+* A1. Set RM and size ext; Set SIGMA = sign of input.
+* The k-factor is saved for use in d7. Clear the
+* BINDEC_FLG for separating normalized/denormalized
+* input. If input is unnormalized or denormalized,
+* normalize it.
+*
+* A2. Set X = abs(input).
+*
+* A3. Compute ILOG.
+* ILOG is the log base 10 of the input value. It is
+* approximated by adding e + 0.f when the original
+* value is viewed as 2^^e * 1.f in extended precision.
+* This value is stored in d6.
+*
+* A4. Clr INEX bit.
+* The operation in A3 above may have set INEX2.
+*
+* A5. Set ICTR = 0;
+* ICTR is a flag used in A13. It must be set before the
+* loop entry A6.
+*
+* A6. Calculate LEN.
+* LEN is the number of digits to be displayed. The
+* k-factor can dictate either the total number of digits,
+* if it is a positive number, or the number of digits
+* after the decimal point which are to be included as
+* significant. See the 68882 manual for examples.
+* If LEN is computed to be greater than 17, set OPERR in
+* USER_FPSR. LEN is stored in d4.
+*
+* A7. Calculate SCALE.
+* SCALE is equal to 10^ISCALE, where ISCALE is the number
+* of decimal places needed to insure LEN integer digits
+* in the output before conversion to bcd. LAMBDA is the
+* sign of ISCALE, used in A9. Fp1 contains
+* 10^^(abs(ISCALE)) using a rounding mode which is a
+* function of the original rounding mode and the signs
+* of ISCALE and X. A table is given in the code.
+*
+* A8. Clr INEX; Force RZ.
+* The operations in A7 above may have set INEX2.
+* RZ mode is forced for the scaling operation to insure
+* only one rounding error. The grs bits are collected in
+* the INEX flag for use in A10.
+*
+* A9. Scale X -> Y.
+* The mantissa is scaled to the desired number of
+* significant digits. The excess digits are collected
+* in INEX2.
+*
+* A10. Or in INEX.
+* If INEX is set, round error occurred. This is
+* compensated for by 'or-ing' in the INEX2 flag to
+* the lsb of Y.
+*
+* A11. Restore original FPCR; set size ext.
+* Perform FINT operation in the user's rounding mode.
+* Keep the size to extended.
+*
+* A12. Calculate YINT = FINT(Y) according to user's rounding
+* mode. The FPSP routine sintdo is used. The output
+* is in fp0.
+*
+* A13. Check for LEN digits.
+* If the int operation results in more than LEN digits,
+* or less than LEN -1 digits, adjust ILOG and repeat from
+* A6. This test occurs only on the first pass. If the
+* result is exactly 10^LEN, decrement ILOG and divide
+* the mantissa by 10.
+*
+* A14. Convert the mantissa to bcd.
+* The binstr routine is used to convert the LEN digit
+* mantissa to bcd in memory. The input to binstr is
+* to be a fraction; i.e. (mantissa)/10^LEN and adjusted
+* such that the decimal point is to the left of bit 63.
+* The bcd digits are stored in the correct position in
+* the final string area in memory.
+*
+* A15. Convert the exponent to bcd.
+* As in A14 above, the exp is converted to bcd and the
+* digits are stored in the final string.
+* Test the length of the final exponent string. If the
+* length is 4, set operr.
+*
+* A16. Write sign bits to final string.
+*
+* Implementation Notes:
+*
+* The registers are used as follows:
+*
+* d0: scratch; LEN input to binstr
+* d1: scratch
+* d2: upper 32-bits of mantissa for binstr
+* d3: scratch;lower 32-bits of mantissa for binstr
+* d4: LEN
+* d5: LAMBDA/ICTR
+* d6: ILOG
+* d7: k-factor
+* a0: ptr for original operand/final result
+* a1: scratch pointer
+* a2: pointer to FP_X; abs(original value) in ext
+* fp0: scratch
+* fp1: scratch
+* fp2: scratch
+* F_SCR1:
+* F_SCR2:
+* L_SCR1:
+* L_SCR2:
+*
+
+BINDEC IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ include fpsp.h
+
+ section 8
+
+* Constants in extended precision
+LOG2 dc.l $3FFD0000,$9A209A84,$FBCFF798,$00000000
+LOG2UP1 dc.l $3FFD0000,$9A209A84,$FBCFF799,$00000000
+
+* Constants in single precision
+FONE dc.l $3F800000,$00000000,$00000000,$00000000
+FTWO dc.l $40000000,$00000000,$00000000,$00000000
+FTEN dc.l $41200000,$00000000,$00000000,$00000000
+F4933 dc.l $459A2800,$00000000,$00000000,$00000000
+
+RBDTBL dc.b 0,0,0,0
+ dc.b 3,3,2,2
+ dc.b 3,2,2,3
+ dc.b 2,3,3,2
+
+ xref binstr
+ xref sintdo
+ xref ptenrn,ptenrm,ptenrp
+
+ xdef bindec
+ xdef sc_mul
+bindec:
+ movem.l d2-d7/a2,-(a7) ;save d2-d7/a2; restored before rts
+ fmovem.x fp0-fp2,-(a7) ;save fp0-fp2; restored before rts
+
+* A1. Set RM and size ext. Set SIGMA = sign input;
+* The k-factor is saved for use in d7. Clear BINDEC_FLG for
+* separating normalized/denormalized input. If the input
+* is a denormalized number, set the BINDEC_FLG memory word
+* to signal denorm. If the input is unnormalized, normalize
+* the input and test for denormalized result.
+*
+ fmove.l #rm_mode,FPCR ;set RM and ext
+ move.l (a0),L_SCR2(a6) ;save sign/exponent lword of X for later sign checks
+ move.l d0,d7 ;move k-factor to d7
+ clr.b BINDEC_FLG(a6) ;clr norm/denorm flag
+ move.w STAG(a6),d0 ;get stag
+ andi.w #$e000,d0 ;isolate stag bits
+ beq A2_str ;if zero, input is norm; skip normalization
+*
+* Normalize the denorm
+*
+un_de_norm:
+ move.w (a0),d0 ;get sign/exponent word of input
+ andi.w #$7fff,d0 ;strip sign of normalized exp
+ move.l 4(a0),d1 ;d1 = upper 32 bits of mantissa
+ move.l 8(a0),d2 ;d2 = lower 32 bits of mantissa
+norm_loop:
+ sub.w #1,d0 ;decrement exponent for each shift
+ add.l d2,d2 ;shift d1:d2 left one bit,
+ addx.l d1,d1 ;carrying from d2 into d1
+ tst.l d1 ;check msb of the mantissa
+ bge.b norm_loop ;loop until msb of d1 is set
+*
+* Test if the normalized input is denormalized
+*
+ tst.w d0 ;test the adjusted exponent
+ bgt.b pos_exp ;if greater than zero, it is a norm
+ st BINDEC_FLG(a6) ;set flag for denorm
+pos_exp:
+ andi.w #$7fff,d0 ;strip sign of normalized exp
+ move.w d0,(a0) ;write back exponent word (sign bit cleared)
+ move.l d1,4(a0) ;write back normalized upper mantissa
+ move.l d2,8(a0) ;write back normalized lower mantissa
+
+* A2. Set X = abs(input).
+*
+A2_str:
+ move.l (a0),FP_SCR2(a6) ; copy sign/exponent lword to work space
+ move.l 4(a0),FP_SCR2+4(a6) ; copy upper mantissa lword to work space
+ move.l 8(a0),FP_SCR2+8(a6) ; copy lower mantissa lword to work space
+ andi.l #$7fffffff,FP_SCR2(a6) ;clear sign bit to create abs(X)
+
+* A3. Compute ILOG.
+* ILOG is the log base 10 of the input value. It is approx-
+* imated by adding e + 0.f when the original value is viewed
+* as 2^^e * 1.f in extended precision. This value is stored
+* in d6.
+*
+* Register usage:
+* Input/Output
+* d0: k-factor/exponent
+* d2: x/x
+* d3: x/x
+* d4: x/x
+* d5: x/x
+* d6: x/ILOG
+* d7: k-factor/Unchanged
+* a0: ptr for original operand/final result
+* a1: x/x
+* a2: x/x
+* fp0: x/float(ILOG)
+* fp1: x/x
+* fp2: x/x
+* F_SCR1:x/x
+* F_SCR2:Abs(X)/Abs(X) with $3fff exponent
+* L_SCR1:x/x
+* L_SCR2:first word of X packed/Unchanged
+
+ tst.b BINDEC_FLG(a6) ;check for denorm
+ beq.b A3_cont ;if clr, continue with norm
+ move.l #-4933,d6 ;force ILOG = -4933
+ bra.b A4_str ;denorm: skip the ILOG estimate
+A3_cont:
+ move.w FP_SCR2(a6),d0 ;move exp to d0
+ move.w #$3fff,FP_SCR2(a6) ;replace exponent with 0x3fff
+ fmove.x FP_SCR2(a6),fp0 ;now fp0 has 1.f
+ sub.w #$3fff,d0 ;strip off bias
+ fadd.w d0,fp0 ;add in exp: fp0 = e + 1.f
+ fsub.s FONE,fp0 ;subtract off 1.0: fp0 = e + 0.f
+ fbge.w pos_res ;if pos or zero, branch
+ fmul.x LOG2UP1,fp0 ;if neg, mul by LOG2UP1
+ fmove.l fp0,d6 ;put ILOG in d6 as a lword (FPCR is RM from A1)
+ bra.b A4_str ;ILOG done, go to A4
+pos_res:
+ fmul.x LOG2,fp0 ;if pos, mul by LOG2
+ fmove.l fp0,d6 ;put ILOG in d6 as a lword (FPCR is RM from A1)
+
+
+* A4. Clr INEX bit.
+* The operation in A3 above may have set INEX2.
+
+A4_str:
+ fmove.l #0,FPSR ;zero all of fpsr (incl. any INEX2 from A3) - nothing needed
+
+
+* A5. Set ICTR = 0;
+* ICTR is a flag used in A13. It must be set before the
+* loop entry A6. The lower word of d5 is used for ICTR.
+
+ clr.w d5 ;clear ICTR (lower word of d5)
+
+
+* A6. Calculate LEN.
+* LEN is the number of digits to be displayed. The k-factor
+* can dictate either the total number of digits, if it is
+* a positive number, or the number of digits after the
+* original decimal point which are to be included as
+* significant. See the 68882 manual for examples.
+* If LEN is computed to be greater than 17, set OPERR in
+* USER_FPSR. LEN is stored in d4.
+*
+* Register usage:
+* Input/Output
+* d0: exponent/Unchanged
+* d2: x/x/scratch
+* d3: x/x
+* d4: exc picture/LEN
+* d5: ICTR/Unchanged
+* d6: ILOG/Unchanged
+* d7: k-factor/Unchanged
+* a0: ptr for original operand/final result
+* a1: x/x
+* a2: x/x
+* fp0: float(ILOG)/Unchanged
+* fp1: x/x
+* fp2: x/x
+* F_SCR1:x/x
+* F_SCR2:Abs(X) with $3fff exponent/Unchanged
+* L_SCR1:x/x
+* L_SCR2:first word of X packed/Unchanged
+
+A6_str:
+ tst.l d7 ;branch on sign of k
+ ble.b k_neg ;if k <= 0, LEN = ILOG + 1 - k
+ move.l d7,d4 ;if k > 0, LEN = k
+ bra.b len_ck ;skip to LEN check
+k_neg:
+ move.l d6,d4 ;first load ILOG to d4
+ sub.l d7,d4 ;subtract off k
+ addq.l #1,d4 ;add in the 1
+len_ck:
+ tst.l d4 ;LEN check: branch on sign of LEN
+ ble.b LEN_ng ;if LEN <= 0, set LEN = 1
+ cmp.l #17,d4 ;test if LEN > 17
+ ble.b A7_str ;if not, forget it
+ move.l #17,d4 ;set max LEN = 17
+ tst.l d7 ;test k: OPERR is only set when k > 0
+ ble.b A7_str ;if k <= 0, skip setting OPERR
+ or.l #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
+ bra.b A7_str ;finished here
+LEN_ng:
+ moveq.l #1,d4 ;min LEN is 1
+
+
+* A7. Calculate SCALE.
+* SCALE is equal to 10^ISCALE, where ISCALE is the number
+* of decimal places needed to insure LEN integer digits
+* in the output before conversion to bcd. LAMBDA is the sign
+* of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
+* the rounding mode as given in the following table (see
+* Coonen, p. 7.23 as ref.; however, the SCALE variable is
+* of opposite sign in bindec.sa from Coonen).
+*
+* Initial USE
+* FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
+* ----------------------------------------------
+* RN 00 0 0 00/0 RN
+* RN 00 0 1 00/0 RN
+* RN 00 1 0 00/0 RN
+* RN 00 1 1 00/0 RN
+* RZ 01 0 0 11/3 RP
+* RZ 01 0 1 11/3 RP
+* RZ 01 1 0 10/2 RM
+* RZ 01 1 1 10/2 RM
+* RM 10 0 0 11/3 RP
+* RM 10 0 1 10/2 RM
+* RM 10 1 0 10/2 RM
+* RM 10 1 1 11/3 RP
+* RP 11 0 0 10/2 RM
+* RP 11 0 1 11/3 RP
+* RP 11 1 0 11/3 RP
+* RP 11 1 1 10/2 RM
+*
+* Register usage:
+* Input/Output
+* d0: exponent/scratch - final is 0
+* d2: x/0 or 24 for A9
+* d3: x/scratch - offset ptr into PTENRM array
+* d4: LEN/Unchanged
+* d5: 0/ICTR:LAMBDA
+* d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
+* d7: k-factor/Unchanged
+* a0: ptr for original operand/final result
+* a1: x/ptr to PTENRM array
+* a2: x/x
+* fp0: float(ILOG)/Unchanged
+* fp1: x/10^ISCALE
+* fp2: x/x
+* F_SCR1:x/x
+* F_SCR2:Abs(X) with $3fff exponent/Unchanged
+* L_SCR1:x/x
+* L_SCR2:first word of X packed/Unchanged
+
+A7_str:
+ tst.l d7 ;test sign of k
+ bgt.b k_pos ;if k > 0, skip this
+ cmp.l d6,d7 ;test k - ILOG
+ blt.b k_pos ;if k < ILOG, skip this
+ move.l d7,d6 ;if ((k<=0) & (ILOG <= k)) ILOG = k
+k_pos:
+ move.l d6,d0 ;calc ILOG + 1 - LEN in d0
+ addq.l #1,d0 ;add the 1
+ sub.l d4,d0 ;sub off LEN
+ swap d5 ;move ICTR to upper word; lower word holds LAMBDA
+ clr.w d5 ;set LAMBDA to zero initially
+ clr.w d2 ;set up d2 for very small case
+ tst.l d0 ;test sign of ISCALE
+ bge.b iscale ;if pos or zero, skip to iscale
+ addq.w #1,d5 ;if neg, set LAMBDA true
+ cmp.l #$ffffecd4,d0 ;test iscale <= -4908
+ bgt.b no_inf ;if false, skip rest
+ addi.l #24,d0 ;add in 24 to iscale
+ move.l #24,d2 ;put 24 in d2 for A9
+no_inf:
+ neg.l d0 ;and take abs of ISCALE
+iscale:
+ fmove.s FONE,fp1 ;init fp1 to 1
+ bfextu USER_FPCR(a6){26:2},d1 ;get initial rmode bits
+ add.w d1,d1 ;shift rmode to bits 2:1
+ add.w d5,d1 ;add in LAMBDA at bit 0
+ add.w d1,d1 ;rmode now in bits 3:2, LAMBDA in bit 1
+ tst.l L_SCR2(a6) ;test sign of original x
+ bge.b x_pos ;if pos, don't set bit 0
+ addq.l #1,d1 ;if neg, set bit 0
+x_pos:
+ lea.l RBDTBL,a2 ;load rbdtbl base
+ move.b (a2,d1),d3 ;load low byte of d3 with new rmode
+ lsl.l #4,d3 ;shift rmode up to bits 5:4
+ fmove.l d3,fpcr ;load into fpu; NOTE(review): only low byte of d3 was set above - confirm upper bits are clear
+ lsr.l #4,d3 ;shift rmode back down to bits 1:0
+ tst.b d3 ;decode new rmode for pten table
+ bne.b not_rn ;branch if nonzero; zero means RN
+ lea.l PTENRN,a1 ;load a1 with RN table base
+ bra.b rmode ;exit decode
+not_rn:
+ lsr.b #1,d3 ;get lsb in carry
+ bcc.b not_rp ;if carry clear (rmode 2), it is RM
+ lea.l PTENRP,a1 ;load a1 with RP table base
+ bra.b rmode ;exit decode
+not_rp:
+ lea.l PTENRM,a1 ;load a1 with RM table base
+rmode:
+ clr.l d3 ;clr table index
+e_loop:
+ lsr.l #1,d0 ;shift next bit of abs(ISCALE) into carry
+ bcc.b e_next ;if zero, skip the mul
+ fmul.x (a1,d3),fp1 ;mul by 10**(d3_bit_no)
+e_next:
+ add.l #12,d3 ;inc d3 to next pwrten table entry
+ tst.l d0 ;test if any bits of ISCALE remain
+ bne.b e_loop ;if not, loop
+
+
+* A8. Clr INEX; Force RZ.
+* The operations in A7 above may have set INEX2.
+* RZ mode is forced for the scaling operation to insure
+* only one rounding error. The grs bits are collected in
+* the INEX flag for use in A10.
+*
+* Register usage:
+* Input/Output
+
+ fmove.l #0,FPSR ;clr INEX (zeroes all of FPSR)
+ fmove.l #rz_mode,FPCR ;set RZ rounding mode
+
+
+* A9. Scale X -> Y.
+* The mantissa is scaled to the desired number of significant
+* digits. The excess digits are collected in INEX2. If mul,
+* Check d2 for excess 10 exponential value. If not zero,
+* the iscale value would have caused the pwrten calculation
+* to overflow. Only a negative iscale can cause this, so
+* multiply by 10^(d2), which is now only allowed to be 24,
+* with a multiply by 10^8 and 10^16, which is exact since
+* 10^24 is exact. If the input was denormalized, we must
+* create a busy stack frame with the mul command and the
+* two operands, and allow the fpu to complete the multiply.
+*
+* Register usage:
+* Input/Output
+* d0: FPCR with RZ mode/Unchanged
+* d2: 0 or 24/unchanged
+* d3: x/x
+* d4: LEN/Unchanged
+* d5: ICTR:LAMBDA
+* d6: ILOG/Unchanged
+* d7: k-factor/Unchanged
+* a0: ptr for original operand/final result
+* a1: ptr to PTENRM array/Unchanged
+* a2: x/x
+* fp0: float(ILOG)/X adjusted for SCALE (Y)
+* fp1: 10^ISCALE/Unchanged
+* fp2: x/x
+* F_SCR1:x/x
+* F_SCR2:Abs(X) with $3fff exponent/Unchanged
+* L_SCR1:x/x
+* L_SCR2:first word of X packed/Unchanged
+
+A9_str:
+ fmove.x (a0),fp0 ;load X from memory
+ fabs.x fp0 ;use abs(X)
+ tst.w d5 ;LAMBDA is in lower word of d5
+ bne.b sc_mul ;if ISCALE was neg (LAMBDA = 1), scale by mul
+ fdiv.x fp1,fp0 ;calculate X / SCALE -> Y to fp0
+ bra.b A10_st ;branch to A10
+
+sc_mul:
+ tst.b BINDEC_FLG(a6) ;check for denorm
+ beq.b A9_norm ;if norm, continue with mul
+ fmovem.x fp1,-(a7) ;load ETEMP with 10^ISCALE
+ move.l 8(a0),-(a7) ;load FPTEMP with input arg
+ move.l 4(a0),-(a7) ;(upper mantissa lword)
+ move.l (a0),-(a7) ;(sign/exponent lword)
+ move.l #18,d3 ;load count for busy stack (dbf runs 19 times)
+A9_loop:
+ clr.l -(a7) ;clear lword on stack
+ dbf.w d3,A9_loop ;loop until the count expires
+ move.b VER_TMP(a6),(a7) ;write current version number
+ move.b #BUSY_SIZE-4,1(a7) ;write current busy size
+ move.b #$10,$44(a7) ;set fcefpte[15] bit
+ move.w #$0023,$40(a7) ;load cmdreg1b with mul command
+ move.b #$fe,$8(a7) ;load all 1s to cu savepc
+ frestore (a7)+ ;restore frame to fpu for completion
+ fmul.x 36(a1),fp0 ;multiply fp0 by 10^8
+ fmul.x 48(a1),fp0 ;multiply fp0 by 10^16
+ bra.b A10_st ;scaling done, go to A10
+A9_norm:
+ tst.w d2 ;test for small exp case
+ beq.b A9_con ;if zero, continue as normal
+ fmul.x 36(a1),fp0 ;multiply fp0 by 10^8
+ fmul.x 48(a1),fp0 ;multiply fp0 by 10^16
+A9_con:
+ fmul.x fp1,fp0 ;calculate X * SCALE -> Y to fp0
+
+
+* A10. Or in INEX.
+* If INEX is set, round error occurred. This is compensated
+* for by 'or-ing' in the INEX2 flag to the lsb of Y.
+*
+* Register usage:
+* Input/Output
+* d0: FPCR with RZ mode/FPSR with INEX2 isolated
+* d2: x/x
+* d3: x/x
+* d4: LEN/Unchanged
+* d5: ICTR:LAMBDA
+* d6: ILOG/Unchanged
+* d7: k-factor/Unchanged
+* a0: ptr for original operand/final result
+* a1: ptr to PTENxx array/Unchanged
+* a2: x/ptr to FP_SCR2(a6)
+* fp0: Y/Y with lsb adjusted
+* fp1: 10^ISCALE/Unchanged
+* fp2: x/x
+
+A10_st:
+ fmove.l FPSR,d0 ;get FPSR left by the A9 scaling
+ fmove.x fp0,FP_SCR2(a6) ;move Y to memory
+ lea.l FP_SCR2(a6),a2 ;load a2 with ptr to FP_SCR2
+ btst.l #9,d0 ;check if INEX2 set
+ beq.b A11_st ;if clear, Y needs no adjustment - skip rest
+ ori.l #1,8(a2) ;or in 1 to lsb of mantissa
+ fmove.x FP_SCR2(a6),fp0 ;write adjusted Y back to fpu
+
+
+* A11. Restore original FPCR; set size ext.
+* Perform FINT operation in the user's rounding mode. Keep
+* the size to extended. The sintdo entry point in the sint
+* routine expects the FPCR value to be in USER_FPCR for
+* mode and precision. The original FPCR is saved in L_SCR1.
+
+A11_st:
+ move.l USER_FPCR(a6),L_SCR1(a6) ;save user's FPCR in L_SCR1 for later
+ andi.l #$00000030,USER_FPCR(a6) ;keep only bits 5:4: set size to ext,
+* ;block exceptions
+
+
+* A12. Calculate YINT = FINT(Y) according to user's rounding mode.
+* The FPSP routine sintdo is used. The output is in fp0.
+*
+* Register usage:
+* Input/Output
+* d0: FPSR with AINEX cleared/FPCR with size set to ext
+* d2: x/x/scratch
+* d3: x/x
+* d4: LEN/Unchanged
+* d5: ICTR:LAMBDA/Unchanged
+* d6: ILOG/Unchanged
+* d7: k-factor/Unchanged
+* a0: ptr for original operand/src ptr for sintdo
+* a1: ptr to PTENxx array/Unchanged
+* a2: ptr to FP_SCR2(a6)/Unchanged
+* a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
+* fp0: Y/YINT
+* fp1: 10^ISCALE/Unchanged
+* fp2: x/x
+* F_SCR1:x/x
+* F_SCR2:Y adjusted for inex/Y with original exponent
+* L_SCR1:x/original USER_FPCR
+* L_SCR2:first word of X packed/Unchanged
+
+A12_st:
+ movem.l d0-d1/a0-a1,-(a7) ;save regs used by sintdo
+ move.l L_SCR1(a6),-(a7) ;save L_SCR1 (original USER_FPCR)
+ move.l L_SCR2(a6),-(a7) ;save L_SCR2 (first lword of X)
+ lea.l FP_SCR2(a6),a0 ;a0 is ptr to F_SCR2(a6)
+ fmove.x fp0,(a0) ;move Y to memory at FP_SCR2(a6)
+ tst.l L_SCR2(a6) ;test sign of original operand
+ bge.b do_fint ;if pos, use Y
+ or.l #$80000000,(a0) ;if neg, use -Y
+do_fint:
+ move.l USER_FPSR(a6),-(a7) ;save USER_FPSR across the sintdo call
+ bsr sintdo ;sint routine returns int in fp0
+ move.b (a7),USER_FPSR(a6) ;restore only the first byte of saved USER_FPSR
+ add.l #4,a7 ;drop the saved USER_FPSR lword
+ move.l (a7)+,L_SCR2(a6) ;restore L_SCR2
+ move.l (a7)+,L_SCR1(a6) ;restore L_SCR1
+ movem.l (a7)+,d0-d1/a0-a1 ;restore regs used by sint
+ move.l L_SCR2(a6),FP_SCR2(a6) ;restore original exponent
+ move.l L_SCR1(a6),USER_FPCR(a6) ;restore user's FPCR
+
+
+* A13. Check for LEN digits.
+* If the int operation results in more than LEN digits,
+* or less than LEN -1 digits, adjust ILOG and repeat from
+* A6. This test occurs only on the first pass. If the
+* result is exactly 10^LEN, decrement ILOG and divide
+* the mantissa by 10. The calculation of 10^LEN cannot
+* be inexact, since all powers of ten up to 10^27 are exact
+* in extended precision, so the use of a previous power-of-ten
+* table will introduce no error.
+*
+*
+* Register usage:
+* Input/Output
+* d0: FPCR with size set to ext/scratch final = 0
+* d2: x/x
+* d3: x/scratch final = x
+* d4: LEN/LEN adjusted
+* d5: ICTR:LAMBDA/LAMBDA:ICTR
+* d6: ILOG/ILOG adjusted
+* d7: k-factor/Unchanged
+* a0: pointer into memory for packed bcd string formation
+* a1: ptr to PTENxx array/Unchanged
+* a2: ptr to FP_SCR2(a6)/Unchanged
+* fp0: int portion of Y/abs(YINT) adjusted
+* fp1: 10^ISCALE/Unchanged
+* fp2: x/10^LEN
+* F_SCR1:x/x
+* F_SCR2:Y with original exponent/Unchanged
+* L_SCR1:original USER_FPCR/Unchanged
+* L_SCR2:first word of X packed/Unchanged
+
+A13_st:
+ swap d5 ;put ICTR in lower word of d5
+ tst.w d5 ;check if ICTR = 0
+ bne not_zr ;if non-zero, go to second test
+*
+* Compute 10^(LEN-1)
+*
+ fmove.s FONE,fp2 ;init fp2 to 1.0
+ move.l d4,d0 ;put LEN in d0
+ subq.l #1,d0 ;d0 = LEN -1
+ clr.l d3 ;clr table index
+l_loop:
+ lsr.l #1,d0 ;shift next bit into carry
+ bcc.b l_next ;if zero, skip the mul
+ fmul.x (a1,d3),fp2 ;mul by 10**(d3_bit_no)
+l_next:
+ add.l #12,d3 ;inc d3 to next pwrten table entry
+ tst.l d0 ;test if any bits of LEN-1 remain
+ bne.b l_loop ;if not, loop
+*
+* 10^LEN-1 is computed for this test and A14. If the input was
+* denormalized, check only the case in which YINT > 10^LEN.
+*
+ tst.b BINDEC_FLG(a6) ;check if input was norm
+ beq.b A13_con ;if norm, continue with checking
+ fabs.x fp0 ;take abs of YINT
+ bra test_2 ;denorm: skip the 10^(LEN-1) test
+*
+* Compare abs(YINT) to 10^(LEN-1) and 10^LEN
+*
+A13_con:
+ fabs.x fp0 ;take abs of YINT
+ fcmp.x fp2,fp0 ;compare abs(YINT) with 10^(LEN-1)
+ fbge.w test_2 ;if greater or equal, do next test
+ subq.l #1,d6 ;subtract 1 from ILOG
+ move.w #1,d5 ;set ICTR
+ fmove.l #rm_mode,FPCR ;set rmode to RM
+ fmul.s FTEN,fp2 ;compute 10^LEN
+ bra.w A6_str ;return to A6 and recompute YINT
+test_2:
+ fmul.s FTEN,fp2 ;compute 10^LEN
+ fcmp.x fp2,fp0 ;compare abs(YINT) with 10^LEN
+ fblt.w A14_st ;if less, all is ok, go to A14
+ fbgt.w fix_ex ;if greater, fix and redo
+ fdiv.s FTEN,fp0 ;if equal, divide by 10
+ addq.l #1,d6 ; and inc ILOG
+ bra.b A14_st ; and continue elsewhere
+fix_ex:
+ addq.l #1,d6 ;increment ILOG by 1
+ move.w #1,d5 ;set ICTR
+ fmove.l #rm_mode,FPCR ;set rmode to RM
+ bra.w A6_str ;return to A6 and recompute YINT
+*
+* Since ICTR <> 0, we have already been through one adjustment,
+* and shouldn't have another; this is to check if abs(YINT) = 10^LEN
+* 10^LEN is again computed using whatever table is in a1 since the
+* value calculated cannot be inexact.
+*
+not_zr:
+ fmove.s FONE,fp2 ;init fp2 to 1.0
+ move.l d4,d0 ;put LEN in d0
+ clr.l d3 ;clr table index
+z_loop:
+ lsr.l #1,d0 ;shift next bit into carry
+ bcc.b z_next ;if zero, skip the mul
+ fmul.x (a1,d3),fp2 ;mul by 10**(d3_bit_no)
+z_next:
+ add.l #12,d3 ;inc d3 to next pwrten table entry
+ tst.l d0 ;test if any bits of LEN remain
+ bne.b z_loop ;if not, loop
+ fabs.x fp0 ;get abs(YINT)
+ fcmp.x fp2,fp0 ;check if abs(YINT) = 10^LEN
+ fbne.w A14_st ;if not, skip this
+ fdiv.s FTEN,fp0 ;divide abs(YINT) by 10
+ addq.l #1,d6 ;and inc ILOG by 1
+ addq.l #1,d4 ; and inc LEN
+ fmul.s FTEN,fp2 ; LEN was incremented, so get the new 10^LEN
+
+
+* A14. Convert the mantissa to bcd.
+* The binstr routine is used to convert the LEN digit
+* mantissa to bcd in memory. The input to binstr is
+* to be a fraction; i.e. (mantissa)/10^LEN and adjusted
+* such that the decimal point is to the left of bit 63.
+* The bcd digits are stored in the correct position in
+* the final string area in memory.
+*
+*
+* Register usage:
+* Input/Output
+* d0: x/LEN call to binstr - final is 0
+* d1: x/0
+* d2: x/ms 32-bits of mant of abs(YINT)
+* d3: x/ls 32-bits of mant of abs(YINT)
+* d4: LEN/Unchanged
+* d5: ICTR:LAMBDA/LAMBDA:ICTR
+* d6: ILOG
+* d7: k-factor/Unchanged
+* a0: pointer into memory for packed bcd string formation
+* /ptr to first mantissa byte in result string
+* a1: ptr to PTENxx array/Unchanged
+* a2: ptr to FP_SCR2(a6)/Unchanged
+* fp0: int portion of Y/abs(YINT) adjusted
+* fp1: 10^ISCALE/Unchanged
+* fp2: 10^LEN/Unchanged
+* F_SCR1:x/Work area for final result
+* F_SCR2:Y with original exponent/Unchanged
+* L_SCR1:original USER_FPCR/Unchanged
+* L_SCR2:first word of X packed/Unchanged
+
+A14_st:
+ fmove.l #rz_mode,FPCR ;force rz for conversion
+ fdiv.x fp2,fp0 ;divide abs(YINT) by 10^LEN
+ lea.l FP_SCR1(a6),a0 ;a0 is ptr to the FP_SCR1 work area
+ fmove.x fp0,(a0) ;move abs(YINT)/10^LEN to memory
+ move.l 4(a0),d2 ;move 2nd lword of FP_SCR1 to d2
+ move.l 8(a0),d3 ;move 3rd lword of FP_SCR1 to d3
+ clr.l 4(a0) ;zero lword 2 of FP_SCR1
+ clr.l 8(a0) ;zero lword 3 of FP_SCR1
+ move.l (a0),d0 ;move exponent to d0
+ swap d0 ;put exponent in lower word
+ beq.b no_sft ;if zero, don't shift
+ subi.l #$3ffd,d0 ;sub bias less 2 to make fract
+ tst.l d0 ;check if > 0
+ bgt.b no_sft ;if so, don't shift
+ neg.l d0 ;make exp positive
+m_loop:
+ lsr.l #1,d2 ;shift d2:d3 right, add 0s
+ roxr.l #1,d3 ;the number of places
+ dbf.w d0,m_loop ;given in d0
+no_sft:
+ tst.l d2 ;check for mantissa of zero
+ bne.b no_zr ;if not, go on
+ tst.l d3 ;continue zero check
+ beq.b zer_m ;if zero, go directly to binstr
+no_zr:
+ clr.l d1 ;put zero in d1 for addx
+ addi.l #$00000080,d3 ;inc at bit 7
+ addx.l d1,d2 ;continue inc
+ andi.l #$ffffff80,d3 ;strip off lsb not used by 882
+zer_m:
+ move.l d4,d0 ;put LEN in d0 for binstr call
+ addq.l #3,a0 ;a0 points to M16 byte in result
+ bsr binstr ;call binstr to convert mant
+
+
+* A15. Convert the exponent to bcd.
+* As in A14 above, the exp is converted to bcd and the
+* digits are stored in the final string.
+*
+* Digits are stored in L_SCR1(a6) on return from BINDEC as:
+*
+* 32 16 15 0
+* -----------------------------------------
+* | 0 | e3 | e2 | e1 | e4 | X | X | X |
+* -----------------------------------------
+*
+* And are moved into their proper places in FP_SCR1. If digit e4
+* is non-zero, OPERR is signaled. In all cases, all 4 digits are
+* written as specified in the 881/882 manual for packed decimal.
+*
+* Register usage:
+* Input/Output
+* d0: x/LEN call to binstr - final is 0
+* d1: x/scratch (0);shift count for final exponent packing
+* d2: x/ms 32-bits of exp fraction/scratch
+* d3: x/ls 32-bits of exp fraction
+* d4: LEN/Unchanged
+* d5: ICTR:LAMBDA/LAMBDA:ICTR
+* d6: ILOG
+* d7: k-factor/Unchanged
+* a0: ptr to result string/ptr to L_SCR1(a6)
+* a1: ptr to PTENxx array/Unchanged
+* a2: ptr to FP_SCR2(a6)/Unchanged
+* fp0: abs(YINT) adjusted/float(ILOG)
+* fp1: 10^ISCALE/Unchanged
+* fp2: 10^LEN/Unchanged
+* F_SCR1:Work area for final result/BCD result
+* F_SCR2:Y with original exponent/ILOG/10^4
+* L_SCR1:original USER_FPCR/Exponent digits on return from binstr
+* L_SCR2:first word of X packed/Unchanged
+
+A15_st:
+ tst.b BINDEC_FLG(a6) ;check for denorm
+ beq.b not_denorm ;if norm, go to the norm exponent selection
+ ftst.x fp0 ;test for zero
+ fbeq.w den_zero ;if zero, use k-factor or 4933
+ fmove.l d6,fp0 ;float ILOG
+ fabs.x fp0 ;get abs of ILOG
+ bra.b convrt ;go convert the exponent
+den_zero:
+ tst.l d7 ;check sign of the k-factor
+ blt.b use_ilog ;if negative, use ILOG
+ fmove.s F4933,fp0 ;force exponent to 4933
+ bra.b convrt ;do it
+use_ilog:
+ fmove.l d6,fp0 ;float ILOG
+ fabs.x fp0 ;get abs of ILOG
+ bra.b convrt ;go convert the exponent
+not_denorm:
+ ftst.x fp0 ;test for zero
+ fbne.w not_zero ;if nonzero, use ILOG; if zero, force exponent
+ fmove.s FONE,fp0 ;force exponent to 1
+ bra.b convrt ;do it
+not_zero:
+ fmove.l d6,fp0 ;float ILOG
+ fabs.x fp0 ;get abs of ILOG
+convrt:
+ fdiv.x 24(a1),fp0 ;compute ILOG/10^4
+ fmove.x fp0,FP_SCR2(a6) ;store fp0 in memory
+ move.l 4(a2),d2 ;move word 2 to d2 (a2 -> FP_SCR2, set in A10)
+ move.l 8(a2),d3 ;move word 3 to d3
+ move.w (a2),d0 ;move exp to d0
+ beq.b x_loop_fin ;if zero, skip the shift
+ subi.w #$3ffd,d0 ;subtract off bias less 2
+ neg.w d0 ;make exp positive
+x_loop:
+ lsr.l #1,d2 ;shift d2:d3 right
+ roxr.l #1,d3 ;the number of places
+ dbf.w d0,x_loop ;given in d0
+x_loop_fin:
+ clr.l d1 ;put zero in d1 for addx
+ addi.l #$00000080,d3 ;inc at bit 7
+ addx.l d1,d2 ;continue inc
+ andi.l #$ffffff80,d3 ;strip off lsb not used by 882
+ move.l #4,d0 ;put 4 in d0 for binstr call
+ lea.l L_SCR1(a6),a0 ;a0 is ptr to L_SCR1 for exp digits
+ bsr binstr ;call binstr to convert exp
+ move.l L_SCR1(a6),d0 ;load L_SCR1 lword to d0
+ move.l #12,d1 ;use d1 for shift count
+ lsr.l d1,d0 ;shift d0 right by 12
+ bfins d0,FP_SCR1(a6){4:12} ;put e3:e2:e1 in FP_SCR1
+ lsr.l d1,d0 ;shift d0 right by 12
+ bfins d0,FP_SCR1(a6){16:4} ;put e4 in FP_SCR1
+ tst.b d0 ;check if e4 is zero
+ beq.b A16_st ;if zero, skip rest
+ or.l #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR
+
+
+* A16. Write sign bits to final string.
+* Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
+*
+* Register usage:
+* Input/Output
+* d0: x/scratch - final is x
+* d2: x/x
+* d3: x/x
+* d4: LEN/Unchanged
+* d5: ICTR:LAMBDA/LAMBDA:ICTR
+* d6: ILOG/ILOG adjusted
+* d7: k-factor/Unchanged
+* a0: ptr to L_SCR1(a6)/Unchanged
+* a1: ptr to PTENxx array/Unchanged
+* a2: ptr to FP_SCR2(a6)/Unchanged
+* fp0: float(ILOG)/Unchanged
+* fp1: 10^ISCALE/Unchanged
+* fp2: 10^LEN/Unchanged
+* F_SCR1:BCD result with correct signs
+* F_SCR2:ILOG/10^4
+* L_SCR1:Exponent digits on return from binstr
+* L_SCR2:first word of X packed/Unchanged
+
+A16_st:
+ clr.l d0 ;clr d0 for collection of signs
+ andi.b #$0f,FP_SCR1(a6) ;clear first nibble of FP_SCR1
+ tst.l L_SCR2(a6) ;check sign of original operand (saved in A1)
+ bge.b mant_p ;if pos, don't set SM
+ moveq.l #2,d0 ;move 2 in to d0 for SM
+mant_p:
+ tst.l d6 ;check sign of ILOG
+ bge.b wr_sgn ;if pos, don't set SE
+ addq.l #1,d0 ;set bit 0 in d0 for SE
+wr_sgn:
+ bfins d0,FP_SCR1(a6){0:2} ;insert SM and SE into FP_SCR1
+
+* Clean up and restore all registers used.
+
+ fmove.l #0,FPSR ;clear possible inex2/ainex bits
+ fmovem.x (a7)+,fp0-fp2 ;restore fp0-fp2 saved at entry
+ movem.l (a7)+,d2-d7/a2 ;restore d2-d7/a2 saved at entry
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/binstr.sa b/sys/arch/m68k/fpsp/binstr.sa
new file mode 100644
index 00000000000..eeecf07f120
--- /dev/null
+++ b/sys/arch/m68k/fpsp/binstr.sa
@@ -0,0 +1,165 @@
+* $NetBSD: binstr.sa,v 1.3 1994/10/26 07:48:53 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* binstr.sa 3.3 12/19/90
+*
+*
+* Description: Converts a 64-bit binary integer to bcd.
+*
+* Input: 64-bit binary integer in d2:d3, desired length (LEN) in
+* d0, and a pointer to start in memory for bcd characters
+* in a0. (This pointer must point to byte 4 of the first
+* lword of the packed decimal memory string.)
+*
+* Output: LEN bcd digits representing the 64-bit integer.
+*
+* Algorithm:
+* The 64-bit binary is assumed to have a decimal point before
+* bit 63. The fraction is multiplied by 10 using a mul by 2
+* shift and a mul by 8 shift. The bits shifted out of the
+* msb form a decimal digit. This process is iterated until
+* LEN digits are formed.
+*
+* A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the
+* digit formed will be assumed the least significant. This is
+* to force the first byte formed to have a 0 in the upper 4 bits.
+*
+* A2. Beginning of the loop:
+* Copy the fraction in d2:d3 to d4:d5.
+*
+* A3. Multiply the fraction in d2:d3 by 8 using bit-field
+* extracts and shifts. The three msbs from d2 will go into
+* d1.
+*
+* A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb
+* will be collected by the carry.
+*
+* A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5
+* into d2:d3. D1 will contain the bcd digit formed.
+*
+* A6. Test d7. If zero, the digit formed is the ms digit. If non-
+* zero, it is the ls digit. Put the digit in its place in the
+* upper word of d7. If it is the ls digit, write the byte
+* from d7 to memory.
+*
+* A7. Decrement d0 (LEN counter) and repeat the loop until zero.
+*
+* Implementation Notes:
+*
+* The registers are used as follows:
+*
+* d0: LEN counter
+* d1: temp used to form the digit
+* d2: upper 32-bits of fraction for mul by 8
+* d3: lower 32-bits of fraction for mul by 8
+* d4: upper 32-bits of fraction for mul by 2
+* d5: lower 32-bits of fraction for mul by 2
+* d6: temp for bit-field extracts
+* d7: byte digit formation word;digit count {0,1}
+* a0: pointer into memory for packed bcd string formation
+*
+
+BINSTR IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xdef binstr
+binstr:
+ movem.l d0-d7,-(a7) ;save d0-d7; restored at end_bstr
+* In: d2:d3 = 64-bit fraction, d0 = LEN, a0 -> first output byte.
+* A1: Init d7 (low word = digit flag, other word = held ms digit)
+*
+ moveq.l #1,d7 ;flag=1: first digit is stored as a low nibble
+ subq.l #1,d0 ;for dbf d0 would have LEN+1 passes
+*
+* A2. Copy d2:d3 to d4:d5. Start loop.
+*
+loop:
+ move.l d2,d4 ;copy the fraction before muls
+ move.l d3,d5 ;to d4:d5
+*
+* A3. Multiply d2:d3 by 8; extract msbs into d1.
+*
+ bfextu d2{0:3},d1 ;copy 3 msbs of d2 into d1
+ asl.l #3,d2 ;shift d2 left by 3 places
+ bfextu d3{0:3},d6 ;copy 3 msbs of d3 into d6
+ asl.l #3,d3 ;shift d3 left by 3 places
+ or.l d6,d2 ;or in msbs from d3 into d2
+*
+* A4. Multiply d4:d5 by 2; add carry out to d1.
+*
+ add.l d5,d5 ;mul d5 by 2
+ addx.l d4,d4 ;mul d4 by 2
+ swap d6 ;d6 low word = 0 (bfextu cleared the upper word)
+ addx.w d6,d1 ;d1 = d1 + 0 + X (carry out of the mul by 2)
+*
+* A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
+*
+ add.l d5,d3 ;add lower 32 bits
+ nop ;ERRATA FIX #13 (Rev. 1.2 6/6/90)
+ addx.l d4,d2 ;add with extend upper 32 bits
+ nop ;ERRATA FIX #13 (Rev. 1.2 6/6/90)
+ addx.w d6,d1 ;d1 = d1 + 0 + X (carry out of the 64-bit add)
+ swap d6 ;put d6 words back; upper word is 0 again
+*
+* A6. Test d7 and branch.
+*
+ tst.w d7 ;flag zero means this digit is the ms nibble
+ beq.b first_d ;if so, hold it; else form byte & write
+sec_d:
+ swap d7 ;bring held first digit to low word (d7b)
+ asl.w #4,d7 ;first digit in upper 4 bits d7b
+ add.w d1,d7 ;add in ls digit to d7b
+ move.b d7,(a0)+ ;store d7b byte in memory, advance a0
+ swap d7 ;bring digit flag word (d7a) back to low word
+ clr.w d7 ;set d7a to signal no digits done
+ dbf.w d0,loop ;do loop some more!
+ bra.b end_bstr ;finished, so exit
+first_d:
+ swap d7 ;put digit word in d7b
+ move.w d1,d7 ;put new digit in d7b
+ swap d7 ;bring digit flag word (d7a) back to low word
+ addq.w #1,d7 ;set d7a to signal first digit done
+ dbf.w d0,loop ;do loop some more!
+ swap d7 ;LEN used up with one digit still held
+ lsl.w #4,d7 ;move it to upper 4 bits (low nibble 0)
+ move.b d7,(a0)+ ;store it in memory string
+*
+* Clean up and return. The bcd digits were written through a0.
+*
+end_bstr:
+ movem.l (a7)+,d0-d7 ;a0 is not restored; it points past the last byte
+ rts
+ end
diff --git a/sys/arch/m68k/fpsp/bugfix.sa b/sys/arch/m68k/fpsp/bugfix.sa
new file mode 100644
index 00000000000..d38f81656b0
--- /dev/null
+++ b/sys/arch/m68k/fpsp/bugfix.sa
@@ -0,0 +1,520 @@
+* $NetBSD: bugfix.sa,v 1.3 1994/10/26 07:48:55 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* bugfix.sa 3.2 1/31/91
+*
+*
+* This file contains workarounds for bugs in the 040
+* relating to the Floating-Point Software Package (FPSP)
+*
+* Fixes for bugs: 1238
+*
+* Bug: 1238
+*
+*
+* /* The following dirty_bit clear should be left in
+* * the handler permanently to improve throughput.
+* * The dirty_bits are located at bits [23:16] in
+* * longword $08 in the busy frame $4x60. Bit 16
+* * corresponds to FP0, bit 17 corresponds to FP1,
+* * and so on.
+* */
+* if (E3_exception_just_serviced) {
+* dirty_bit[cmdreg3b[9:7]] = 0;
+* }
+*
+* if (fsave_format_version != $40) {goto NOFIX}
+*
+* if !(E3_exception_just_serviced) {goto NOFIX}
+* if (cupc == 0000000) {goto NOFIX}
+* if ((cmdreg1b[15:13] != 000) &&
+* (cmdreg1b[15:10] != 010001)) {goto NOFIX}
+* if (((cmdreg1b[15:13] != 000) || ((cmdreg1b[12:10] != cmdreg2b[9:7]) &&
+* (cmdreg1b[12:10] != cmdreg3b[9:7])) ) &&
+* ((cmdreg1b[ 9: 7] != cmdreg2b[9:7]) &&
+* (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) ) {goto NOFIX}
+*
+* /* Note: for 6d43b or 8d43b, you may want to add the following code
+* * to get better coverage. (If you do not insert this code, the part
+* * won't lock up; it will simply get the wrong answer.)
+* * Do NOT insert this code for 10d43b or later parts.
+* *
+* * if (fpiarcu == integer stack return address) {
+* * cupc = 0000000;
+* * goto NOFIX;
+* * }
+* */
+*
+* if (cmdreg1b[15:13] != 000) {goto FIX_OPCLASS2}
+* FIX_OPCLASS0:
+* if (((cmdreg1b[12:10] == cmdreg2b[9:7]) ||
+* (cmdreg1b[ 9: 7] == cmdreg2b[9:7])) &&
+* (cmdreg1b[12:10] != cmdreg3b[9:7]) &&
+* (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) { /* xu conflict only */
+* /* We execute the following code if there is an
+* xu conflict and NOT an nu conflict */
+*
+* /* first save some values on the fsave frame */
+* stag_temp = STAG[fsave_frame];
+* cmdreg1b_temp = CMDREG1B[fsave_frame];
+* dtag_temp = DTAG[fsave_frame];
+* ete15_temp = ETE15[fsave_frame];
+*
+* CUPC[fsave_frame] = 0000000;
+* FRESTORE
+* FSAVE
+*
+* /* If the xu instruction is exceptional, we punt.
+* * Otherwise, we would have to include OVFL/UNFL handler
+* * code here to get the correct answer.
+* */
+* if (fsave_frame_format == $4060) {goto KILL_PROCESS}
+*
+* fsave_frame = /* build a long frame of all zeros */
+* fsave_frame_format = $4060; /* label it as long frame */
+*
+* /* load it with the temps we saved */
+* STAG[fsave_frame] = stag_temp;
+* CMDREG1B[fsave_frame] = cmdreg1b_temp;
+* DTAG[fsave_frame] = dtag_temp;
+* ETE15[fsave_frame] = ete15_temp;
+*
+* /* Make sure that the cmdreg3b dest reg is not going to
+* * be destroyed by a FMOVEM at the end of all this code.
+* * If it is, you should move the current value of the reg
+* * onto the stack so that the reg will be loaded with that value.
+* */
+*
+* /* All done. Proceed with the code below */
+* }
+*
+* etemp = FP_reg_[cmdreg1b[12:10]];
+* ete15 = ~ete14;
+* cmdreg1b[15:10] = 010010;
+* clear(bug_flag_procIDxxxx);
+* FRESTORE and return;
+*
+*
+* FIX_OPCLASS2:
+* if ((cmdreg1b[9:7] == cmdreg2b[9:7]) &&
+* (cmdreg1b[9:7] != cmdreg3b[9:7])) { /* xu conflict only */
+* /* We execute the following code if there is an
+* xu conflict and NOT an nu conflict */
+*
+* /* first save some values on the fsave frame */
+* stag_temp = STAG[fsave_frame];
+* cmdreg1b_temp = CMDREG1B[fsave_frame];
+* dtag_temp = DTAG[fsave_frame];
+* ete15_temp = ETE15[fsave_frame];
+* etemp_temp = ETEMP[fsave_frame];
+*
+* CUPC[fsave_frame] = 0000000;
+* FRESTORE
+* FSAVE
+*
+*
+* /* If the xu instruction is exceptional, we punt.
+* * Otherwise, we would have to include OVFL/UNFL handler
+* * code here to get the correct answer.
+* */
+* if (fsave_frame_format == $4060) {goto KILL_PROCESS}
+*
+* fsave_frame = /* build a long frame of all zeros */
+* fsave_frame_format = $4060; /* label it as long frame */
+*
+* /* load it with the temps we saved */
+* STAG[fsave_frame] = stag_temp;
+* CMDREG1B[fsave_frame] = cmdreg1b_temp;
+* DTAG[fsave_frame] = dtag_temp;
+* ETE15[fsave_frame] = ete15_temp;
+* ETEMP[fsave_frame] = etemp_temp;
+*
+* /* Make sure that the cmdreg3b dest reg is not going to
+* * be destroyed by a FMOVEM at the end of all this code.
+* * If it is, you should move the current value of the reg
+* * onto the stack so that the reg will be loaded with that value.
+* */
+*
+* /* All done. Proceed with the code below */
+* }
+*
+* if (etemp_exponent == min_sgl) etemp_exponent = min_dbl;
+* if (etemp_exponent == max_sgl) etemp_exponent = max_dbl;
+* cmdreg1b[15:10] = 010101;
+* clear(bug_flag_procIDxxxx);
+* FRESTORE and return;
+*
+*
+* NOFIX:
+* clear(bug_flag_procIDxxxx);
+* FRESTORE and return;
+*
+
+BUGFIX IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref fpsp_fmt_error
+
+ xdef b1238_fix
+b1238_fix:
+*
+* This code is entered only on completion of the handling of an
+* nu-generated ovfl, unfl, or inex exception. If the version
+* number of the fsave is not $40, this handler is not necessary.
+* Simply branch to fix_done and exit normally.
+*
+ cmpi.b #VER_40,4(a7) ;byte just above the bsr return address
+ bne.w fix_done ;not a version $40 frame, nothing to fix
+*
+* Test for cu_savepc equal to zero. If so, this is not a bug
+* #1238 case.
+*
+ move.b CU_SAVEPC(a6),d0
+ andi.b #$FE,d0 ;ignore bit 0 when testing for zero
+ beq fix_done ;if zero, this is not bug #1238
+
+*
+* Test the register conflict aspect. If opclass0, check for
+* cu src or cu dest equal to nu dest; if so, go to op0.
+* Else check for cu dest or cu src equal to xu dest; if so,
+* go to op0_xu. Opclass 2 is checked separately at op2sgl.
+* Otherwise exit, it is not the bug case.
+*
+* Check for opclass 0. If not, go and check for opclass 2 and sgl.
+*
+ move.w CMDREG1B(a6),d0
+ andi.w #$E000,d0 ;strip all but opclass
+ bne op2sgl ;not opclass 0, check op2
+*
+* Check for cu and nu register conflict. If one exists, this takes
+* priority over a cu and xu conflict.
+*
+ bfextu CMDREG1B(a6){3:3},d0 ;get 1st src
+ bfextu CMDREG3B(a6){6:3},d1 ;get 3rd dest
+ cmp.b d0,d1 ;cmp 1st src with 3rd dest
+ beq.b op0 ;if equal, continue bugfix
+*
+* Check for cu dest equal to nu dest. If so, go and fix the
+* bug condition. Otherwise, go on to the cu and xu check.
+*
+ bfextu CMDREG1B(a6){6:3},d0 ;get 1st dest
+ cmp.b d0,d1 ;cmp 1st dest with 3rd dest
+ beq.b op0 ;if equal, continue bugfix
+*
+* Check for cu and xu register conflict.
+*
+ bfextu CMDREG2B(a6){6:3},d1 ;get 2nd dest
+ cmp.b d0,d1 ;cmp 1st dest with 2nd dest
+ beq.b op0_xu ;if equal, continue bugfix
+ bfextu CMDREG1B(a6){3:3},d0 ;get 1st src
+ cmp.b d0,d1 ;cmp 1st src with 2nd dest
+ beq op0_xu ;if equal, continue bugfix
+ bne fix_done ;if the reg checks fail, exit
+*
+* We have the opclass 0 situation.
+*
+op0:
+ bfextu CMDREG1B(a6){3:3},d0 ;get source register no
+ move.l #7,d1 ;d1 = 7 - source register no
+ sub.l d0,d1
+ clr.l d0
+ bset.l d1,d0 ;d0 = one-bit dynamic register list for fmovem
+ fmovem.x d0,ETEMP(a6) ;load source to ETEMP
+
+ move.b #$12,d0 ;$12 = %010010
+ bfins d0,CMDREG1B(a6){0:6} ;opclass 2, extended
+*
+* Set ETEMP exponent bit 15 as the opposite of ete14
+*
+ btst #6,ETEMP_EX(a6) ;check etemp exponent bit 14
+ beq setete15 ;ete14 clear, so set ete15
+ bclr #etemp15_bit,STAG(a6) ;ete14 set, so clear ete15
+ bra finish
+setete15:
+ bset #etemp15_bit,STAG(a6)
+ bra finish
+
+*
+* We have the case in which a conflict exists between the cu src or
+* dest and the dest of the xu. We must clear the instruction in
+* the cu and restore the state, allowing the instruction in the
+* xu to complete. Remember, the instruction in the nu
+* was exceptional, and was completed by the appropriate handler.
+* If the result of the xu instruction is not exceptional, we can
+* restore the instruction from the cu to the frame and continue
+* processing the original exception. If the result is also
+* exceptional, we choose to kill the process.
+*
+* Items saved from the stack:
+*
+* $3c stag - L_SCR1
+* $40 cmdreg1b - L_SCR2
+* $44 dtag - L_SCR3
+*
+* The cu savepc is set to zero, and the frame is restored to the
+* fpu.
+*
+op0_xu:
+ move.l STAG(a6),L_SCR1(a6) ;keep stag lword
+ move.l CMDREG1B(a6),L_SCR2(a6) ;keep cmdreg1b lword
+ move.l DTAG(a6),L_SCR3(a6) ;keep dtag lword
+ andi.l #$e0000000,L_SCR3(a6) ;keep only its 3 msbs
+ clr.b CU_SAVEPC(a6) ;remove the cu instruction from the frame
+ move.l (a7)+,d1 ;save return address from bsr
+ frestore (a7)+ ;let the xu instruction complete
+ fsave -(a7) ;and capture the resulting state
+*
+* Check if the instruction which just completed was exceptional.
+*
+ cmp.w #$4060,(a7) ;$4060 marks a busy frame
+ beq op0_xb ;busy: cannot recover
+*
+* It is necessary to isolate the result of the instruction in the
+* xu if it is to fp0 - fp3 and write that value to the USER_FPn
+* locations on the stack. The correct destination register is in
+* cmdreg2b.
+*
+ bfextu CMDREG2B(a6){6:3},d0 ;get dest register no
+ cmpi.l #3,d0
+ bgt.b op0_xi ;fp4-fp7: no USER_FPn copy to update
+ beq.b op0_fp3
+ cmpi.l #1,d0
+ blt.b op0_fp0
+ beq.b op0_fp1
+op0_fp2:
+ fmovem.x fp2,USER_FP2(a6)
+ bra.b op0_xi
+op0_fp1:
+ fmovem.x fp1,USER_FP1(a6)
+ bra.b op0_xi
+op0_fp0:
+ fmovem.x fp0,USER_FP0(a6)
+ bra.b op0_xi
+op0_fp3:
+ fmovem.x fp3,USER_FP3(a6)
+*
+* The frame returned is idle. We must build a busy frame to hold
+* the cu state information and setup etemp.
+*
+op0_xi:
+ move.l #22,d0 ;clear 23 lwords
+ clr.l (a7) ;plus the idle frame's own lword: 24 zeroed
+op0_loop:
+ clr.l -(a7)
+ dbf d0,op0_loop
+ move.l #$40600000,-(a7) ;format lword: version $40, size $60
+ move.l L_SCR1(a6),STAG(a6) ;put the saved cu state back
+ move.l L_SCR2(a6),CMDREG1B(a6)
+ move.l L_SCR3(a6),DTAG(a6)
+ move.b #$6,CU_SAVEPC(a6) ;make cu savepc non-zero again
+ move.l d1,-(a7) ;return bsr return address
+ bfextu CMDREG1B(a6){3:3},d0 ;get source register no
+ move.l #7,d1 ;d1 = 7 - source register no
+ sub.l d0,d1
+ clr.l d0
+ bset.l d1,d0 ;d0 = one-bit dynamic register list for fmovem
+ fmovem.x d0,ETEMP(a6) ;load source to ETEMP
+
+ move.b #$12,d0 ;$12 = %010010
+ bfins d0,CMDREG1B(a6){0:6} ;opclass 2, extended
+*
+* Set ETEMP exponent bit 15 as the opposite of ete14
+*
+ btst #6,ETEMP_EX(a6) ;check etemp exponent bit 14
+ beq op0_sete15 ;ete14 clear, so set ete15
+ bclr #etemp15_bit,STAG(a6) ;ete14 set, so clear ete15
+ bra finish
+op0_sete15:
+ bset #etemp15_bit,STAG(a6)
+ bra finish
+
+*
+* The frame returned is busy. It is not possible to reconstruct
+* the code sequence to allow completion. We will jump to
+* fpsp_fmt_error and allow the kernel to kill the process.
+*
+op0_xb:
+ jmp fpsp_fmt_error ;xu result was exceptional too: give up
+
+*
+* Check for opclass 2 and single size. If not both, exit.
+*
+op2sgl:
+ move.w CMDREG1B(a6),d0
+ andi.w #$FC00,d0 ;strip all but opclass and size
+ cmpi.w #$4400,d0 ;test for opclass 2 and size=sgl
+ bne fix_done ;if not, it is not bug 1238
+*
+* Check for cu dest equal to nu dest or equal to xu dest, with
+* a cu and nu conflict taking priority over a cu and xu conflict.
+* If either, go and fix the bug condition. Otherwise, exit.
+*
+ bfextu CMDREG1B(a6){6:3},d0 ;get 1st dest
+ bfextu CMDREG3B(a6){6:3},d1 ;get 3rd dest
+ cmp.b d0,d1 ;cmp 1st dest with 3rd dest
+ beq op2_com ;if equal, continue bugfix
+ bfextu CMDREG2B(a6){6:3},d1 ;get 2nd dest
+ cmp.b d0,d1 ;cmp 1st dest with 2nd dest
+ bne fix_done ;if the reg checks fail, exit; else fall into op2_xu
+*
+* We have the case in which a conflict exists between the cu src or
+* dest and the dest of the xu. We must clear the instruction in
+* the cu and restore the state, allowing the instruction in the
+* xu to complete. Remember, the instruction in the nu
+* was exceptional, and was completed by the appropriate handler.
+* If the result of the xu instruction is not exceptional, we can
+* restore the instruction from the cu to the frame and continue
+* processing the original exception. If the result is also
+* exceptional, we choose to kill the process.
+*
+* Items saved from the stack:
+*
+* $3c stag - L_SCR1
+* $40 cmdreg1b - L_SCR2
+* $44 dtag - L_SCR3
+* etemp - FP_SCR2
+*
+* The cu savepc is set to zero, and the frame is restored to the
+* fpu.
+*
+op2_xu:
+ move.l STAG(a6),L_SCR1(a6) ;keep stag lword
+ move.l CMDREG1B(a6),L_SCR2(a6) ;keep cmdreg1b lword
+ move.l DTAG(a6),L_SCR3(a6) ;keep dtag lword
+ andi.l #$e0000000,L_SCR3(a6) ;keep only its 3 msbs
+ clr.b CU_SAVEPC(a6) ;remove the cu instruction from the frame
+ move.l ETEMP(a6),FP_SCR2(a6) ;keep all three etemp lwords
+ move.l ETEMP_HI(a6),FP_SCR2+4(a6)
+ move.l ETEMP_LO(a6),FP_SCR2+8(a6)
+ move.l (a7)+,d1 ;save return address from bsr
+ frestore (a7)+ ;let the xu instruction complete
+ fsave -(a7) ;and capture the resulting state
+*
+* Check if the instruction which just completed was exceptional.
+*
+ cmp.w #$4060,(a7) ;$4060 marks a busy frame
+ beq op2_xb ;busy: cannot recover
+*
+* It is necessary to isolate the result of the instruction in the
+* xu if it is to fp0 - fp3 and write that value to the USER_FPn
+* locations on the stack. The correct destination register is in
+* cmdreg2b.
+*
+ bfextu CMDREG2B(a6){6:3},d0 ;get dest register no
+ cmpi.l #3,d0
+ bgt.b op2_xi ;fp4-fp7: no USER_FPn copy to update
+ beq.b op2_fp3
+ cmpi.l #1,d0
+ blt.b op2_fp0
+ beq.b op2_fp1
+op2_fp2:
+ fmovem.x fp2,USER_FP2(a6)
+ bra.b op2_xi
+op2_fp1:
+ fmovem.x fp1,USER_FP1(a6)
+ bra.b op2_xi
+op2_fp0:
+ fmovem.x fp0,USER_FP0(a6)
+ bra.b op2_xi
+op2_fp3:
+ fmovem.x fp3,USER_FP3(a6)
+*
+* The frame returned is idle. We must build a busy frame to hold
+* the cu state information and fix up etemp.
+*
+op2_xi:
+ move.l #22,d0 ;clear 23 lwords
+ clr.l (a7) ;plus the idle frame's own lword: 24 zeroed
+op2_loop:
+ clr.l -(a7)
+ dbf d0,op2_loop
+ move.l #$40600000,-(a7) ;format lword: version $40, size $60
+ move.l L_SCR1(a6),STAG(a6) ;put the saved cu state back
+ move.l L_SCR2(a6),CMDREG1B(a6)
+ move.l L_SCR3(a6),DTAG(a6)
+ move.b #$6,CU_SAVEPC(a6) ;make cu savepc non-zero again
+ move.l FP_SCR2(a6),ETEMP(a6) ;put the saved etemp back
+ move.l FP_SCR2+4(a6),ETEMP_HI(a6)
+ move.l FP_SCR2+8(a6),ETEMP_LO(a6)
+ move.l d1,-(a7) ;put back the bsr return address
+ bra op2_com ;op2_com is also the next label in the file
+
+*
+* We have the opclass 2 single source situation.
+*
+op2_com:
+ move.b #$15,d0 ;$15 = %010101
+ bfins d0,CMDREG1B(a6){0:6} ;opclass 2, double
+
+ cmp.w #$407F,ETEMP_EX(a6) ;single +max
+ bne.b case2 ;no match, try the next exponent
+ move.w #$43FF,ETEMP_EX(a6) ;to double +max
+ bra finish
+case2:
+ cmp.w #$C07F,ETEMP_EX(a6) ;single -max
+ bne.b case3 ;no match, try the next exponent
+ move.w #$C3FF,ETEMP_EX(a6) ;to double -max
+ bra finish
+case3:
+ cmp.w #$3F80,ETEMP_EX(a6) ;single +min
+ bne.b case4 ;no match, try the next exponent
+ move.w #$3C00,ETEMP_EX(a6) ;to double +min
+ bra finish
+case4:
+ cmp.w #$BF80,ETEMP_EX(a6) ;single -min
+ bne fix_done ;none matched: leave the exponent as is
+ move.w #$BC00,ETEMP_EX(a6) ;to double -min
+ bra finish
+*
+* The frame returned is busy. It is not possible to reconstruct
+* the code sequence to allow completion. fpsp_fmt_error causes
+* an fline illegal instruction to be executed.
+*
+* You should replace the jump to fpsp_fmt_error with a jump
+* to the entry point used to kill a process.
+*
+op2_xb:
+ jmp fpsp_fmt_error ;xu result was exceptional too: give up
+
+*
+* Enter here if the case is not of the situations affected by
+* bug #1238, or if the fix is completed, and exit.
+*
+finish:
+fix_done:
+ rts ;single exit for both the fixed and the no-fix paths
+
+ end
diff --git a/sys/arch/m68k/fpsp/copyright.s b/sys/arch/m68k/fpsp/copyright.s
new file mode 100644
index 00000000000..c6039f91313
--- /dev/null
+++ b/sys/arch/m68k/fpsp/copyright.s
@@ -0,0 +1,32 @@
+| $NetBSD: copyright.s,v 1.2 1994/10/26 07:48:57 cgd Exp $
+
+.text
+.ascii "MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP\n"
+.ascii "M68000 Hi-Performance Microprocessor Division\n"
+.ascii "M68040 Software Package\n"
+.ascii "\n"
+.ascii "M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.\n"
+.ascii "All rights reserved.\n"
+.ascii "\n"
+.ascii "THE SOFTWARE is provided on an \"AS IS\" basis and without warranty.\n"
+.ascii "To the maximum extent permitted by applicable law,\n"
+.ascii "MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,\n"
+.ascii "INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A\n"
+.ascii "PARTICULAR PURPOSE and any warranty against infringement with\n"
+.ascii "regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)\n"
+.ascii "and any accompanying written materials. \n"
+.ascii "\n"
+.ascii "To the maximum extent permitted by applicable law,\n"
+.ascii "IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER\n"
+.ascii "(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS\n"
+.ascii "PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR\n"
+.ascii "OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE\n"
+.ascii "SOFTWARE. Motorola assumes no responsibility for the maintenance\n"
+.ascii "and support of the SOFTWARE. \n"
+.ascii "\n"
+.ascii "You are hereby granted a copyright license to use, modify, and\n"
+.ascii "distribute the SOFTWARE so long as this entire notice is retained\n"
+.ascii "without alteration in any modified and/or redistributed versions,\n"
+.ascii "and that such modified versions are clearly identified as such.\n"
+.ascii "No licenses are granted by implication, estoppel or otherwise\n"
+.ascii "under any patents or trademarks of Motorola, Inc.\n"
diff --git a/sys/arch/m68k/fpsp/decbin.sa b/sys/arch/m68k/fpsp/decbin.sa
new file mode 100644
index 00000000000..5f7106427c5
--- /dev/null
+++ b/sys/arch/m68k/fpsp/decbin.sa
@@ -0,0 +1,531 @@
+* $NetBSD: decbin.sa,v 1.2 1994/10/26 07:48:59 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* decbin.sa 3.3 12/19/90
+*
+* Description: Converts normalized packed bcd value pointed to by
+* register A6 to extended-precision value in FP0.
+*
+* Input: Normalized packed bcd value in ETEMP(a6).
+*
+* Output: Exact floating-point representation of the packed bcd value.
+*
+* Saves and Modifies: D2-D5
+*
+* Speed: The program decbin takes ??? cycles to execute.
+*
+* Object Size:
+*
+* External Reference(s): None.
+*
+* Algorithm:
+* Expected is a normal bcd (i.e. non-exceptional; all inf, zero,
+* and NaN operands are dispatched without entering this routine)
+* value in 68881/882 format at location ETEMP(A6).
+*
+* A1. Convert the bcd exponent to binary by successive adds and muls.
+* Set the sign according to SE. Subtract 16 to compensate
+* for the mantissa which is to be interpreted as 17 integer
+* digits, rather than 1 integer and 16 fraction digits.
+* Note: this operation can never overflow.
+*
+* A2. Convert the bcd mantissa to binary by successive
+* adds and muls in FP0. Set the sign according to SM.
+* The mantissa digits will be converted with the decimal point
+* assumed following the least-significant digit.
+* Note: this operation can never overflow.
+*
+* A3. Count the number of leading/trailing zeros in the
+* bcd string. If SE is positive, count the leading zeros;
+* if negative, count the trailing zeros. Set the adjusted
+* exponent equal to the exponent from A1 and the zero count
+* added if SE = 1 and subtracted if SE = 0. Scale the
+* mantissa the equivalent of forcing in the bcd value:
+*
+* SE = 0 a non-zero digit in the integer position
+* SE = 1 a non-zero digit in Mant0, lsd of the fraction
+*
+* this will insure that any value, regardless of its
+* representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted
+* consistently.
+*
+* A4. Calculate the factor 10^exp in FP1 using a table of
+* 10^(2^n) values. To reduce the error in forming factors
+* greater than 10^27, a directed rounding scheme is used with
+* tables rounded to RN, RM, and RP, according to the table
+* in the comments of the pwrten section.
+*
+* A5. Form the final binary number by scaling the mantissa by
+* the exponent factor. This is done by multiplying the
+* mantissa in FP0 by the factor in FP1 if the adjusted
+* exponent sign is positive, and dividing FP0 by FP1 if
+* it is negative.
+*
+* Clean up and return. Check if the final mul or div resulted
+* in an inex2 exception. If so, set inex1 in the fpsr and
+* check if the inex1 exception is enabled. If so, set d7 upper
+* word to $0100. This will signal unimp.sa that an enabled inex1
+* exception occurred. Unimp will fix the stack.
+*
+
+DECBIN IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+*
+* PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
+* to nearest, minus, and plus, respectively. The tables include
+* 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
+* is required until the power is greater than 27, however, all
+* tables include the first 5 for ease of indexing.
+*
+ xref PTENRN
+ xref PTENRM
+ xref PTENRP
+
+RTABLE dc.b 0,0,0,0
+ dc.b 2,3,2,3
+ dc.b 2,3,3,2
+ dc.b 3,2,2,3
+
+ xdef decbin
+ xdef calc_e
+ xdef pwrten
+ xdef calc_m
+ xdef norm
+ xdef ap_st_z
+ xdef ap_st_n
+* Mantissa lword scan: dbf count for 8 digits, bit offset of 1st digit
+FNIBS equ 7
+FSTRT equ 0
+* Exponent scan: bit offset of 1st digit, dbf count for 3 digits
+ESTRT equ 4
+EDIGITS equ 2
+*
+* Constants in single precision (0.0, 1.0 and 10.0)
+FZERO dc.l $00000000
+FONE dc.l $3F800000
+FTEN dc.l $41200000
+
+TEN equ 10
+
+*
+decbin:
+ fmove.l #0,FPCR ;clr real fpcr
+ movem.l d2-d5,-(a7) ;save the working registers d2-d5
+*
+* Calculate exponent:
+* 1. Copy bcd value in memory for use as a working copy.
+* 2. Calculate absolute value of exponent in d1 by mul and add.
+* 3. Correct for exponent sign.
+* 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
+* (i.e., all digits assumed left of the decimal point.)
+*
+* Register usage:
+*
+* calc_e:
+* (*) d0: temp digit storage
+* (*) d1: accumulator for binary exponent
+* (*) d2: digit count
+* (*) d3: offset pointer
+* ( ) d4: first word of bcd
+* ( ) a0: pointer to working bcd value
+* ( ) a6: pointer to original bcd value
+* (*) FP_SCR1: working copy of original bcd value
+* (*) L_SCR1: copy of original exponent word
+*
+calc_e:
+ move.l #EDIGITS,d2 ;dbf count for the 3 exponent digits
+ moveq.l #ESTRT,d3 ;bit offset of the first exponent digit
+ lea.l FP_SCR1(a6),a0 ;load tmp bcd storage address
+ move.l ETEMP(a6),(a0) ;save input bcd value
+ move.l ETEMP_HI(a6),4(a0) ;save words 2 and 3
+ move.l ETEMP_LO(a6),8(a0) ;and work with these
+ move.l (a0),d4 ;get first word of bcd
+ clr.l d1 ;zero d1 for accumulator
+e_gd:
+ mulu.l #TEN,d1 ;mul partial product by one digit place
+ bfextu d4{d3:4},d0 ;get the digit and zero extend into d0
+ add.l d0,d1 ;d1 = d1 + d0
+ addq.b #4,d3 ;advance d3 to the next digit
+ dbf.w d2,e_gd ;if we have used all 3 digits, exit loop
+ btst #30,d4 ;get SE
+ beq.b e_pos ;don't negate if pos
+ neg.l d1 ;negate before subtracting
+e_pos:
+ sub.l #16,d1 ;sub to compensate for shift of mant
+ bge.b e_save ;if still pos, do not neg
+ neg.l d1 ;now negative, make pos and set SE
+ or.l #$40000000,d4 ;set SE in d4,
+ or.l #$40000000,(a0) ;and in working bcd
+e_save:
+ move.l d1,L_SCR1(a6) ;save abs value of adjusted exp in memory
+*
+*
+* Calculate mantissa:
+* 1. Calculate absolute value of mantissa in fp0 by mul and add.
+* 2. Correct for mantissa sign.
+* (i.e., all digits assumed left of the decimal point.)
+*
+* Register usage:
+*
+* calc_m:
+* (*) d0: temp digit storage
+* (*) d1: lword counter
+* (*) d2: digit count
+* (*) d3: offset pointer
+* ( ) d4: words 2 and 3 of bcd
+* ( ) a0: pointer to working bcd value
+* ( ) a6: pointer to original bcd value
+* (*) fp0: mantissa accumulator
+* ( ) FP_SCR1: working copy of original bcd value
+* ( ) L_SCR1: copy of original exponent word
+*
+calc_m:
+ moveq.l #1,d1 ;lword index into working bcd, init to 1
+ fmove.s FZERO,fp0 ;accumulator
+*
+*
+* Since the packed number has a long word between the first & second parts,
+* get the integer digit then skip down & get the rest of the
+* mantissa. We will unroll the loop once.
+*
+ bfextu (a0){28:4},d0 ;integer part is ls digit in long word
+ fadd.b d0,fp0 ;add digit to sum in fp0
+*
+*
+* Get the rest of the mantissa.
+*
+loadlw:
+ move.l (a0,d1.L*4),d4 ;load mantissa longword into d4
+ moveq.l #FSTRT,d3 ;counter to pick up digits
+ moveq.l #FNIBS,d2 ;dbf count for the 8 digits of this lword
+md2b:
+ fmul.s FTEN,fp0 ;fp0 = fp0 * 10
+ bfextu d4{d3:4},d0 ;get the digit and zero extend
+ fadd.b d0,fp0 ;fp0 = fp0 + digit
+*
+*
+* If all the digits (8) in that long word have been converted (d2=0),
+* then inc d1 (=2) to point to the next long word and reset d3 to 0
+* to initialize the digit offset, and set d2 to 7 for the digit count;
+* else continue with this long word.
+*
+ addq.b #4,d3 ;advance d3 to the next digit
+ dbf.w d2,md2b ;check for last digit in this lw
+nextlw:
+ addq.l #1,d1 ;inc lw pointer in mantissa
+ cmp.l #2,d1 ;test for last lw
+ ble loadlw ;if d1 <= 2, go convert lword d1
+
+*
+* Check the sign of the mant and make the value in fp0 the same sign.
+*
+m_sign:
+ btst #31,(a0) ;test sign of the mantissa (SM)
+ beq.b ap_st_z ;if clear, go to append/strip zeros
+ fneg.x fp0 ;if set, negate fp0
+
+*
+* Append/strip zeros:
+*
+* For adjusted exponents which have an absolute value greater than 27*,
+* this routine calculates the amount needed to normalize the mantissa
+* for the adjusted exponent. That number is subtracted from the exp
+* if the exp was positive, and added if it was negative. The purpose
+* of this is to reduce the value of the exponent and the possibility
+* of error in calculation of pwrten.
+*
+* 1. Branch on the sign of the adjusted exponent.
+* 2p.(positive exp)
+* 2. Check M16 and the digits in lwords 2 and 3 in descending order.
+* 3. Add one for each zero encountered until a non-zero digit.
+* 4. Subtract the count from the exp.
+* 5. Check if the exp has crossed zero in #3 above; make the exp abs
+* and set SE.
+* 6. Multiply the mantissa by 10**count.
+* 2n.(negative exp)
+* 2. Check the digits in lwords 3 and 2 in descending order.
+* 3. Add one for each zero encountered until a non-zero digit.
+* 4. Add the count to the exp.
+* 5. Check if the exp has crossed zero in #3 above; clear SE.
+* 6. Divide the mantissa by 10**count.
+*
+* *Why 27? If the adjusted exponent is within -28 < expA < 28, then
+* any adjustment due to append/strip zeros will drive the resultant
+* exponent towards zero. Since all pwrten constants with a power
+* of 27 or less are exact, there is no need to use this routine to
+* attempt to lessen the resultant exponent.
+*
+* Register usage:
+*
+* ap_st_z:
+* (*) d0: temp digit storage
+* (*) d1: zero count
+* (*) d2: digit count
+* (*) d3: offset pointer
+* ( ) d4: first word of bcd
+* (*) d5: lword counter
+* ( ) a0: pointer to working bcd value
+* ( ) FP_SCR1: working copy of original bcd value
+* ( ) L_SCR1: copy of original exponent word
+*
+*
+* First check the absolute value of the exponent to see if this
+* routine is necessary. If so, then check the sign of the exponent
+* and do append (+) or strip (-) zeros accordingly.
+* This section handles a positive adjusted exponent.
+*
+ap_st_z:
+ move.l L_SCR1(a6),d1 ;load expA for range test
+ cmp.l #27,d1 ;test is with 27
+ ble.w pwrten ;if abs(expA) <28, skip ap/st zeros
+ btst #30,(a0) ;check sign of exp (SE; bit no. is mod 8 on memory: bit 6 of first byte)
+ bne.b ap_st_n ;if neg, go to neg side
+ clr.l d1 ;zero count reg
+ move.l (a0),d4 ;load lword 1 to d4
+ bfextu d4{28:4},d0 ;get M16 in d0
+ bne.b ap_p_fx ;if M16 is non-zero, go fix exp
+ addq.l #1,d1 ;inc zero count
+ moveq.l #1,d5 ;init lword counter
+ move.l (a0,d5.L*4),d4 ;get lword 2 to d4
+ bne.b ap_p_cl ;if lw 2 is non-zero, go scan its digits
+ addq.l #8,d1 ;lw 2 is all zero: inc count by 8
+ addq.l #1,d5 ;inc lword counter
+ move.l (a0,d5.L*4),d4 ;get lword 3 to d4
+ap_p_cl:
+ clr.l d3 ;init offset reg
+ moveq.l #7,d2 ;init digit counter
+ap_p_gd:
+ bfextu d4{d3:4},d0 ;get digit
+ bne.b ap_p_fx ;if non-zero, go to fix exp
+ addq.l #4,d3 ;point to next digit
+ addq.l #1,d1 ;inc zero count
+ dbf.w d2,ap_p_gd ;get next digit
+ap_p_fx:
+ move.l d1,d0 ;copy zero count to d0
+ move.l L_SCR1(a6),d1 ;get adjusted exp from memory
+ sub.l d0,d1 ;subtract count from exp
+ bge.b ap_p_fm ;if still pos, go fix mantissa
+ neg.l d1 ;now its neg; get abs
+ move.l (a0),d4 ;load lword 1 to d4
+ or.l #$40000000,d4 ; and set SE in d4
+ or.l #$40000000,(a0) ; and in memory
+*
+* Calculate the mantissa multiplier to compensate for the stripping of
+* zeros from the mantissa.
+*
+ap_p_fm:
+ move.l #PTENRN,a1 ;get address of power-of-ten table
+ clr.l d3 ;init table index
+ fmove.s FONE,fp1 ;init fp1 to 1
+ moveq.l #3,d2 ;init d2 to count bits in counter (d2 is not read by the loop below)
+ap_p_el:
+ asr.l #1,d0 ;shift lsb into carry
+ bcc.b ap_p_en ;bit was 0: skip the mul; if 1, mul fp1 by pwrten factor
+ fmul.x (a1,d3),fp1 ;mul by 10**(d3_bit_no)
+ap_p_en:
+ add.l #12,d3 ;advance d3 to next table entry (12 bytes each)
+ tst.l d0 ;check if d0 is zero
+ bne.b ap_p_el ;if not, get next bit
+ fmul.x fp1,fp0 ;mul mantissa by 10**(no_bits_shifted)
+ bra.b pwrten ;go calc pwrten
+*
+* This section handles a negative adjusted exponent.
+*
+ap_st_n:
+ clr.l d1 ;clr zero counter
+ moveq.l #2,d5 ;set up d5 to point to lword 3
+ move.l (a0,d5.L*4),d4 ;get lword 3
+ bne.b ap_n_cl ;if not zero, check digits
+ sub.l #1,d5 ;dec d5 to point to lword 2
+ addq.l #8,d1 ;lword 3 is all zero: inc counter by 8
+ move.l (a0,d5.L*4),d4 ;get lword 2
+ap_n_cl:
+ move.l #28,d3 ;point to last digit
+ moveq.l #7,d2 ;init digit counter
+ap_n_gd:
+ bfextu d4{d3:4},d0 ;get digit
+ bne.b ap_n_fx ;if non-zero, go to exp fix
+ subq.l #4,d3 ;point to previous digit
+ addq.l #1,d1 ;inc zero count
+ dbf.w d2,ap_n_gd ;get next digit
+ap_n_fx:
+ move.l d1,d0 ;copy counter to d0
+ move.l L_SCR1(a6),d1 ;get adjusted exp from memory
+ sub.l d0,d1 ;subtract count from exp
+ bgt.b ap_n_fm ;result still > 0: SE stays set, go fix mantissa
+ neg.l d1 ;take abs of exp and clr SE
+ move.l (a0),d4 ;load lword 1 to d4
+ and.l #$bfffffff,d4 ; and clr SE in d4
+ and.l #$bfffffff,(a0) ; and in memory
+*
+* Calculate the mantissa multiplier to compensate for the appending of
+* zeros to the mantissa.
+*
+ap_n_fm:
+ move.l #PTENRN,a1 ;get address of power-of-ten table
+ clr.l d3 ;init table index
+ fmove.s FONE,fp1 ;init fp1 to 1
+ moveq.l #3,d2 ;init d2 to count bits in counter (d2 is not read by the loop below)
+ap_n_el:
+ asr.l #1,d0 ;shift lsb into carry
+ bcc.b ap_n_en ;bit was 0: skip the mul; if 1, mul fp1 by pwrten factor
+ fmul.x (a1,d3),fp1 ;mul by 10**(d3_bit_no)
+ap_n_en:
+ add.l #12,d3 ;advance d3 to next table entry (12 bytes each)
+ tst.l d0 ;check if d0 is zero
+ bne.b ap_n_el ;if not, get next bit
+ fdiv.x fp1,fp0 ;div mantissa by 10**(no_bits_shifted)
+*
+*
+* Calculate power-of-ten factor from adjusted and shifted exponent.
+*
+* Register usage:
+*
+* pwrten:
+* (*) d0: temp
+* ( ) d1: exponent
+* (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
+* (*) d3: FPCR work copy
+* ( ) d4: first word of bcd
+* (*) a1: RTABLE pointer
+* calc_p:
+* (*) d0: temp
+* ( ) d1: exponent
+* (*) d3: PWRTxx table index
+* ( ) a0: pointer to working copy of bcd
+* (*) a1: PWRTxx pointer
+* (*) fp1: power-of-ten accumulator
+*
+* Pwrten calculates the exponent factor in the selected rounding mode
+* according to the following table:
+*
+* Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
+*
+* ANY ANY RN RN
+*
+* + + RP RP
+* - + RP RM
+* + - RP RM
+* - - RP RP
+*
+* + + RM RM
+* - + RM RP
+* + - RM RP
+* - - RM RM
+*
+* + + RZ RM
+* - + RZ RM
+* + - RZ RP
+* - - RZ RP
+*
+*
+pwrten:
+ move.l USER_FPCR(a6),d3 ;get user's FPCR
+ bfextu d3{26:2},d2 ;isolate the 2-bit rounding mode field
+ move.l (a0),d4 ;reload 1st bcd word to d4
+ asl.l #2,d2 ;format d2 to be
+ bfextu d4{0:2},d0 ; {FPCR[6],FPCR[5],SM,SE}
+ add.l d0,d2 ;in d2 as index into RTABLE
+ lea.l RTABLE,a1 ;load rtable base
+ move.b (a1,d2),d0 ;load new rounding bits from table
+ clr.l d3 ;clear d3 to force no exc and extended
+ bfins d0,d3{26:2} ;stuff new rounding bits in FPCR
+ fmove.l d3,FPCR ;write new FPCR
+ asr.l #1,d0 ;bit 0 of new mode into carry; selects PTENxx table
+ bcc.b not_rp ;bit 0 clear: not RP
+ lea.l PTENRP,a1 ;it is RP
+ bra.b calc_p ;go to init section
+not_rp:
+ asr.l #1,d0 ;keep checking: bit 1 of new mode into carry
+ bcc.b not_rm ;bit 1 clear: not RM either
+ lea.l PTENRM,a1 ;it is RM
+ bra.b calc_p ;go to init section
+not_rm:
+ lea.l PTENRN,a1 ;it is RN
+calc_p:
+ move.l d1,d0 ;copy exp to d0;use d0
+ bpl.b no_neg ;if exp is negative,
+ neg.l d0 ;invert it
+ or.l #$40000000,(a0) ;and set SE bit
+no_neg:
+ clr.l d3 ;table index
+ fmove.s FONE,fp1 ;init fp1 to 1
+e_loop:
+ asr.l #1,d0 ;shift next bit into carry
+ bcc.b e_next ;if zero, skip the mul
+ fmul.x (a1,d3),fp1 ;mul by 10**(d3_bit_no)
+e_next:
+ add.l #12,d3 ;advance d3 to next PTENxx entry (12 bytes each)
+ tst.l d0 ;check if d0 is zero
+ bne.b e_loop ;not zero, continue shifting
+*
+*
+* Check the sign of the adjusted exp and make the value in fp0 the
+* same sign. If the exp was pos then multiply fp1*fp0;
+* else divide fp0/fp1.
+*
+* Register Usage:
+* norm:
+* ( ) a0: pointer to working bcd value
+* (*) fp0: mantissa accumulator
+* ( ) fp1: scaling factor - 10**(abs(exp))
+*
+norm:
+ btst #30,(a0) ;test the sign of the exponent (SE)
+ beq.b mul ;if clear, go to multiply
+div:
+ fdiv.x fp1,fp0 ;exp is negative, so divide mant by 10**abs(exp)
+ bra.b end_dec ;skip the multiply
+mul:
+ fmul.x fp1,fp0 ;exp is positive, so multiply mant by 10**exp
+*
+*
+* Clean up and return with result in fp0.
+*
+* If the final mul/div in decbin incurred an inex exception,
+* it will be inex2, but will be reported as inex1 by get_op.
+*
+end_dec:
+ fmove.l FPSR,d0 ;get status register
+ bclr.l #inex2_bit+8,d0 ;test for inex2 and clear it (Z = old bit was clear)
+ fmove.l d0,FPSR ;return status reg w/o inex2
+ beq.b no_exc ;skip this if inex2 was not set (Z from the bclr)
+ or.l #inx1a_mask,USER_FPSR(a6) ;set inex1/ainex
+no_exc:
+ movem.l (a7)+,d2-d5 ;pop d2-d5 from the stack
+ rts
+ end
diff --git a/sys/arch/m68k/fpsp/do_func.sa b/sys/arch/m68k/fpsp/do_func.sa
new file mode 100644
index 00000000000..92e3fde0b49
--- /dev/null
+++ b/sys/arch/m68k/fpsp/do_func.sa
@@ -0,0 +1,584 @@
+* $NetBSD: do_func.sa,v 1.2 1994/10/26 07:49:02 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* do_func.sa 3.4 2/18/91
+*
+* Do_func performs the unimplemented operation. The operation
+* to be performed is determined from the lower 7 bits of the
+* extension word (except in the case of fmovecr and fsincos).
+* The opcode and tag bits form an index into a jump table in
+* tbldo.sa. Cases of zero, infinity and NaN are handled in
+* do_func by forcing the default result. Normalized and
+* denormalized (there are no unnormalized numbers at this
+* point) are passed onto the emulation code.
+*
+* CMDREG1B and STAG are extracted from the fsave frame
+* and combined to form the table index. The function called
+* will start with a0 pointing to the ETEMP operand. Dyadic
+* functions can find FPTEMP at -12(a0).
+*
+* Called functions return their result in fp0. Sincos returns
+* sin(x) in fp0 and cos(x) in fp1.
+*
+
+DO_FUNC IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref t_dz2
+ xref t_operr
+ xref t_inx2
+ xref t_resdnrm
+ xref dst_nan
+ xref src_nan
+ xref nrm_set
+ xref sto_cos
+
+ xref tblpre
+ xref slognp1,slogn,slog10,slog2
+ xref slognd,slog10d,slog2d
+ xref smod,srem
+ xref sscale
+ xref smovcr
+
+PONE dc.l $3fff0000,$80000000,$00000000 ;+1 (three lwords, loaded with fmove.x)
+MONE dc.l $bfff0000,$80000000,$00000000 ;-1
+PZERO dc.l $00000000,$00000000,$00000000 ;+0
+MZERO dc.l $80000000,$00000000,$00000000 ;-0
+PINF dc.l $7fff0000,$00000000,$00000000 ;+inf
+MINF dc.l $ffff0000,$00000000,$00000000 ;-inf
+QNAN dc.l $7fff0000,$ffffffff,$ffffffff ;non-signaling nan (all mantissa bits set)
+PPIBY2 dc.l $3FFF0000,$C90FDAA2,$2168C235 ;+PI/2
+MPIBY2 dc.l $bFFF0000,$C90FDAA2,$2168C235 ;-PI/2
+
+ xdef do_func
+do_func:
+ clr.b CU_ONLY(a6) ;clear the cu-only flag byte
+*
+* Check for fmovecr. It does not follow the format of fp gen
+* unimplemented instructions. The test is on the upper 6 bits;
+* if they are $17, the inst is fmovecr. Call entry smovcr
+* directly.
+*
+ bfextu CMDREG1B(a6){0:6},d0 ;get opclass and src fields
+ cmpi.l #$17,d0 ;if op class and size fields are $17,
+* ;it is FMOVECR; if not, continue
+ bne.b not_fmovecr
+ jmp smovcr ;fmovecr; jmp directly to emulation
+
+not_fmovecr:
+ move.w CMDREG1B(a6),d0 ;fetch the command word
+ and.l #$7F,d0 ;keep only the lower 7 bits (the extension)
+ cmpi.l #$38,d0 ;if the extension is >= $38,
+ bge.b serror ;it is illegal
+ bfextu STAG(a6){0:3},d1 ;d1 = 3-bit source tag
+ lsl.l #3,d0 ;make room for STAG
+ add.l d1,d0 ;combine for final index into table
+ lea.l tblpre,a1 ;start of monster jump table
+ move.l (a1,d0.w*4),a1 ;real target address
+ lea.l ETEMP(a6),a0 ;a0 is pointer to src op
+ move.l USER_FPCR(a6),d1 ;d1 = user's FPCR
+ and.l #$FF,d1 ; discard all but rounding mode/prec
+ fmove.l #0,fpcr ;zero the live FPCR before dispatching
+ jmp (a1) ;enter the selected routine (a0 = src op, d1 = mode/prec)
+*
+* ERROR
+*
+ xdef serror
+serror:
+ st.b STORE_FLG(a6) ;set STORE_FLG to $FF (illegal extension seen)
+ rts
+*
+* These routines load forced values into fp0. They are called
+* by index into tbldo.
+*
+* Load a signed zero to fp0 and set inex2/ainex
+*
+ xdef snzrinx
+snzrinx:
+ btst.b #sign_bit,LOCAL_EX(a0) ;get sign of source operand
+ bne.b ld_mzinx ;if negative, branch
+ bsr ld_pzero ;bsr so we can return and set inx
+ bra t_inx2 ;now, set the inx for the next inst
+ld_mzinx:
+ bsr ld_mzero ;if neg, load neg zero, return here
+ bra t_inx2 ;now, set the inx for the next inst
+*
+* Load a signed zero to fp0; do not set inex2/ainex
+*
+ xdef szero
+szero:
+ btst.b #sign_bit,LOCAL_EX(a0) ;Z is set when the source sign bit is clear
+ beq ld_pzero ;positive source: load +0
+ bra ld_mzero ;negative source: load -0
+*
+* Load a signed infinity to fp0; do not set inex2/ainex
+*
+ xdef sinf
+sinf:
+ btst.b #sign_bit,LOCAL_EX(a0) ;Z is set when the source sign bit is clear
+ beq ld_pinf ;positive source: load +inf
+ bra ld_minf ;negative source: load -inf
+*
+* Load a signed one to fp0; do not set inex2/ainex
+*
+ xdef sone
+sone:
+ btst.b #sign_bit,LOCAL_EX(a0) ;Z is set when the source sign bit is clear
+ beq ld_pone ;positive source: load +1
+ bra ld_mone ;negative source: load -1
+*
+* Load a signed pi/2 to fp0; do not set inex2/ainex
+*
+ xdef spi_2
+spi_2:
+ btst.b #sign_bit,LOCAL_EX(a0) ;Z is set when the source sign bit is clear
+ beq ld_ppi2 ;positive source: load +pi/2
+ bra ld_mpi2 ;negative source: load -pi/2
+*
+* Load either a +0 or +inf for plus/minus operand
+*
+ xdef szr_inf
+szr_inf:
+ btst.b #sign_bit,LOCAL_EX(a0) ;Z is set when the source sign bit is clear
+ beq ld_pinf ;positive source: load +inf
+ bra ld_pzero ;negative source: load +0
+*
+* Result is either an operr or +inf for plus/minus operand
+* [Used by slogn, slognp1, slog10, and slog2]
+*
+ xdef sopr_inf
+sopr_inf:
+ btst.b #sign_bit,LOCAL_EX(a0) ;Z is set when the source sign bit is clear
+ beq ld_pinf ;positive source: load +inf
+ bra t_operr ;negative source: operand error
+*
+* FLOGNP1
+*
+ xdef sslognp1
+sslognp1:
+ fmovem.x (a0),fp0 ;load the source operand into fp0
+ fcmp.b #-1,fp0 ;compare the source with -1
+ fbgt slognp1 ;source > -1: go to slognp1
+ fbeq t_dz2 ;if = -1, divide by zero exception
+ fmove.l #0,FPSR ;clr N flag
+ bra t_operr ;take care of operands < -1
+*
+* FETOXM1
+*
+ xdef setoxm1i
+setoxm1i:
+ btst.b #sign_bit,LOCAL_EX(a0) ;Z is set when the source sign bit is clear
+ beq ld_pinf ;positive source: load +inf
+ bra ld_mone ;negative source: load -1
+*
+* FLOGN
+*
+* Test for 1.0 as an input argument, returning +zero. Also check
+* the sign and return operr if negative.
+*
+ xdef sslogn
+sslogn:
+ btst.b #sign_bit,LOCAL_EX(a0) ;check sign of source
+ bne t_operr ;take care of operands < 0
+ cmpi.w #$3fff,LOCAL_EX(a0) ;test for 1.0 input
+ bne slogn ;exponent word differs from 1.0: go to slogn
+ cmpi.l #$80000000,LOCAL_HI(a0) ;upper mantissa lword of 1.0?
+ bne slogn ;no: go to slogn
+ tst.l LOCAL_LO(a0) ;lower mantissa lword must be zero for 1.0
+ bne slogn ;no: go to slogn
+ fmove.x PZERO,fp0 ;input is exactly 1.0: return +0
+ rts
+
+ xdef sslognd
+sslognd:
+ btst.b #sign_bit,LOCAL_EX(a0) ;check sign of source
+ beq slognd ;positive source: go to slognd
+ bra t_operr ;take care of operands < 0
+
+*
+* FLOG10
+*
+ xdef sslog10
+sslog10:
+ btst.b #sign_bit,LOCAL_EX(a0) ;check sign of source
+ bne t_operr ;take care of operands < 0
+ cmpi.w #$3fff,LOCAL_EX(a0) ;test for 1.0 input
+ bne slog10 ;exponent word differs from 1.0: go to slog10
+ cmpi.l #$80000000,LOCAL_HI(a0) ;upper mantissa lword of 1.0?
+ bne slog10 ;no: go to slog10
+ tst.l LOCAL_LO(a0) ;lower mantissa lword must be zero for 1.0
+ bne slog10 ;no: go to slog10
+ fmove.x PZERO,fp0 ;input is exactly 1.0: return +0
+ rts
+
+ xdef sslog10d
+sslog10d:
+ btst.b #sign_bit,LOCAL_EX(a0) ;check sign of source
+ beq slog10d ;positive source: go to slog10d
+ bra t_operr ;take care of operands < 0
+
+*
+* FLOG2
+*
+ xdef sslog2
+sslog2:
+ btst.b #sign_bit,LOCAL_EX(a0) ;check sign of source
+ bne t_operr ;take care of operands < 0
+ cmpi.w #$3fff,LOCAL_EX(a0) ;test for 1.0 input
+ bne slog2 ;exponent word differs from 1.0: go to slog2
+ cmpi.l #$80000000,LOCAL_HI(a0) ;upper mantissa lword of 1.0?
+ bne slog2 ;no: go to slog2
+ tst.l LOCAL_LO(a0) ;lower mantissa lword must be zero for 1.0
+ bne slog2 ;no: go to slog2
+ fmove.x PZERO,fp0 ;input is exactly 1.0: return +0
+ rts
+
+ xdef sslog2d
+sslog2d:
+ btst.b #sign_bit,LOCAL_EX(a0) ;check sign of source
+ beq slog2d ;positive source: go to slog2d
+ bra t_operr ;take care of operands < 0
+
+*
+* FMOD
+*
+pmodt:
+* ;$21 fmod
+* ;dtag,stag
+ dc.l smod ; 00,00 norm,norm = normal
+ dc.l smod_oper ; 00,01 norm,zero = nan with operr
+ dc.l smod_fpn ; 00,10 norm,inf = fpn
+ dc.l smod_snan ; 00,11 norm,nan = nan
+ dc.l smod_zro ; 01,00 zero,norm = +-zero
+ dc.l smod_oper ; 01,01 zero,zero = nan with operr
+ dc.l smod_zro ; 01,10 zero,inf = +-zero
+ dc.l smod_snan ; 01,11 zero,nan = nan
+ dc.l smod_oper ; 10,00 inf,norm = nan with operr
+ dc.l smod_oper ; 10,01 inf,zero = nan with operr
+ dc.l smod_oper ; 10,10 inf,inf = nan with operr
+ dc.l smod_snan ; 10,11 inf,nan = nan
+ dc.l smod_dnan ; 11,00 nan,norm = nan
+ dc.l smod_dnan ; 11,01 nan,zero = nan
+ dc.l smod_dnan ; 11,10 nan,inf = nan
+ dc.l smod_dnan ; 11,11 nan,nan = nan
+
+ xdef pmod
+pmod:
+ clr.b FPSR_QBYTE(a6) ; clear quotient field
+ bfextu STAG(a6){0:3},d0 ;stag = d0
+ bfextu DTAG(a6){0:3},d1 ;dtag = d1
+
+*
+* Alias extended denorms to norms for the jump table.
+*
+ bclr.l #2,d0 ;drop bit 2 of stag, leaving a 2-bit tag
+ bclr.l #2,d1 ;drop bit 2 of dtag, leaving a 2-bit tag
+
+ lsl.b #2,d1 ;move dtag up to bits 3:2
+ or.b d0,d1 ;d1{3:2} = dtag, d1{1:0} = stag
+* ;Tag values:
+* ;00 = norm or denorm
+* ;01 = zero
+* ;10 = inf
+* ;11 = nan
+ lea pmodt,a1 ;a1 = base of the fmod jump table
+ move.l (a1,d1.w*4),a1 ;fetch the handler address for this tag pair
+ jmp (a1) ;go to the handler
+
+smod_snan:
+ bra src_nan
+smod_dnan:
+ bra dst_nan
+smod_oper:
+ bra t_operr
+smod_zro:
+ move.b ETEMP(a6),d1 ;get sign of src op
+ move.b FPTEMP(a6),d0 ;get sign of dst op
+ eor.b d0,d1 ;get exor of sign bits
+ btst.l #7,d1 ;test for sign
+ beq.b smod_zsn ;if clr, do not set sign bit
+ bset.b #q_sn_bit,FPSR_QBYTE(a6) ;set q-byte sign bit
+smod_zsn:
+ btst.l #7,d0 ;test if + or -
+ beq ld_pzero ;if pos then load +0
+ bra ld_mzero ;else neg load -0
+
+smod_fpn:
+ move.b ETEMP(a6),d1 ;get sign of src op
+ move.b FPTEMP(a6),d0 ;get sign of dst op
+ eor.b d0,d1 ;get exor of sign bits
+ btst.l #7,d1 ;test for sign
+ beq.b smod_fsn ;if clr, do not set sign bit
+ bset.b #q_sn_bit,FPSR_QBYTE(a6) ;set q-byte sign bit
+smod_fsn:
+ tst.b DTAG(a6) ;filter out denormal destination case
+ bpl.b smod_nrm ;bit 7 of DTAG clear: return the dest as is
+ lea.l FPTEMP(a6),a0 ;a0<- addr(FPTEMP)
+ bra t_resdnrm ;force UNFL(but exact) result
+smod_nrm:
+ fmove.l USER_FPCR(a6),fpcr ;use user's rmode and precision
+ fmove.x FPTEMP(a6),fp0 ;return dest to fp0
+ rts
+
+*
+* FREM
+*
+premt:
+* ;$25 frem
+* ;dtag,stag
+ dc.l srem ; 00,00 norm,norm = normal
+ dc.l srem_oper ; 00,01 norm,zero = nan with operr
+ dc.l srem_fpn ; 00,10 norm,inf = fpn
+ dc.l srem_snan ; 00,11 norm,nan = nan
+ dc.l srem_zro ; 01,00 zero,norm = +-zero
+ dc.l srem_oper ; 01,01 zero,zero = nan with operr
+ dc.l srem_zro ; 01,10 zero,inf = +-zero
+ dc.l srem_snan ; 01,11 zero,nan = nan
+ dc.l srem_oper ; 10,00 inf,norm = nan with operr
+ dc.l srem_oper ; 10,01 inf,zero = nan with operr
+ dc.l srem_oper ; 10,10 inf,inf = nan with operr
+ dc.l srem_snan ; 10,11 inf,nan = nan
+ dc.l srem_dnan ; 11,00 nan,norm = nan
+ dc.l srem_dnan ; 11,01 nan,zero = nan
+ dc.l srem_dnan ; 11,10 nan,inf = nan
+ dc.l srem_dnan ; 11,11 nan,nan = nan
+
+ xdef prem
+prem:
+ clr.b FPSR_QBYTE(a6) ;clear quotient field
+ bfextu STAG(a6){0:3},d0 ;stag = d0
+ bfextu DTAG(a6){0:3},d1 ;dtag = d1
+*
+* Alias extended denorms to norms for the jump table.
+*
+ bclr #2,d0 ;drop bit 2 of stag, leaving a 2-bit tag
+ bclr #2,d1 ;drop bit 2 of dtag, leaving a 2-bit tag
+
+ lsl.b #2,d1 ;move dtag up to bits 3:2
+ or.b d0,d1 ;d1{3:2} = dtag, d1{1:0} = stag
+* ;Tag values:
+* ;00 = norm or denorm
+* ;01 = zero
+* ;10 = inf
+* ;11 = nan
+ lea premt,a1 ;a1 = base of the frem jump table
+ move.l (a1,d1.w*4),a1 ;fetch the handler address for this tag pair
+ jmp (a1) ;go to the handler
+
+srem_snan:
+ bra src_nan
+srem_dnan:
+ bra dst_nan
+srem_oper:
+ bra t_operr
+srem_zro:
+ move.b ETEMP(a6),d1 ;get sign of src op
+ move.b FPTEMP(a6),d0 ;get sign of dst op
+ eor.b d0,d1 ;get exor of sign bits
+ btst.l #7,d1 ;test for sign
+ beq.b srem_zsn ;if clr, do not set sign bit
+ bset.b #q_sn_bit,FPSR_QBYTE(a6) ;set q-byte sign bit
+srem_zsn:
+ btst.l #7,d0 ;test if + or -
+ beq ld_pzero ;if pos then load +0
+ bra ld_mzero ;else neg load -0
+
+srem_fpn:
+ move.b ETEMP(a6),d1 ;get sign of src op
+ move.b FPTEMP(a6),d0 ;get sign of dst op
+ eor.b d0,d1 ;get exor of sign bits
+ btst.l #7,d1 ;test for sign
+ beq.b srem_fsn ;if clr, do not set sign bit
+ bset.b #q_sn_bit,FPSR_QBYTE(a6) ;set q-byte sign bit
+srem_fsn:
+ tst.b DTAG(a6) ;filter out denormal destination case
+ bpl.b srem_nrm ;bit 7 of DTAG clear: return the dest as is
+ lea.l FPTEMP(a6),a0 ;a0<- addr(FPTEMP)
+ bra t_resdnrm ;force UNFL(but exact) result
+srem_nrm:
+ fmove.l USER_FPCR(a6),fpcr ;use user's rmode and precision
+ fmove.x FPTEMP(a6),fp0 ;return dest to fp0
+ rts
+*
+* FSCALE
+*
+pscalet:
+* ;$26 fscale
+* ;dtag,stag
+ dc.l sscale ; 00,00 norm,norm = result
+ dc.l sscale ; 00,01 norm,zero = fpn
+ dc.l scl_opr ; 00,10 norm,inf = nan with operr
+ dc.l scl_snan ; 00,11 norm,nan = nan
+ dc.l scl_zro ; 01,00 zero,norm = +-zero
+ dc.l scl_zro ; 01,01 zero,zero = +-zero
+ dc.l scl_opr ; 01,10 zero,inf = nan with operr
+ dc.l scl_snan ; 01,11 zero,nan = nan
+ dc.l scl_inf ; 10,00 inf,norm = +-inf
+ dc.l scl_inf ; 10,01 inf,zero = +-inf
+ dc.l scl_opr ; 10,10 inf,inf = nan with operr
+ dc.l scl_snan ; 10,11 inf,nan = nan
+ dc.l scl_dnan ; 11,00 nan,norm = nan
+ dc.l scl_dnan ; 11,01 nan,zero = nan
+ dc.l scl_dnan ; 11,10 nan,inf = nan
+ dc.l scl_dnan ; 11,11 nan,nan = nan
+
+ xdef pscale
+pscale:
+ bfextu STAG(a6){0:3},d0 ;stag in d0
+ bfextu DTAG(a6){0:3},d1 ;dtag in d1
+ bclr.l #2,d0 ;alias denorm into norm
+ bclr.l #2,d1 ;alias denorm into norm
+ lsl.b #2,d1 ;move dtag up to bits 3:2
+ or.b d0,d1 ;d1{3:2} = dtag, d1{1:0} = stag
+* ;dtag values stag values:
+* ;000 = norm 00 = norm
+* ;001 = zero 01 = zero
+* ;010 = inf 10 = inf
+* ;011 = nan 11 = nan
+* ;100 = dnrm
+*
+*
+ lea.l pscalet,a1 ;load start of jump table
+ move.l (a1,d1.w*4),a1 ;load a1 with label depending on tag
+ jmp (a1) ;go to the routine
+
+scl_opr:
+ bra t_operr
+
+scl_dnan:
+ bra dst_nan
+
+scl_zro:
+ btst.b #sign_bit,FPTEMP_EX(a6) ;test if + or -
+ beq ld_pzero ;if pos then load +0
+ bra ld_mzero ;if neg then load -0
+scl_inf:
+ btst.b #sign_bit,FPTEMP_EX(a6) ;test if + or -
+ beq ld_pinf ;if pos then load +inf
+ bra ld_minf ;else neg load -inf
+scl_snan:
+ bra src_nan
+*
+* FSINCOS
+*
+ xdef ssincosz
+ssincosz:
+ btst.b #sign_bit,ETEMP(a6) ;get sign
+ beq.b sincosp ;positive source: use +0
+ fmove.x MZERO,fp0 ;negative source: fp0 = -0
+ bra.b sincoscom
+sincosp:
+ fmove.x PZERO,fp0 ;fp0 = +0
+sincoscom:
+ fmovem.x PONE,fp1 ;do not allow FPSR to be affected
+ bra sto_cos ;store cosine result
+
+ xdef ssincosi
+ssincosi:
+ fmove.x QNAN,fp1 ;load NAN
+ bsr sto_cos ;store cosine result
+ fmove.x QNAN,fp0 ;load NAN
+ bra t_operr ;leave through the operand-error handler
+
+ xdef ssincosnan
+ssincosnan:
+ move.l ETEMP_EX(a6),FP_SCR1(a6) ;copy the 12-byte source operand
+ move.l ETEMP_HI(a6),FP_SCR1+4(a6) ; from ETEMP
+ move.l ETEMP_LO(a6),FP_SCR1+8(a6) ; into FP_SCR1
+ bset.b #signan_bit,FP_SCR1+4(a6) ;set signan_bit in the copy's upper mantissa
+ fmovem.x FP_SCR1(a6),fp1 ;fp1 = the modified copy
+ bsr sto_cos ;store cosine result
+ bra src_nan ;leave through the source-nan handler
+*
+* This code forces default values for the zero, inf, and nan cases
+* in the transcendentals code. The CC bits must be set in the
+* stacked FPSR to be correctly reported.
+*
+***Returns +PI/2
+ xdef ld_ppi2
+ld_ppi2:
+ fmove.x PPIBY2,fp0 ;load +pi/2
+ bra t_inx2 ;set inex2 exc
+
+***Returns -PI/2
+ xdef ld_mpi2
+ld_mpi2:
+ fmove.x MPIBY2,fp0 ;load -pi/2
+ or.l #neg_mask,USER_FPSR(a6) ;set N bit
+ bra t_inx2 ;set inex2 exc
+
+***Returns +inf
+ xdef ld_pinf
+ld_pinf:
+ fmove.x PINF,fp0 ;load +inf
+ or.l #inf_mask,USER_FPSR(a6) ;set I bit
+ rts
+
+***Returns -inf
+ xdef ld_minf
+ld_minf:
+ fmove.x MINF,fp0 ;load -inf
+ or.l #neg_mask+inf_mask,USER_FPSR(a6) ;set N and I bits
+ rts
+
+***Returns +1
+ xdef ld_pone
+ld_pone:
+ fmove.x PONE,fp0 ;load +1 (no bits are set in USER_FPSR)
+ rts
+
+***Returns -1
+ xdef ld_mone
+ld_mone:
+ fmove.x MONE,fp0 ;load -1
+ or.l #neg_mask,USER_FPSR(a6) ;set N bit
+ rts
+
+***Returns +0
+ xdef ld_pzero
+ld_pzero:
+ fmove.x PZERO,fp0 ;load +0
+ or.l #z_mask,USER_FPSR(a6) ;set Z bit
+ rts
+
+***Returns -0
+ xdef ld_mzero
+ld_mzero:
+ fmove.x MZERO,fp0 ;load -0
+ or.l #neg_mask+z_mask,USER_FPSR(a6) ;set N and Z bits
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/fpsp.h b/sys/arch/m68k/fpsp/fpsp.h
new file mode 100644
index 00000000000..4ce4d092ce7
--- /dev/null
+++ b/sys/arch/m68k/fpsp/fpsp.h
@@ -0,0 +1,373 @@
+* $NetBSD: fpsp.h,v 1.2 1994/10/26 07:49:04 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* fpsp.h 3.3 3.3
+*
+
+* fpsp.h --- stack frame offsets during FPSP exception handling
+*
+* These equates are used to access the exception frame, the fsave
+* frame and any local variables needed by the FPSP package.
+*
+* All FPSP handlers begin by executing:
+*
+* link a6,#-LOCAL_SIZE
+* fsave -(a7)
+* movem.l d0-d1/a0-a1,USER_DA(a6)
+* fmovem.x fp0-fp3,USER_FP0(a6)
+* fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6)
+*
+* After initialization, the stack looks like this:
+*
+* A7 ---> +-------------------------------+
+* | |
+* | FPU fsave area |
+* | |
+* +-------------------------------+
+* | |
+* | FPSP Local Variables |
+* | including |
+* | saved registers |
+* | |
+* +-------------------------------+
+* A6 ---> | Saved A6 |
+* +-------------------------------+
+* | |
+* | Exception Frame |
+* | |
+* | |
+*
+* Positive offsets from A6 refer to the exception frame. Negative
+* offsets refer to the Local Variable area and the fsave area.
+* The fsave frame is also accessible 'from the top' via A7.
+*
+* On exit, the handlers execute:
+*
+* movem.l USER_DA(a6),d0-d1/a0-a1
+* fmovem.x USER_FP0(a6),fp0-fp3
+* fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
+* frestore (a7)+
+* unlk a6
+*
+* and then either 'bra fpsp_done' if the exception was completely
+* handled by the package, or 'bra real_xxxx' which is an external
+* label to a routine that will process a real exception of the
+* type that was generated. Some handlers may omit the 'frestore'
+* if the FPU state after the exception is idle.
+*
+* Sometimes the exception handler will transform the fsave area
+* because it needs to report an exception back to the user. This
+* can happen if the package is entered for an unimplemented float
+* instruction that generates (say) an underflow. Alternatively,
+* a second fsave frame can be pushed onto the stack and the
+* handler exit code will reload the new frame and discard the old.
+*
+* The registers d0, d1, a0, a1 and fp0-fp3 are always saved and
+* restored from the 'local variable' area and can be used as
+* temporaries. If a routine needs to change any
+* of these registers, it should modify the saved copy and let
+* the handler exit code restore the value.
+*
+*----------------------------------------------------------------------
+*
+* Local Variables on the stack
+*
+LOCAL_SIZE equ 192 ;bytes needed for local variables
+LV equ -LOCAL_SIZE ;convenient base value
+*
+USER_DA equ LV+0 ;save space for D0-D1,A0-A1
+USER_D0 equ LV+0 ;saved user D0
+USER_D1 equ LV+4 ;saved user D1
+USER_A0 equ LV+8 ;saved user A0
+USER_A1 equ LV+12 ;saved user A1
+USER_FP0 equ LV+16 ;saved user FP0
+USER_FP1 equ LV+28 ;saved user FP1
+USER_FP2 equ LV+40 ;saved user FP2
+USER_FP3 equ LV+52 ;saved user FP3
+USER_FPCR equ LV+64 ;saved user FPCR
+FPCR_ENABLE equ USER_FPCR+2 ; FPCR exception enable
+FPCR_MODE equ USER_FPCR+3 ; FPCR rounding mode control
+USER_FPSR equ LV+68 ;saved user FPSR
+FPSR_CC equ USER_FPSR+0 ; FPSR condition code
+FPSR_QBYTE equ USER_FPSR+1 ; FPSR quotient
+FPSR_EXCEPT equ USER_FPSR+2 ; FPSR exception
+FPSR_AEXCEPT equ USER_FPSR+3 ; FPSR accrued exception
+USER_FPIAR equ LV+72 ;saved user FPIAR
+FP_SCR1 equ LV+76 ;room for a temporary float value
+FP_SCR2 equ LV+92 ;room for a temporary float value
+L_SCR1 equ LV+108 ;room for a temporary long value
+L_SCR2 equ LV+112 ;room for a temporary long value
+STORE_FLG equ LV+116 ;flag byte; serror (do_func.sa) sets it to $FF
+BINDEC_FLG equ LV+117 ;used in bindec
+DNRM_FLG equ LV+118 ;used in res_func
+RES_FLG equ LV+119 ;used in res_func
+DY_MO_FLG equ LV+120 ;dyadic/monadic flag
+UFLG_TMP equ LV+121 ;temporary for uflag errata
+CU_ONLY equ LV+122 ;cu-only flag
+VER_TMP equ LV+123 ;temp holding for version number
+L_SCR3 equ LV+124 ;room for a temporary long value
+FP_SCR3 equ LV+128 ;room for a temporary float value
+FP_SCR4 equ LV+144 ;room for a temporary float value
+FP_SCR5 equ LV+160 ;room for a temporary float value
+FP_SCR6 equ LV+176 ;last 16-byte slot; ends at LV+192 = LOCAL_SIZE
+*
+*NEXT equ LV+192 ;need to increase LOCAL_SIZE
+*
+*--------------------------------------------------------------------------
+*
+* fsave offsets and bit definitions
+*
+* Offsets are defined from the end of an fsave because the last 10
+* words of a busy frame are the same as the unimplemented frame.
+*
+CU_SAVEPC equ LV-92 ;micro-pc for CU (1 byte)
+FPR_DIRTY_BITS equ LV-91 ;fpr dirty bits
+*
+WBTEMP equ LV-76 ;write back temp (12 bytes)
+WBTEMP_EX equ WBTEMP ;wbtemp sign and exponent (2 bytes)
+WBTEMP_HI equ WBTEMP+4 ;wbtemp mantissa [63:32] (4 bytes)
+WBTEMP_LO equ WBTEMP+8 ;wbtemp mantissa [31:00] (4 bytes)
+*
+WBTEMP_SGN equ WBTEMP+2 ;used to store sign
+*
+FPSR_SHADOW equ LV-64 ;fpsr shadow reg
+*
+FPIARCU equ LV-60 ;Instr. addr. reg. for CU (4 bytes)
+*
+CMDREG2B equ LV-52 ;cmd reg for machine 2
+CMDREG3B equ LV-48 ;cmd reg for E3 exceptions (2 bytes)
+*
+NMNEXC equ LV-44 ;NMNEXC (unsup,snan bits only)
+nmn_unsup_bit equ 1
+nmn_snan_bit equ 0
+*
+NMCEXC equ LV-43 ;NMNEXC & NMCEXC
+nmn_operr_bit equ 7
+nmn_ovfl_bit equ 6
+nmn_unfl_bit equ 5
+nmc_unsup_bit equ 4
+nmc_snan_bit equ 3
+nmc_operr_bit equ 2
+nmc_ovfl_bit equ 1
+nmc_unfl_bit equ 0
+*
+STAG equ LV-40 ;source tag (1 byte)
+WBTEMP_GRS equ LV-40 ;alias wbtemp guard, round, sticky
+guard_bit equ 1 ;guard bit is bit number 1
+round_bit equ 0 ;round bit is bit number 0
+stag_mask equ $E0 ;upper 3 bits are source tag type
+denorm_bit equ 7 ;bit determines if denorm or unnorm
+etemp15_bit equ 4 ;etemp exponent bit #15
+wbtemp66_bit equ 2 ;wbtemp mantissa bit #66
+wbtemp1_bit equ 1 ;wbtemp mantissa bit #1
+wbtemp0_bit equ 0 ;wbtemp mantissa bit #0
+*
+STICKY equ LV-39 ;holds sticky bit
+sticky_bit equ 7
+*
+CMDREG1B equ LV-36 ;cmd reg for E1 exceptions (2 bytes)
+kfact_bit equ 12 ;distinguishes static/dynamic k-factor
+* ;on packed move out's. NOTE: this
+* ;equate only works when CMDREG1B is in
+* ;a register.
+*
+CMDWORD equ LV-35 ;command word in cmd1b
+direction_bit equ 5 ;bit 0 in opclass
+size_bit2 equ 12 ;bit 2 in size field
+*
+DTAG equ LV-32 ;dest tag (1 byte)
+dtag_mask equ $E0 ;upper 3 bits are dest type tag
+fptemp15_bit equ 4 ;fptemp exponent bit #15
+*
+WB_BYTE equ LV-31 ;holds WBTE15 bit (1 byte)
+wbtemp15_bit equ 4 ;wbtemp exponent bit #15
+*
+E_BYTE equ LV-28 ;holds E1 and E3 bits (1 byte)
+E1 equ 2 ;which bit is E1 flag
+E3 equ 1 ;which bit is E3 flag
+SFLAG equ 0 ;which bit is S flag
+*
+T_BYTE equ LV-27 ;holds T and U bits (1 byte)
+XFLAG equ 7 ;which bit is X flag
+UFLAG equ 5 ;which bit is U flag
+TFLAG equ 4 ;which bit is T flag
+*
+FPTEMP equ LV-24 ;fptemp (12 bytes)
+FPTEMP_EX equ FPTEMP ;fptemp sign and exponent (2 bytes)
+FPTEMP_HI equ FPTEMP+4 ;fptemp mantissa [63:32] (4 bytes)
+FPTEMP_LO equ FPTEMP+8 ;fptemp mantissa [31:00] (4 bytes)
+*
+FPTEMP_SGN equ FPTEMP+2 ;used to store sign
+*
+ETEMP equ LV-12 ;etemp (12 bytes)
+ETEMP_EX equ ETEMP ;etemp sign and exponent (2 bytes)
+ETEMP_HI equ ETEMP+4 ;etemp mantissa [63:32] (4 bytes)
+ETEMP_LO equ ETEMP+8 ;etemp mantissa [31:00] (4 bytes)
+*
+ETEMP_SGN equ ETEMP+2 ;used to store sign
+*
+EXC_SR equ 4 ;exception frame status register
+EXC_PC equ 6 ;exception frame program counter
+EXC_VEC equ 10 ;exception frame vector (format+vector#)
+EXC_EA equ 12 ;exception frame effective address
+*
+*--------------------------------------------------------------------------
+*
+* FPSR/FPCR bits
+*
+neg_bit equ 3 negative result
+z_bit equ 2 zero result
+inf_bit equ 1 infinity result
+nan_bit equ 0 not-a-number result
+*
+q_sn_bit equ 7 sign bit of quotient byte
+*
+bsun_bit equ 7 branch on unordered
+snan_bit equ 6 signalling nan
+operr_bit equ 5 operand error
+ovfl_bit equ 4 overflow
+unfl_bit equ 3 underflow
+dz_bit equ 2 divide by zero
+inex2_bit equ 1 inexact result 2
+inex1_bit equ 0 inexact result 1
+*
+aiop_bit equ 7 accrued illegal operation
+aovfl_bit equ 6 accrued overflow
+aunfl_bit equ 5 accrued underflow
+adz_bit equ 4 accrued divide by zero
+ainex_bit equ 3 accrued inexact
+*
+* FPSR individual bit masks
+*
+neg_mask equ $08000000
+z_mask equ $04000000
+inf_mask equ $02000000
+nan_mask equ $01000000
+*
+bsun_mask equ $00008000
+snan_mask equ $00004000
+operr_mask equ $00002000
+ovfl_mask equ $00001000
+unfl_mask equ $00000800
+dz_mask equ $00000400
+inex2_mask equ $00000200
+inex1_mask equ $00000100
+*
+aiop_mask equ $00000080 accrued illegal operation
+aovfl_mask equ $00000040 accrued overflow
+aunfl_mask equ $00000020 accrued underflow
+adz_mask equ $00000010 accrued divide by zero
+ainex_mask equ $00000008 accrued inexact
+*
+* FPSR combinations used in the FPSP
+*
+dzinf_mask equ inf_mask+dz_mask+adz_mask
+opnan_mask equ nan_mask+operr_mask+aiop_mask
+nzi_mask equ $01ffffff clears N, Z, and I
+unfinx_mask equ unfl_mask+inex2_mask+aunfl_mask+ainex_mask
+unf2inx_mask equ unfl_mask+inex2_mask+ainex_mask
+ovfinx_mask equ ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
+inx1a_mask equ inex1_mask+ainex_mask
+inx2a_mask equ inex2_mask+ainex_mask
+snaniop_mask equ nan_mask+snan_mask+aiop_mask
+naniop_mask equ nan_mask+aiop_mask
+neginf_mask equ neg_mask+inf_mask
+infaiop_mask equ inf_mask+aiop_mask
+negz_mask equ neg_mask+z_mask
+opaop_mask equ operr_mask+aiop_mask
+unfl_inx_mask equ unfl_mask+aunfl_mask+ainex_mask
+ovfl_inx_mask equ ovfl_mask+aovfl_mask+ainex_mask
+*
+*--------------------------------------------------------------------------
+*
+* FPCR rounding modes
+*
+x_mode equ $00 round to extended
+s_mode equ $40 round to single
+d_mode equ $80 round to double
+*
+rn_mode equ $00 round nearest
+rz_mode equ $10 round to zero
+rm_mode equ $20 round to minus infinity
+rp_mode equ $30 round to plus infinity
+*
+*--------------------------------------------------------------------------
+*
+* Miscellaneous equates
+*
+signan_bit equ 6 signalling nan bit in mantissa
+sign_bit equ 7
+*
+rnd_stky_bit equ 29 round/sticky bit of mantissa
+* this can only be used if in a data register
+sx_mask equ $01800000 set s and x bits in word $48
+*
+LOCAL_EX equ 0
+LOCAL_SGN equ 2
+LOCAL_HI equ 4
+LOCAL_LO equ 8
+LOCAL_GRS equ 12 valid ONLY for FP_SCR1, FP_SCR2
+*
+*
+norm_tag equ $00 tag bits in {7:5} position
+zero_tag equ $20
+inf_tag equ $40
+nan_tag equ $60
+dnrm_tag equ $80
+*
+* fsave sizes and formats
+*
+VER_4 equ $40 fpsp compatible version numbers
+* are in the $40s {$40-$4f}
+VER_40 equ $40 original version number
+VER_41 equ $41 revision version number
+*
+BUSY_SIZE equ 100 size of busy frame
+BUSY_FRAME equ LV-BUSY_SIZE start of busy frame
+*
+UNIMP_40_SIZE equ 44 size of orig unimp frame
+UNIMP_41_SIZE equ 52 size of rev unimp frame
+*
+IDLE_SIZE equ 4 size of idle frame
+IDLE_FRAME equ LV-IDLE_SIZE start of idle frame
+*
+* exception vectors
+*
+TRACE_VEC equ $2024 trace trap
+FLINE_VEC equ $002C 'real' F-line
+UNIMP_VEC equ $202C unimplemented
+INEX_VEC equ $00C4
+*
+dbl_thresh equ $3C01
+sgl_thresh equ $3F81
+*
diff --git a/sys/arch/m68k/fpsp/gen_except.sa b/sys/arch/m68k/fpsp/gen_except.sa
new file mode 100644
index 00000000000..0d13020dac7
--- /dev/null
+++ b/sys/arch/m68k/fpsp/gen_except.sa
@@ -0,0 +1,493 @@
+* $NetBSD: gen_except.sa,v 1.3 1994/10/26 07:49:07 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* gen_except.sa 3.7 1/16/92
+*
+* gen_except --- FPSP routine to detect reportable exceptions
+*
+* This routine compares the exception enable byte of the
+* user_fpcr on the stack with the exception status byte
+* of the user_fpsr.
+*
+* Any routine which may report an exception must load
+* the stack frame in memory with the exceptional operand(s).
+*
+* Priority for exceptions is:
+*
+* Highest: bsun
+* snan
+* operr
+* ovfl
+* unfl
+* dz
+* inex2
+* Lowest: inex1
+*
+* Note: The IEEE standard specifies that inex2 is to be
+* reported if ovfl occurs and the ovfl enable bit is not
+* set but the inex2 enable bit is.
+*
+
+GEN_EXCEPT IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref real_trace
+ xref fpsp_done
+ xref fpsp_fmt_error
+
+exc_tbl:
+ dc.l bsun_exc ;index 0: bsun (bit 7, highest priority)
+ dc.l commonE1 ;index 1: snan (bit 6)
+ dc.l commonE1 ;index 2: operr (bit 5)
+ dc.l ovfl_unfl ;index 3: ovfl (bit 4)
+ dc.l ovfl_unfl ;index 4: unfl (bit 3)
+ dc.l commonE1 ;index 5: dz (bit 2)
+ dc.l commonE3 ;index 6: inex2 (bit 1)
+ dc.l commonE3 ;index 7: inex1 (bit 0, lowest priority)
+ dc.l no_match ;index 8: no enabled exception bit set
+
+ xdef gen_except
+gen_except:
+ cmpi.b #IDLE_SIZE-4,1(a7) ;test for idle frame (size byte 0)
+ beq.w do_check ;go handle idle frame
+ cmpi.b #UNIMP_40_SIZE-4,1(a7) ;test for orig unimp frame
+ beq.b unimp_x ;go handle unimp frame
+ cmpi.b #UNIMP_41_SIZE-4,1(a7) ;test for rev unimp frame
+ beq.b unimp_x ;go handle unimp frame
+ cmpi.b #BUSY_SIZE-4,1(a7) ;if size <> $60, fmt error
+ bne.l fpsp_fmt_error ;not idle, unimp or busy
+ lea.l BUSY_SIZE+LOCAL_SIZE(a7),a1 ;init a1 so fpsp.h
+* ;equates will work
+* Fix up the new busy frame with entries from the unimp frame
+*
+ move.l ETEMP_EX(a6),ETEMP_EX(a1) ;copy etemp from unimp
+ move.l ETEMP_HI(a6),ETEMP_HI(a1) ;frame to busy frame
+ move.l ETEMP_LO(a6),ETEMP_LO(a1)
+ move.l CMDREG1B(a6),CMDREG1B(a1) ;set inst in frame to unimp
+ move.l CMDREG1B(a6),d0 ;fix cmd1b to make it
+ and.l #$03c30000,d0 ;work for cmd3b
+ bfextu CMDREG1B(a6){13:1},d1 ;extract bit 2
+ lsl.l #5,d1 ;move it up to bit 5
+ swap d1 ;and into the upper word
+ or.l d1,d0 ;put it in the right place
+ bfextu CMDREG1B(a6){10:3},d1 ;extract bit 3,4,5
+ lsl.l #2,d1 ;move them up to bits 4-2
+ swap d1 ;and into the upper word
+ or.l d1,d0 ;put them in the right place
+ move.l d0,CMDREG3B(a1) ;in the busy frame
+*
+* Or in the FPSR from the emulation with the USER_FPSR on the stack.
+*
+ fmove.l FPSR,d0 ;d0 = FPSR left by the emulation
+ or.l d0,USER_FPSR(a6) ;merge into the saved user FPSR
+ move.l USER_FPSR(a6),FPSR_SHADOW(a1) ;set exc bits
+ or.l #sx_mask,E_BYTE(a1) ;set SFLAG (E_BYTE), XFLAG (T_BYTE)
+ bra do_clean ;go restore the frame and exit
+
+*
+* Frame is an unimp frame possibly resulting from an fmove <ea>,fp0
+* that caused an exception
+*
+* a1 is modified to point into the new frame allowing fpsp equates
+* to be valid.
+*
+unimp_x:
+ cmpi.b #UNIMP_40_SIZE-4,1(a7) ;test for orig unimp frame
+ bne.b test_rev ;no, try the rev frame size
+ lea.l UNIMP_40_SIZE+LOCAL_SIZE(a7),a1 ;a1 = equate base, new frame
+ bra.b unimp_con
+test_rev:
+ cmpi.b #UNIMP_41_SIZE-4,1(a7) ;test for rev unimp frame
+ bne.l fpsp_fmt_error ;if not $28 or $30
+ lea.l UNIMP_41_SIZE+LOCAL_SIZE(a7),a1 ;a1 = equate base, new frame
+
+unimp_con:
+*
+* Fix up the new unimp frame with entries from the old unimp frame
+*
+ move.l CMDREG1B(a6),CMDREG1B(a1) ;set inst in frame to unimp
+*
+* Or in the FPSR from the emulation with the USER_FPSR on the stack.
+*
+ fmove.l FPSR,d0 ;d0 = FPSR left by the emulation
+ or.l d0,USER_FPSR(a6) ;merge into the saved user FPSR
+ bra do_clean ;go build busy frame and restore
+
+*
+* Frame is idle, so check for exceptions reported through
+* USER_FPSR and set the unimp frame accordingly.
+* A7 must be incremented to the point before the
+* idle fsave vector to the unimp vector.
+*
+
+do_check:
+ add.l #4,A7 ;drop 4-byte idle frame, back to unimp frame
+*
+* Or in the FPSR from the emulation with the USER_FPSR on the stack.
+*
+ fmove.l FPSR,d0 ;d0 = FPSR left by the emulation
+ or.l d0,USER_FPSR(a6) ;merge into the saved user FPSR
+*
+* On a busy frame, we must clear the nmnexc bits.
+*
+ cmpi.b #BUSY_SIZE-4,1(a7) ;check frame type
+ bne.b check_fr ;not busy; else fall through, clr nmnexc
+ clr.w NMNEXC(a6) ;clr nmnexc & nmcexc
+ btst.b #5,CMDREG1B(a6) ;test for fmove out (direction_bit)
+ bne.b frame_com ;fmove out: skip shadow/flag setup
+ move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;set exc bits
+ or.l #sx_mask,E_BYTE(a6) ;set SFLAG (E_BYTE), XFLAG (T_BYTE)
+ bra.b frame_com
+check_fr:
+ cmp.b #UNIMP_40_SIZE-4,1(a7) ;orig unimp frame?
+ beq.b frame_com ;yes, leave nmnexc alone
+ clr.w NMNEXC(a6) ;else clr nmnexc & nmcexc
+frame_com:
+ move.b FPCR_ENABLE(a6),d0 ;get fpcr enable byte
+ and.b FPSR_EXCEPT(a6),d0 ;and in the fpsr exc byte
+ bfffo d0{24:8},d1 ;d1 = offset of first set bit (32 if none)
+ lea.l exc_tbl,a0 ;load jmp table address
+ subi.b #24,d1 ;normalize bit offset to 0-8
+ move.l (a0,d1.w*4),a0 ;load routine address based
+* ;on first enabled exc
+ jmp (a0) ;jump to routine
+*
+* Bsun is not possible in unimp or unsupp
+*
+bsun_exc:
+ bra do_clean ;no frame fix-up; go clean and exit
+*
+* The typical work to be done to the unimp frame to report an
+* exception is to set the E1/E3 byte and clr the U flag.
+* commonE1 does this for E1 exceptions, which are snan,
+* operr, and dz. commonE3 does this for E3 exceptions, which
+* are inex2 and inex1, and also clears the E1 exception bit
+* left over from the unimp exception.
+*
+commonE1:
+ bset.b #E1,E_BYTE(a6) ;set E1 flag
+ bra.w commonE ;go clr U flag, then clean and exit
+
+commonE3:
+ tst.b UFLG_TMP(a6) ;test flag for unsup/unimp state
+ bne.b unsE3 ;nonzero: unsupp path
+uniE3:
+ bset.b #E3,E_BYTE(a6) ;set E3 flag
+ bclr.b #E1,E_BYTE(a6) ;clr E1 from unimp
+ bra.w commonE ;go clr U flag, then clean and exit
+
+unsE3:
+ tst.b RES_FLG(a6) ;RES_FLG nonzero?
+ bne.b unsE3_0 ;yes, skip setting E3
+unsE3_1:
+ bset.b #E3,E_BYTE(a6) ;set E3 flag
+unsE3_0:
+ bclr.b #E1,E_BYTE(a6) ;clr E1 flag
+ move.l CMDREG1B(a6),d0 ;fix cmd1b to make it
+ and.l #$03c30000,d0 ;work for cmd3b
+ bfextu CMDREG1B(a6){13:1},d1 ;extract bit 2
+ lsl.l #5,d1 ;move it up to bit 5
+ swap d1 ;and into the upper word
+ or.l d1,d0 ;put it in the right place
+ bfextu CMDREG1B(a6){10:3},d1 ;extract bit 3,4,5
+ lsl.l #2,d1 ;move them up to bits 4-2
+ swap d1 ;and into the upper word
+ or.l d1,d0 ;put them in the right place
+ move.l d0,CMDREG3B(a6) ;in the busy frame
+
+commonE:
+ bclr.b #UFLAG,T_BYTE(a6) ;clr U flag from unimp
+ bra.w do_clean ;go clean and exit
+*
+* No bits in the enable byte match existing exceptions. Check for
+* the case of the ovfl exc without the ovfl enabled, but with
+* inex2 enabled.
+*
+no_match:
+ btst.b #inex2_bit,FPCR_ENABLE(a6) ;check for ovfl/inex2 case
+ beq.b no_exc ;if clear, exit
+ btst.b #ovfl_bit,FPSR_EXCEPT(a6) ;now check ovfl
+ beq.b no_exc ;if clear, exit
+ bra.b ovfl_unfl ;go to ovfl_unfl to determine if
+* ;it is an unsupp or unimp exc
+
+* No exceptions are to be reported. If the instruction was
+* unimplemented, no FPU restore is necessary. If it was
+* unsupported, we must perform the restore.
+no_exc:
+ tst.b UFLG_TMP(a6) ;test flag for unsupp/unimp state
+ beq.b uni_no_exc ;zero: unimp, no FPU restore needed
+uns_no_exc:
+ tst.b RES_FLG(a6) ;check if frestore is needed
+ bne.w do_clean ;set: go frestore; if clear, none needed
+uni_no_exc:
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;reload saved d0-d1/a0-a1
+ fmovem.x USER_FP0(a6),fp0-fp3 ;reload saved fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;reload saved fp ctrl regs
+ unlk a6
+ bra finish_up ;check for trace, then exit
+*
+* Unsupported Data Type Handler:
+* Ovfl:
+* An fmoveout that results in an overflow is reported this way.
+* Unfl:
+* An fmoveout that results in an underflow is reported this way.
+*
+* Unimplemented Instruction Handler:
+* Ovfl:
+* Only scosh, setox, ssinh, stwotox, and scale can set overflow in
+* this manner.
+* Unfl:
+* Stwotox, setox, and scale can set underflow in this manner.
+* Any of the other Library Routines such that f(x)=x in which
+* x is an extended denorm can report an underflow exception.
+* It is the responsibility of the exception-causing exception
+* to make sure that WBTEMP is correct.
+*
+* The exceptional operand is in FP_SCR1.
+*
+ovfl_unfl:
+ tst.b UFLG_TMP(a6) ;test flag for unsupp/unimp state
+ beq.b ofuf_con ;zero: unimp, go build a busy frame
+*
+* The caller was from an unsupported data type trap. Test if the
+* caller set CU_ONLY. If so, the exceptional operand is expected in
+* FPTEMP, rather than WBTEMP.
+*
+ tst.b CU_ONLY(a6) ;test if inst is cu-only
+ beq.w unsE3 ;not cu-only: handle via unsE3
+* move.w #$fe,CU_SAVEPC(a6)
+ clr.b CU_SAVEPC(a6)
+ bset.b #E1,E_BYTE(a6) ;set E1 exception flag
+ move.w ETEMP_EX(a6),FPTEMP_EX(a6) ;copy etemp
+ move.l ETEMP_HI(a6),FPTEMP_HI(a6) ;into
+ move.l ETEMP_LO(a6),FPTEMP_LO(a6) ;fptemp
+ bset.b #fptemp15_bit,DTAG(a6) ;set fpte15
+ bclr.b #UFLAG,T_BYTE(a6) ;clr U flag from unimp
+ bra.w do_clean ;go clean and exit
+
+ofuf_con:
+ move.b (a7),VER_TMP(a6) ;save version number
+ cmpi.b #BUSY_SIZE-4,1(a7) ;check for busy frame
+ beq.b busy_fr ;already busy; if unimp, grow to busy
+ cmpi.b #VER_40,(a7) ;test for orig unimp frame
+ bne.b try_41 ;if not, test for rev frame
+ moveq.l #13,d0 ;need to zero 14 lwords
+ bra.b ofuf_fin
+try_41:
+ cmpi.b #VER_41,(a7) ;test for rev unimp frame
+ bne.l fpsp_fmt_error ;if neither, exit with error
+ moveq.l #11,d0 ;need to zero 12 lwords
+
+ofuf_fin:
+ clr.l (a7) ;zero the old format lword
+loop1:
+ clr.l -(a7) ;dec a7, then clear
+ dbra.w d0,loop1 ;runs d0+1 times
+ move.b VER_TMP(a6),(a7) ;put version number back
+ move.b #BUSY_SIZE-4,1(a7) ;write busy fmt word.
+busy_fr:
+ move.l FP_SCR1(a6),WBTEMP_EX(a6) ;write
+ move.l FP_SCR1+4(a6),WBTEMP_HI(a6) ;exceptional op to
+ move.l FP_SCR1+8(a6),WBTEMP_LO(a6) ;wbtemp
+ bset.b #E3,E_BYTE(a6) ;set E3 flag
+ bclr.b #E1,E_BYTE(a6) ;make sure E1 is clear
+ bclr.b #UFLAG,T_BYTE(a6) ;clr U flag
+ move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;set exc bits
+ or.l #sx_mask,E_BYTE(a6) ;set SFLAG (E_BYTE), XFLAG (T_BYTE)
+ move.l CMDREG1B(a6),d0 ;fix cmd1b to make it
+ and.l #$03c30000,d0 ;work for cmd3b
+ bfextu CMDREG1B(a6){13:1},d1 ;extract bit 2
+ lsl.l #5,d1 ;move it up to bit 5
+ swap d1 ;and into the upper word
+ or.l d1,d0 ;put it in the right place
+ bfextu CMDREG1B(a6){10:3},d1 ;extract bit 3,4,5
+ lsl.l #2,d1 ;move them up to bits 4-2
+ swap d1 ;and into the upper word
+ or.l d1,d0 ;put them in the right place
+ move.l d0,CMDREG3B(a6) ;in the busy frame
+
+*
+* Check if the frame to be restored is busy or unimp.
+*** NOTE *** Bug fix for errata (0d43b #3)
+* If the frame is unimp, we must create a busy frame to
+* fix the bug with the nmnexc bits in cases in which they
+* are set by a previous instruction and not cleared by
+* the save. The frame will be unimp only if the final
+* instruction in an emulation routine caused the exception
+* by doing an fmove <ea>,fp0. The exception operand, in
+* internal format, is in fptemp.
+*
+do_clean:
+ cmpi.b #UNIMP_40_SIZE-4,1(a7) ;orig unimp frame?
+ bne.b do_con ;no, check for rev unimp
+ moveq.l #13,d0 ;in orig, need to zero 14 lwords
+ bra.b do_build
+do_con:
+ cmpi.b #UNIMP_41_SIZE-4,1(a7) ;rev unimp frame?
+ bne.b do_restore ;frame must be busy
+ moveq.l #11,d0 ;in rev, need to zero 12 lwords
+
+do_build:
+ move.b (a7),VER_TMP(a6) ;save version number
+ clr.l (a7) ;zero the old format lword
+loop2:
+ clr.l -(a7) ;dec a7, then clear
+ dbra.w d0,loop2 ;runs d0+1 times
+*
+* Use a1 as pointer into new frame. a6 is not correct if an unimp or
+* busy frame was created as the result of an exception on the final
+* instruction of an emulation routine.
+*
+* We need to set the nmcexc bits if the exception is E1. Otherwise,
+* the exc taken will be inex2.
+*
+ lea.l BUSY_SIZE+LOCAL_SIZE(a7),a1 ;init a1 for new frame
+ move.b VER_TMP(a6),(a7) ;put version number back
+ move.b #BUSY_SIZE-4,1(a7) ;write busy size byte
+ move.l FP_SCR1(a6),WBTEMP_EX(a1) ;write
+ move.l FP_SCR1+4(a6),WBTEMP_HI(a1) ;exceptional op to
+ move.l FP_SCR1+8(a6),WBTEMP_LO(a1) ;wbtemp
+* btst.b #E1,E_BYTE(a1)
+* beq.b do_restore
+ bfextu USER_FPSR(a6){17:4},d0 ;get snan/operr/ovfl/unfl bits
+ bfins d0,NMCEXC(a1){4:4} ;and insert them in nmcexc
+ move.l USER_FPSR(a6),FPSR_SHADOW(a1) ;set exc bits
+ or.l #sx_mask,E_BYTE(a1) ;set SFLAG (E_BYTE), XFLAG (T_BYTE)
+
+do_restore:
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;reload saved d0-d1/a0-a1
+ fmovem.x USER_FP0(a6),fp0-fp3 ;reload saved fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;reload saved fp ctrl regs
+ frestore (a7)+ ;load the frame into the FPU, pop it
+ tst.b RES_FLG(a6) ;RES_FLG indicates a "continuation" frame
+ beq cont ;clear: skip the bug 1384 work-around
+ bsr bug1384 ;set: apply the work-around
+cont:
+ unlk a6
+*
+* If trace mode enabled, then go to trace handler. This handler
+* cannot have any fp instructions. If there are fp inst's and an
+* exception has been restored into the machine then the exception
+* will occur upon execution of the fp inst. This is not desirable
+* in the kernel (supervisor mode). See MC68040 manual Section 9.3.8.
+*
+finish_up:
+ btst.b #7,(a7) ;test T1 in SR
+ bne.b g_trace ;set: go build a trace frame
+ btst.b #6,(a7) ;test T0 in SR
+ bne.b g_trace ;set: go build a trace frame
+ bra.l fpsp_done ;no tracing: normal exit
+*
+* Change integer stack to look like trace stack
+* The address of the instruction that caused the
+* exception is already in the integer stack (is
+* the same as the saved fpiar)
+*
+* If the current frame is already a 6-word stack then all
+* that needs to be done is to change the vector# to TRACE.
+* If the frame is only a 4-word stack (meaning we got here
+* on an Unsupported data type exception), then we need to grow
+* the stack an extra 2 words and get the FPIAR from the FPU.
+*
+g_trace:
+ bftst EXC_VEC-4(sp){0:4} ;test top 4 bits of format+vector word
+ bne g_easy ;nonzero: only the vector# needs changing
+
+ subq.l #4,sp make room
+ move.l 4(sp),(sp) ;slide the first two lwords of the
+ move.l 8(sp),4(sp) ;frame down by 4 bytes
+ sub.l #BUSY_SIZE,sp ;reserve room for an fsave frame
+ fsave (sp) ;save the FPU internal state
+ fmove.l fpiar,BUSY_SIZE+EXC_EA-4(sp) ;fpiar into the new EA slot
+ frestore (sp) ;put the FPU state back
+ add.l #BUSY_SIZE,sp ;release the fsave area
+
+g_easy:
+ move.w #TRACE_VEC,EXC_VEC-4(a7) ;write trace format+vector word
+ bra.l real_trace ;go to the trace handler
+*
+* This is a work-around for hardware bug 1384.
+*
+bug1384:
+ link a5,#0 ; a5 marks the end of the fsave frame
+ fsave -(sp) ; capture the current FPU state frame
+ cmpi.b #$41,(sp) ; check for correct frame
+ beq frame_41 ; version $41
+ bgt nofix ; if more advanced mask, do nada
+
+frame_40:
+ tst.b 1(sp) ; check to see if idle
+ bne notidle
+idle40:
+ clr.l (sp) ; get rid of old fsave frame
+ move.l d1,USER_D1(a6) ; save d1
+ move.w #8,d1 ; place unimp frame instead
+loop40: clr.l -(sp) ; push 9 zero lwords (d1 = 8)
+ dbra d1,loop40
+ move.l USER_D1(a6),d1 ; restore d1
+ move.l #$40280000,-(sp) ; header: version $40, size $28
+ frestore (sp)+ ; load the unimp frame, pop it
+ unlk a5
+ rts
+
+frame_41:
+ tst.b 1(sp) ; check to see if idle
+ bne notidle
+idle41:
+ clr.l (sp) ; get rid of old fsave frame
+ move.l d1,USER_D1(a6) ; save d1
+ move.w #10,d1 ; place unimp frame instead
+loop41: clr.l -(sp) ; push 11 zero lwords (d1 = 10)
+ dbra d1,loop41
+ move.l USER_D1(a6),d1 ; restore d1
+ move.l #$41300000,-(sp) ; header: version $41, size $30
+ frestore (sp)+ ; load the unimp frame, pop it
+ unlk a5
+ rts
+
+notidle:
+ bclr.b #etemp15_bit,-40(a5) ; clr bit 4, 40 bytes below frame end
+ frestore (sp)+ ; reload the patched frame
+ unlk a5
+ rts
+
+nofix:
+ frestore (sp)+ ; reload the frame unchanged
+ unlk a5
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/get_op.sa b/sys/arch/m68k/fpsp/get_op.sa
new file mode 100644
index 00000000000..c79646e0438
--- /dev/null
+++ b/sys/arch/m68k/fpsp/get_op.sa
@@ -0,0 +1,701 @@
+* $NetBSD: get_op.sa,v 1.3 1994/10/26 07:49:09 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* get_op.sa 3.6 5/19/92
+*
+* get_op.sa 3.5 4/26/91
+*
+* Description: This routine is called by the unsupported format/data
+* type exception handler ('unsupp' - vector 55) and the unimplemented
+* instruction exception handler ('unimp' - vector 11). 'get_op'
+* determines the opclass (0, 2, or 3) and branches to the
+* opclass handler routine. See 68881/2 User's Manual table 4-11
+* for a description of the opclasses.
+*
+* For UNSUPPORTED data/format (exception vector 55) and for
+* UNIMPLEMENTED instructions (exception vector 11) the following
+* applies:
+*
+* - For unnormalized numbers (opclass 0, 2, or 3) the
+* number(s) is normalized and the operand type tag is updated.
+*
+* - For a packed number (opclass 2) the number is unpacked and the
+* operand type tag is updated.
+*
+* - For denormalized numbers (opclass 0 or 2) the number(s) is not
+* changed but passed to the next module. The next module for
+* unimp is do_func, the next module for unsupp is res_func.
+*
+* For UNSUPPORTED data/format (exception vector 55) only the
+* following applies:
+*
+* - If there is a move out with a packed number (opclass 3) the
+* number is packed and written to user memory. For the other
+* opclasses the number(s) are written back to the fsave stack
+* and the instruction is then restored back into the '040. The
+* '040 is then able to complete the instruction.
+*
+* For example:
+* fadd.x fpm,fpn where the fpm contains an unnormalized number.
+* The '040 takes an unsupported data trap and gets to this
+* routine. The number is normalized, put back on the stack and
+* then an frestore is done to restore the instruction back into
+* the '040. The '040 then re-executes the fadd.x fpm,fpn with
+* a normalized number in the source and the instruction is
+* successful.
+*
+* Next consider if in the process of normalizing the un-
+* normalized number it becomes a denormalized number. The
+* routine which converts the unnorm to a norm (called mk_norm)
+* detects this and tags the number as a denorm. The routine
+* res_func sees the denorm tag and converts the denorm to a
+* norm. The instruction is then restored back into the '040
+* which re-executes the instruction.
+*
+
+GET_OP IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xdef PIRN,PIRZRM,PIRP
+ xdef SMALRN,SMALRZRM,SMALRP
+ xdef BIGRN,BIGRZRM,BIGRP
+
+PIRN:
+ dc.l $40000000,$c90fdaa2,$2168c235 ;pi, round to nearest
+PIRZRM:
+ dc.l $40000000,$c90fdaa2,$2168c234 ;pi, round to zero/minus infinity
+PIRP:
+ dc.l $40000000,$c90fdaa2,$2168c235 ;pi, round to plus infinity
+
+*round to nearest
+SMALRN:
+ dc.l $3ffd0000,$9a209a84,$fbcff798 ;log10(2)
+ dc.l $40000000,$adf85458,$a2bb4a9a ;e
+ dc.l $3fff0000,$b8aa3b29,$5c17f0bc ;log2(e)
+ dc.l $3ffd0000,$de5bd8a9,$37287195 ;log10(e)
+ dc.l $00000000,$00000000,$00000000 ;0.0
+* round to zero;round to negative infinity
+SMALRZRM:
+ dc.l $3ffd0000,$9a209a84,$fbcff798 ;log10(2)
+ dc.l $40000000,$adf85458,$a2bb4a9a ;e
+ dc.l $3fff0000,$b8aa3b29,$5c17f0bb ;log2(e)
+ dc.l $3ffd0000,$de5bd8a9,$37287195 ;log10(e)
+ dc.l $00000000,$00000000,$00000000 ;0.0
+* round to positive infinity
+SMALRP:
+ dc.l $3ffd0000,$9a209a84,$fbcff799 ;log10(2)
+ dc.l $40000000,$adf85458,$a2bb4a9b ;e
+ dc.l $3fff0000,$b8aa3b29,$5c17f0bc ;log2(e)
+ dc.l $3ffd0000,$de5bd8a9,$37287195 ;log10(e)
+ dc.l $00000000,$00000000,$00000000 ;0.0
+
+*round to nearest
+BIGRN:
+ dc.l $3ffe0000,$b17217f7,$d1cf79ac ;ln(2)
+ dc.l $40000000,$935d8ddd,$aaa8ac17 ;ln(10)
+ dc.l $3fff0000,$80000000,$00000000 ;10 ^ 0
+
+ xdef PTENRN
+PTENRN:
+ dc.l $40020000,$A0000000,$00000000 ;10 ^ 1
+ dc.l $40050000,$C8000000,$00000000 ;10 ^ 2
+ dc.l $400C0000,$9C400000,$00000000 ;10 ^ 4
+ dc.l $40190000,$BEBC2000,$00000000 ;10 ^ 8
+ dc.l $40340000,$8E1BC9BF,$04000000 ;10 ^ 16
+ dc.l $40690000,$9DC5ADA8,$2B70B59E ;10 ^ 32
+ dc.l $40D30000,$C2781F49,$FFCFA6D5 ;10 ^ 64
+ dc.l $41A80000,$93BA47C9,$80E98CE0 ;10 ^ 128
+ dc.l $43510000,$AA7EEBFB,$9DF9DE8E ;10 ^ 256
+ dc.l $46A30000,$E319A0AE,$A60E91C7 ;10 ^ 512
+ dc.l $4D480000,$C9767586,$81750C17 ;10 ^ 1024
+ dc.l $5A920000,$9E8B3B5D,$C53D5DE5 ;10 ^ 2048
+ dc.l $75250000,$C4605202,$8A20979B ;10 ^ 4096
+*round to zero;round to minus infinity
+BIGRZRM:
+ dc.l $3ffe0000,$b17217f7,$d1cf79ab ;ln(2)
+ dc.l $40000000,$935d8ddd,$aaa8ac16 ;ln(10)
+ dc.l $3fff0000,$80000000,$00000000 ;10 ^ 0
+
+ xdef PTENRM
+PTENRM:
+ dc.l $40020000,$A0000000,$00000000 ;10 ^ 1
+ dc.l $40050000,$C8000000,$00000000 ;10 ^ 2
+ dc.l $400C0000,$9C400000,$00000000 ;10 ^ 4
+ dc.l $40190000,$BEBC2000,$00000000 ;10 ^ 8
+ dc.l $40340000,$8E1BC9BF,$04000000 ;10 ^ 16
+ dc.l $40690000,$9DC5ADA8,$2B70B59D ;10 ^ 32
+ dc.l $40D30000,$C2781F49,$FFCFA6D5 ;10 ^ 64
+ dc.l $41A80000,$93BA47C9,$80E98CDF ;10 ^ 128
+ dc.l $43510000,$AA7EEBFB,$9DF9DE8D ;10 ^ 256
+ dc.l $46A30000,$E319A0AE,$A60E91C6 ;10 ^ 512
+ dc.l $4D480000,$C9767586,$81750C17 ;10 ^ 1024
+ dc.l $5A920000,$9E8B3B5D,$C53D5DE5 ;10 ^ 2048
+ dc.l $75250000,$C4605202,$8A20979A ;10 ^ 4096
+*round to positive infinity
+BIGRP:
+ dc.l $3ffe0000,$b17217f7,$d1cf79ac ;ln(2)
+ dc.l $40000000,$935d8ddd,$aaa8ac17 ;ln(10)
+ dc.l $3fff0000,$80000000,$00000000 ;10 ^ 0
+
+ xdef PTENRP
+PTENRP:
+ dc.l $40020000,$A0000000,$00000000 ;10 ^ 1
+ dc.l $40050000,$C8000000,$00000000 ;10 ^ 2
+ dc.l $400C0000,$9C400000,$00000000 ;10 ^ 4
+ dc.l $40190000,$BEBC2000,$00000000 ;10 ^ 8
+ dc.l $40340000,$8E1BC9BF,$04000000 ;10 ^ 16
+ dc.l $40690000,$9DC5ADA8,$2B70B59E ;10 ^ 32
+ dc.l $40D30000,$C2781F49,$FFCFA6D6 ;10 ^ 64
+ dc.l $41A80000,$93BA47C9,$80E98CE0 ;10 ^ 128
+ dc.l $43510000,$AA7EEBFB,$9DF9DE8E ;10 ^ 256
+ dc.l $46A30000,$E319A0AE,$A60E91C7 ;10 ^ 512
+ dc.l $4D480000,$C9767586,$81750C18 ;10 ^ 1024
+ dc.l $5A920000,$9E8B3B5D,$C53D5DE6 ;10 ^ 2048
+ dc.l $75250000,$C4605202,$8A20979B ;10 ^ 4096
+
+ xref nrm_zero
+ xref decbin
+ xref round
+
+ xdef get_op
+ xdef uns_getop
+ xdef uni_getop
+get_op:
+ clr.b DY_MO_FLG(a6) ;default the flag to monadic
+ tst.b UFLG_TMP(a6) ;test flag for unsupp/unimp state
+ beq.b uni_getop ;zero: unimp entry
+
+uns_getop:
+ btst.b #direction_bit,CMDREG1B(a6) ;test bit 0 of opclass
+ bne.w opclass3 ;branch if a fmove out (any kind)
+ btst.b #6,CMDREG1B(a6) ;test bit 1 of opclass
+ beq.b uns_notpacked ;clear: skip the packed check
+
+ bfextu CMDREG1B(a6){3:3},d0 ;get source specifier field
+ cmp.b #3,d0 ;3 = packed source
+ beq.w pack_source ;check for a packed src op, branch if so
+uns_notpacked:
+ bsr chk_dy_mo ;set the dyadic/monadic flag
+ tst.b DY_MO_FLG(a6) ;zero = monadic, nonzero = dyadic
+ beq.b src_op_ck ;if monadic, go check src op
+* ;else, check dst op (fall through)
+
+ btst.b #7,DTAG(a6) ;dest tag msb set?
+ beq.b src_op_ck ;if dst op is norm, check src op
+ bra.b dst_ex_dnrm ;else, handle destination unnorm/dnrm
+
+uni_getop:
+ bfextu CMDREG1B(a6){0:6},d0 ;get opclass and src fields
+ cmpi.l #$17,d0 ;if op class and size fields are $17,
+* ;it is FMOVECR; if not, continue
+*
+* If the instruction is fmovecr, exit get_op. It is handled
+* in do_func and smovecr.sa.
+*
+ bne.w not_fmovecr ;not fmovecr: go examine the operands
+ rts ;fmovecr: nothing to do in get_op
+
+not_fmovecr:
+ btst.b #E1,E_BYTE(a6) ;if set, there is a packed operand
+ bne.w pack_source ;check for packed src op, branch if so
+
+* The following lines are coded to optimize on normalized operands
+ move.b STAG(a6),d0
+ or.b DTAG(a6),d0 ;check if either of STAG/DTAG msb set
+ bmi.b dest_op_ck ;if so, some op needs to be fixed
+ rts ;neither msb set: nothing to fix
+
+dest_op_ck:
+ btst.b #7,DTAG(a6) ;check for unsupported data types in
+ beq.b src_op_ck ;the destination, if not, check src op
+ bsr chk_dy_mo ;set dyadic/monadic flag
+ tst.b DY_MO_FLG(a6) ;zero = monadic, nonzero = dyadic
+ beq.b src_op_ck ;if monadic, check src op
+*
+* At this point, destination has an extended denorm or unnorm.
+*
+dst_ex_dnrm:
+ move.w FPTEMP_EX(a6),d0 ;get destination exponent
+ andi.w #$7fff,d0 ;mask sign, check if exp = 0000
+ beq.b src_op_ck ;if denorm then check source op.
+* ;denorms are taken care of in res_func
+* ;(unsupp) or do_func (unimp)
+* ;else unnorm fall through
+ lea.l FPTEMP(a6),a0 ;point a0 to dop - used in mk_norm
+ bsr mk_norm ;go normalize - mk_norm returns:
+* ;L_SCR1{7:5} = operand tag
+* ; (000 = norm, 100 = denorm)
+* ;L_SCR1{4} = fpte15 or ete15
+* ; 0 = exp > $3fff
+* ; 1 = exp <= $3fff
+* ;and puts the normalized num back
+* ;on the fsave stack
+*
+ move.b L_SCR1(a6),DTAG(a6) ;write the new tag & fpte15
+* ;to the fsave stack and fall
+* ;through to check source operand
+*
+src_op_ck:
+ btst.b #7,STAG(a6) ;test source tag msb (denorm_bit)
+ beq.w end_getop ;check for unsupported data types on the
+* ;source operand
+ btst.b #5,STAG(a6) ;test bit 5 of the source tag
+ bne.b src_sd_dnrm ;if bit 5 set, handle sgl/dbl denorms
+*
+* At this point only unnorms or extended denorms are possible.
+*
+src_ex_dnrm:
+ move.w ETEMP_EX(a6),d0 ;get source exponent
+ andi.w #$7fff,d0 ;mask sign, check if exp = 0000
+ beq.w end_getop ;if denorm then exit, denorms are
+* ;handled in do_func
+ lea.l ETEMP(a6),a0 ;point a0 to sop - used in mk_norm
+ bsr mk_norm ;go normalize - mk_norm returns:
+* ;L_SCR1{7:5} = operand tag
+* ; (000 = norm, 100 = denorm)
+* ;L_SCR1{4} = fpte15 or ete15
+* ; 0 = exp > $3fff
+* ; 1 = exp <= $3fff
+* ;and puts the normalized num back
+* ;on the fsave stack
+*
+ move.b L_SCR1(a6),STAG(a6) ;write the new tag & ete15
+ rts ;end_getop
+
+*
+* At this point, only single or double denorms are possible.
+* If the inst is not fmove, normalize the source. If it is,
+* do nothing to the input.
+*
+src_sd_dnrm:
+ btst.b #4,CMDREG1B(a6) ;differentiate between sgl/dbl denorm
+ bne.b is_double ;bit 4 set: double
+is_single:
+ move.w #$3f81,d1 ;write bias for sgl denorm (= sgl_thresh)
+ bra.b common ;goto the common code
+is_double:
+ move.w #$3c01,d1 ;write the bias for a dbl denorm (= dbl_thresh)
+common:
+ btst.b #sign_bit,ETEMP_EX(a6) ;grab sign bit of mantissa
+ beq.b pos ;positive: leave bit 15 clear
+ bset #15,d1 ;set sign bit because it is negative
+pos:
+ move.w d1,ETEMP_EX(a6)
+* ;put exponent on stack
+
+ move.w CMDREG1B(a6),d1 ;get the command word
+ and.w #$e3ff,d1 ;clear out source specifier
+ or.w #$0800,d1 ;set source specifier to extended prec
+ move.w d1,CMDREG1B(a6) ;write back to the command word in stack
+* ;this is needed to fix unsupp data stack
+ lea.l ETEMP(a6),a0 ;point a0 to sop
+
+ bsr mk_norm ;convert sgl/dbl denorm to norm
+ move.b L_SCR1(a6),STAG(a6) ;write the new tag & ete15 to STAG
+ rts ;end_getop
+*
+* At this point, the source is definitely packed, whether
+* instruction is dyadic or monadic is still unknown
+*
+pack_source:
+ move.l FPTEMP_LO(a6),ETEMP(a6) ;write ms part of packed
+* ;number to etemp slot
+ bsr chk_dy_mo ;set dyadic/monadic flag
+ bsr unpack ;unpack the packed source operand
+
+ tst.b DY_MO_FLG(a6) ;zero = monadic, nonzero = dyadic
+ beq.b end_getop ;if monadic, exit
+* ;else, fix FPTEMP
+pack_dya:
+ bfextu CMDREG1B(a6){6:3},d0 ;extract dest fp reg
+ move.l #7,d1
+ sub.l d0,d1 ;d1 = 7 - dest reg number
+ clr.l d0
+ bset.l d1,d0 ;set up d0 as a dynamic register mask
+ fmovem.x d0,FPTEMP(a6) ;write to FPTEMP
+
+ btst.b #7,DTAG(a6) ;check dest tag for unnorm or denorm
+ bne.w dst_ex_dnrm ;if set, handle the unnorm or ext denorm
+*
+* Dest is not denormalized. Check for norm, and set fpte15
+* accordingly.
+*
+ move.b DTAG(a6),d0
+ andi.b #$f0,d0 ;strip to only dtag:fpte15
+ tst.b d0 ;check for normalized value
+ bne.b end_getop ;if inf/nan/zero leave get_op
+ move.w FPTEMP_EX(a6),d0 ;get destination exponent
+ andi.w #$7fff,d0 ;mask sign
+ cmpi.w #$3fff,d0 ;check if fpte15 needs setting
+ bge.b end_getop ;if >= $3fff, leave fpte15=0
+ or.b #$10,DTAG(a6) ;else set fpte15 (bit 4 of DTAG)
+ bra.b end_getop
+
+*
+* At this point, it is either an fmoveout packed, unnorm or denorm
+*
+opclass3:
+ clr.b DY_MO_FLG(a6) ;set dyadic/monadic flag to monadic
+ bfextu CMDREG1B(a6){4:2},d0 ;get 2-bit field at offset 4 of command word
+ cmpi.b #3,d0 ;field = 3 identifies the packed move out
+ bne.w src_ex_dnrm ;if not equal, must be unnorm or denorm
+* ;else it is a packed move out
+* ;nothing to fix here, fall into the exit
+end_getop:
+ rts ;common exit used by the get_op paths above
+
+*
+* Sets the DY_MO_FLG correctly. This is used only if it is an
+* unsupported data type exception. Set if dyadic.
+*
+chk_dy_mo:
+ move.w CMDREG1B(a6),d0 ;get the command word
+ btst.l #5,d0 ;testing extension command word
+ beq.b set_mon ;if bit 5 = 0 then monadic
+ btst.l #4,d0 ;know that bit 5 = 1
+ beq.b set_dya ;if bit 4 = 0 then dyadic
+ andi.w #$007f,d0 ;get rid of all but extension bits {6:0}
+ cmpi.w #$0038,d0 ;if extension = $38 then fcmp (dyadic)
+ bne.b set_mon ;any other extension reaching here is monadic
+set_dya:
+ st.b DY_MO_FLG(a6) ;set the inst flag type to dyadic ($ff)
+ rts
+set_mon:
+ clr.b DY_MO_FLG(a6) ;set the inst flag type to monadic ($00)
+ rts
+*
+* MK_NORM
+*
+* Normalizes unnormalized numbers, sets tag to norm or denorm, sets unfl
+* exception if denorm.
+*
+* CASE opclass 0x0 unsupp
+* mk_norm till msb set
+* set tag = norm
+*
+* CASE opclass 0x0 unimp
+* mk_norm till msb set or exp = 0
+* if integer bit = 0
+* tag = denorm
+* else
+* tag = norm
+*
+* CASE opclass 011 unsupp
+* mk_norm till msb set or exp = 0
+* if integer bit = 0
+* tag = denorm
+* set unfl_nmcexe = 1
+* else
+* tag = norm
+*
+* if exp <= $3fff
+* set ete15 or fpte15 = 1
+* else set ete15 or fpte15 = 0
+
+* input:
+* a0 = points to operand to be normalized
+* output:
+* L_SCR1{7:5} = operand tag (000 = norm, 100 = denorm)
+* L_SCR1{4} = fpte15 or ete15 (0 = exp > $3fff, 1 = exp <=$3fff)
+* the normalized operand is placed back on the fsave stack
+mk_norm:
+ clr.l L_SCR1(a6) ;start with tag = norm and ete15/fpte15 = 0
+ bclr.b #sign_bit,LOCAL_EX(a0) ;strip sign; Z reflects the old sign bit
+ sne LOCAL_SGN(a0) ;transform into internal extended format
+
+ cmpi.b #$2c,1+EXC_VEC(a6) ;check if unimp
+ bne.b uns_data ;branch if unsupp
+ bsr uni_inst ;call if unimp (opclass 0x0)
+ bra.b reload ;go set the ete15/fpte15 bit
+uns_data:
+ btst.b #direction_bit,CMDREG1B(a6) ;check transfer direction
+ bne.b bit_set ;branch if set (opclass 011)
+ bsr uns_opx ;call if opclass 0x0
+ bra.b reload ;go set the ete15/fpte15 bit
+bit_set:
+ bsr uns_op3 ;opclass 011
+reload:
+ cmp.w #$3fff,LOCAL_EX(a0) ;if exp > $3fff
+ bgt.b end_mk ; fpte15/ete15 already set to 0
+ bset.b #4,L_SCR1(a6) ;else set fpte15/ete15 to 1
+* ;calling routine actually sets the
+* ;value on the stack (along with the
+* ;tag), since this routine doesn't
+* ;know if it should set ete15 or fpte15
+* ;ie, it doesn't know if this is the
+* ;src op or dest op.
+end_mk:
+ bfclr LOCAL_SGN(a0){0:8} ;clear saved sign byte; Z set if it was zero
+ beq.b end_mk_pos ;sign byte was zero: operand is positive
+ bset.b #sign_bit,LOCAL_EX(a0) ;convert back to IEEE format
+end_mk_pos:
+ rts ;tag and bit 4 are returned in L_SCR1
+*
+* CASE opclass 011 unsupp
+*
+uns_op3:
+ bsr nrm_zero ;normalize till msb = 1 or exp = zero
+ btst.b #7,LOCAL_HI(a0) ;if msb = 1
+ bne.b no_unfl ;then it is a norm: leave tag and unfl alone
+set_unfl:
+ or.b #dnrm_tag,L_SCR1(a6) ;set denorm tag
+ bset.b #unfl_bit,FPSR_EXCEPT(a6) ;set unfl exception bit
+no_unfl:
+ rts ;tag is returned in L_SCR1
+*
+* CASE opclass 0x0 unsupp
+*
+uns_opx:
+ bsr nrm_zero ;normalize the number
+ btst.b #7,LOCAL_HI(a0) ;check if integer bit (j-bit) is set
+ beq.b uns_den ;if clear then now have a denorm
+uns_nrm:
+ or.b #norm_tag,L_SCR1(a6) ;set tag to norm (norm_tag is $00 in l_fpsp.h)
+ rts
+uns_den:
+ or.b #dnrm_tag,L_SCR1(a6) ;set tag to denorm; unfl is not touched here
+ rts
+*
+* CASE opclass 0x0 unimp
+*
+uni_inst:
+ bsr nrm_zero ;normalize the operand at a0
+ btst.b #7,LOCAL_HI(a0) ;check if integer bit (j-bit) is set
+ beq.b uni_den ;if clear then now have a denorm
+uni_nrm:
+ or.b #norm_tag,L_SCR1(a6) ;set tag to norm (norm_tag is $00 in l_fpsp.h)
+ rts
+uni_den:
+ or.b #dnrm_tag,L_SCR1(a6) ;set tag to denorm; unfl is not touched here
+ rts
+
+*
+* Decimal to binary conversion
+*
+* Special cases of inf and NaNs are completed outside of decbin.
+* If the input is an snan, the snan bit is not set.
+*
+* input:
+* ETEMP(a6) - points to packed decimal string in memory
+* output:
+* fp0 - contains packed string converted to extended precision
+* ETEMP - same as fp0
+unpack:
+ move.w CMDREG1B(a6),d0 ;examine command word, looking for fmove's
+ and.w #$3b,d0 ;keep only bits 5-3 and 1-0
+ beq move_unpack ;special handling for fmove: must set FPSR_CC
+
+ move.w ETEMP(a6),d0 ;get word with inf information
+ bfextu d0{20:12},d1 ;get exponent into d1
+ cmpi.w #$0fff,d1 ;test for inf or NaN
+ bne.b try_zero ;if not equal, it is not special
+ bfextu d0{17:3},d1 ;get SE and y bits into d1
+ cmpi.w #7,d1 ;SE and y bits must be on for special
+ bne.b try_zero ;if not on, it is not special
+*input is of the special cases of inf and NaN
+ tst.l ETEMP_HI(a6) ;check ms mantissa
+ bne.b fix_nan ;if non-zero, it is a NaN
+ tst.l ETEMP_LO(a6) ;check ls mantissa
+ bne.b fix_nan ;if non-zero, it is a NaN
+ bra.w finish ;special already on stack
+fix_nan:
+ btst.b #signan_bit,ETEMP_HI(a6) ;test for snan
+ bne.w finish ;bit set: non-signalling nan, nothing to flag
+ or.l #snaniop_mask,USER_FPSR(a6) ;always set snan if it is so
+ bra.w finish
+try_zero:
+ move.w ETEMP_EX+2(a6),d0 ;get word 4
+ andi.w #$000f,d0 ;clear all but last ni(y)bble
+ tst.w d0 ;check for zero.
+ bne.w not_spec ;non-zero digit: needs a real conversion
+ tst.l ETEMP_HI(a6) ;check words 3 and 2
+ bne.w not_spec
+ tst.l ETEMP_LO(a6) ;check words 1 and 0
+ bne.w not_spec
+ tst.l ETEMP(a6) ;test sign of the zero
+ bge.b pos_zero ;sign bit clear: positive zero
+ move.l #$80000000,ETEMP(a6) ;write neg zero to etemp
+ clr.l ETEMP_HI(a6)
+ clr.l ETEMP_LO(a6)
+ bra.w finish
+pos_zero:
+ clr.l ETEMP(a6) ;write pos zero to etemp
+ clr.l ETEMP_HI(a6)
+ clr.l ETEMP_LO(a6)
+ bra.w finish
+
+not_spec:
+ fmovem.x fp0-fp1,-(a7) ;save fp0 - decbin returns in it
+ bsr decbin ;convert packed operand, result in fp0
+ fmove.x fp0,ETEMP(a6) ;put the unpacked sop in the fsave stack
+ fmovem.x (a7)+,fp0-fp1 ;restore the saved fp0-fp1
+ fmove.l #0,FPSR ;clr fpsr from decbin
+ bra finish
+
+*
+* Special handling for packed move in: Same results as all other
+* packed cases, but we must set the FPSR condition codes properly.
+*
+move_unpack:
+ move.w ETEMP(a6),d0 ;get word with inf information
+ bfextu d0{20:12},d1 ;get exponent into d1
+ cmpi.w #$0fff,d1 ;test for inf or NaN
+ bne.b mtry_zero ;if not equal, it is not special
+ bfextu d0{17:3},d1 ;get SE and y bits into d1
+ cmpi.w #7,d1 ;SE and y bits must be on for special
+ bne.b mtry_zero ;if not on, it is not special
+*input is of the special cases of inf and NaN
+ tst.l ETEMP_HI(a6) ;check ms mantissa
+ bne.b mfix_nan ;if non-zero, it is a NaN
+ tst.l ETEMP_LO(a6) ;check ls mantissa
+ bne.b mfix_nan ;if non-zero, it is a NaN
+*input is inf
+ or.l #inf_mask,USER_FPSR(a6) ;set I bit
+ tst.l ETEMP(a6) ;check sign
+ bge.w finish ;positive inf: only I is set
+ or.l #neg_mask,USER_FPSR(a6) ;set N bit
+ bra.w finish ;special already on stack
+mfix_nan:
+ or.l #nan_mask,USER_FPSR(a6) ;set NaN bit
+ move.b #nan_tag,STAG(a6) ;set stag to NaN
+ btst.b #signan_bit,ETEMP_HI(a6) ;test for snan
+ bne.b mn_snan ;bit set: non-signalling, skip snan handling
+ or.l #snaniop_mask,USER_FPSR(a6) ;set snan bit
+ btst.b #snan_bit,FPCR_ENABLE(a6) ;test for snan enabled
+ bne.b mn_snan ;trap enabled: leave the snan as it is
+ bset.b #signan_bit,ETEMP_HI(a6) ;force snans to qnans
+mn_snan:
+ tst.l ETEMP(a6) ;check for sign
+ bge.w finish ;if clr, go on
+ or.l #neg_mask,USER_FPSR(a6) ;set N bit
+ bra.w finish
+
+mtry_zero:
+ move.w ETEMP_EX+2(a6),d0 ;get word 4
+ andi.w #$000f,d0 ;clear all but last ni(y)bble
+ tst.w d0 ;check for zero.
+ bne.b mnot_spec ;non-zero digit: needs a real conversion
+ tst.l ETEMP_HI(a6) ;check words 3 and 2
+ bne.b mnot_spec
+ tst.l ETEMP_LO(a6) ;check words 1 and 0
+ bne.b mnot_spec
+ tst.l ETEMP(a6) ;test sign of the zero
+ bge.b mpos_zero ;sign bit clear: positive zero
+ or.l #neg_mask+z_mask,USER_FPSR(a6) ;set N and Z
+ move.l #$80000000,ETEMP(a6) ;write neg zero to etemp
+ clr.l ETEMP_HI(a6)
+ clr.l ETEMP_LO(a6)
+ bra.b finish
+mpos_zero:
+ or.l #z_mask,USER_FPSR(a6) ;set Z
+ clr.l ETEMP(a6) ;write pos zero to etemp
+ clr.l ETEMP_HI(a6)
+ clr.l ETEMP_LO(a6)
+ bra.b finish
+
+mnot_spec:
+ fmovem.x fp0-fp1,-(a7) ;save fp0 ,fp1 - decbin returns in fp0
+ bsr decbin ;convert packed operand, result in fp0
+ fmove.x fp0,ETEMP(a6)
+* ;put the unpacked sop in the fsave stack
+ fmovem.x (a7)+,fp0-fp1 ;restore fp0-fp1, then fall into finish
+
+finish:
+ move.w CMDREG1B(a6),d0 ;get the command word
+ and.w #$fbff,d0 ;change the source specifier field to
+* ;extended (was packed) by clearing bit 10.
+ move.w d0,CMDREG1B(a6) ;write command word back to fsave stack
+* ;we need to do this so the 040 will
+* ;re-execute the inst. without taking
+* ;another packed trap.
+
+fix_stag:
+*Converted result is now in etemp on fsave stack, now set the source
+*tag (stag)
+* if (ete =$7fff) then INF or NAN
+* if (etemp = $x.0----0) then
+* stag = INF
+* else
+* stag = NAN
+* else
+* if (ete = $0000) then
+* stag = ZERO
+* else
+* stag = NORM
+*
+* Note also that the etemp_15 bit (just right of the stag) must
+* be set accordingly.
+*
+ move.w ETEMP_EX(a6),d1 ;get sign/exponent word of the result
+ andi.w #$7fff,d1 ;strip sign
+ cmp.w #$7fff,d1 ;max exponent means inf or nan
+ bne.b z_or_nrm
+ move.l ETEMP_HI(a6),d1 ;any mantissa bit set makes it a nan
+ bne.b is_nan
+ move.l ETEMP_LO(a6),d1
+ bne.b is_nan
+is_inf:
+ move.b #$40,STAG(a6) ;stag = inf
+ move.l #$40,d0 ;also return the inf tag in d0
+ rts
+is_nan:
+ move.b #$60,STAG(a6) ;stag = nan
+ move.l #$60,d0 ;also return the nan tag in d0
+ rts
+z_or_nrm:
+ tst.w d1 ;zero exponent is treated as zero
+ bne.b is_nrm
+is_zro:
+* For a zero, set etemp_15
+ move.b #$30,STAG(a6) ;stag = zero ($20) plus etemp_15 ($10)
+ move.l #$20,d0 ;d0 gets the zero tag without etemp_15
+ rts
+is_nrm:
+* For a norm, check if the exp <= $3fff; if so, set etemp_15
+ cmpi.w #$3fff,d1
+ ble.b set_bit15 ;exp <= $3fff: etemp_15 must be set
+ clr.b STAG(a6) ;stag = norm, etemp_15 clear
+ bra.b end_is_nrm
+set_bit15:
+ move.b #$10,STAG(a6) ;stag = norm, etemp_15 set
+end_is_nrm:
+ clr.l d0 ;d0 = norm tag
+end_fix:
+ rts
+
+end_get:
+ rts ;no branch to end_get/end_fix is visible in this part of the file
+ end
diff --git a/sys/arch/m68k/fpsp/kernel_ex.sa b/sys/arch/m68k/fpsp/kernel_ex.sa
new file mode 100644
index 00000000000..98807a91adb
--- /dev/null
+++ b/sys/arch/m68k/fpsp/kernel_ex.sa
@@ -0,0 +1,519 @@
+* $NetBSD: kernel_ex.sa,v 1.2 1994/10/26 07:49:12 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* kernel_ex.sa 3.3 12/19/90
+*
+* This file contains routines to force exception status in the
+* fpu for exceptional cases detected or reported within the
+* transcendental functions. Typically, the t_xx routine will
+* set the appropriate bits in the USER_FPSR word on the stack.
+* The bits are tested in gen_except.sa to determine if an exceptional
+* situation needs to be created on return from the FPSP.
+*
+
+KERNEL_EX IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+mns_inf dc.l $ffff0000,$00000000,$00000000 ;-inf, loaded by m_inf
+pls_inf dc.l $7fff0000,$00000000,$00000000 ;+inf, loaded by p_inf
+nan dc.l $7fff0000,$ffffffff,$ffffffff ;default nan, loaded by t_operr
+huge dc.l $7ffe0000,$ffffffff,$ffffffff ;exp $7ffe with all mantissa bits set
+
+ xref ovf_r_k
+ xref unf_sub
+ xref nrm_set
+
+ xdef t_dz
+ xdef t_dz2
+ xdef t_operr
+ xdef t_unfl
+ xdef t_ovfl
+ xdef t_ovfl2
+ xdef t_inx2
+ xdef t_frcinx
+ xdef t_extdnrm
+ xdef t_resdnrm
+ xdef dst_nan
+ xdef src_nan
+*
+* DZ exception
+*
+*
+* if dz trap disabled
+* store properly signed inf (use sign of etemp) into fp0
+* set FPSR exception status dz bit, condition code
+* inf bit, and accrued dz bit
+* return
+* frestore the frame into the machine (done by unimp_hd)
+*
+* else dz trap enabled
+* set exception status bit & accrued bits in FPSR
+* set flag to disable sto_res from corrupting fp register
+* return
+* frestore the frame into the machine (done by unimp_hd)
+*
+* t_dz2 is used by monadic functions such as flogn (from do_func).
+* t_dz is used by monadic functions such as satanh (from the
+* transcendental function).
+*
+t_dz2:
+ bset.b #neg_bit,FPSR_CC(a6) ;set neg bit in FPSR
+ fmove.l #0,FPSR ;clr status bits (Z set)
+ btst.b #dz_bit,FPCR_ENABLE(a6) ;test FPCR for dz exc enabled
+ bne.b dz_ena_end ;enabled: N already set, just flag and return
+ bra.b m_inf ;flogx always returns -inf
+t_dz:
+ fmove.l #0,FPSR ;clr status bits (Z set)
+ btst.b #dz_bit,FPCR_ENABLE(a6) ;test FPCR for dz exc enabled
+ bne.b dz_ena ;enabled: no result is loaded
+*
+* dz disabled
+*
+ btst.b #sign_bit,ETEMP_EX(a6) ;check sign for neg or pos
+ beq.b p_inf ;branch if pos sign
+
+m_inf:
+ fmovem.x mns_inf,fp0 ;load -inf
+ bset.b #neg_bit,FPSR_CC(a6) ;set neg bit in FPSR
+ bra.b set_fpsr
+p_inf:
+ fmovem.x pls_inf,fp0 ;load +inf
+set_fpsr:
+ or.l #dzinf_mask,USER_FPSR(a6) ;set I,DZ,ADZ
+ rts
+*
+* dz enabled
+*
+dz_ena:
+ btst.b #sign_bit,ETEMP_EX(a6) ;check sign for neg or pos
+ beq.b dz_ena_end ;positive: leave N clear
+ bset.b #neg_bit,FPSR_CC(a6) ;set neg bit in FPSR
+dz_ena_end:
+ or.l #dzinf_mask,USER_FPSR(a6) ;set I,DZ,ADZ
+ st.b STORE_FLG(a6) ;flag: do not store a result
+ rts
+*
+* OPERR exception
+*
+* if (operr trap disabled)
+* set FPSR exception status operr bit, condition code
+* nan bit; Store default NAN into fp0
+* frestore the frame into the machine (done by unimp_hd)
+*
+* else (operr trap enabled)
+* set FPSR exception status operr bit, accrued operr bit
+* set flag to disable sto_res from corrupting fp register
+* frestore the frame into the machine (done by unimp_hd)
+*
+t_operr:
+ or.l #opnan_mask,USER_FPSR(a6) ;set NaN, OPERR, AIOP
+
+ btst.b #operr_bit,FPCR_ENABLE(a6) ;test FPCR for operr enabled
+ bne.b op_ena ;enabled: do not load a result
+
+ fmovem.x nan,fp0 ;load default nan
+ rts
+op_ena:
+ st.b STORE_FLG(a6) ;do not corrupt destination
+ rts
+
+*
+* t_unfl --- UNFL exception
+*
+* This entry point is used by all routines requiring unfl, inex2,
+* aunfl, and ainex to be set on exit.
+*
+* On entry, a0 points to the exceptional operand. The final exceptional
+* operand is built in FP_SCR1 and only the sign from the original operand
+* is used.
+*
+t_unfl:
+ clr.l FP_SCR1(a6) ;set exceptional operand to zero
+ clr.l FP_SCR1+4(a6)
+ clr.l FP_SCR1+8(a6)
+ tst.b (a0) ;extract sign from caller's exop
+ bpl.b unfl_signok ;positive: leave the zero unsigned
+ bset #sign_bit,FP_SCR1(a6) ;negative: give the zero a minus sign
+unfl_signok:
+ lea.l FP_SCR1(a6),a0 ;from here on a0 points to FP_SCR1
+ or.l #unfinx_mask,USER_FPSR(a6)
+* ;set UNFL, INEX2, AUNFL, AINEX
+unfl_con:
+ btst.b #unfl_bit,FPCR_ENABLE(a6) ;test FPCR for unfl enabled
+ beq.b unfl_dis ;disabled: skip the frame fixups
+
+unfl_ena:
+ bfclr STAG(a6){5:3} ;clear wbtm66,wbtm1,wbtm0
+ bset.b #wbtemp15_bit,WB_BYTE(a6) ;set wbtemp15
+ bset.b #sticky_bit,STICKY(a6) ;set sticky bit
+
+ bclr.b #E1,E_BYTE(a6) ;clear the E1 bit in the frame
+
+unfl_dis:
+ bfextu FPCR_MODE(a6){0:2},d0 ;get round precision
+
+ bclr.b #sign_bit,LOCAL_EX(a0) ;strip sign; Z reflects the old sign bit
+ sne LOCAL_SGN(a0) ;convert to internal ext format
+
+ bsr unf_sub ;returns IEEE result at a0
+* ;and sets FPSR_CC accordingly
+
+ bfclr LOCAL_SGN(a0){0:8} ;convert back to IEEE ext format
+ beq.b unfl_fin ;sign byte was zero: result is positive
+
+ bset.b #sign_bit,LOCAL_EX(a0) ;put the sign back on the result
+ bset.b #sign_bit,FP_SCR1(a6) ;set sign bit of exc operand
+
+unfl_fin:
+ fmovem.x (a0),fp0 ;store result in fp0
+ rts
+
+
+*
+* t_ovfl2 --- OVFL exception (without inex2 returned)
+*
+* This entry is used by scale to force catastrophic overflow. The
+* ovfl, aovfl, and ainex bits are set, but not the inex2 bit.
+*
+t_ovfl2:
+ or.l #ovfl_inx_mask,USER_FPSR(a6) ;set OVFL, AOVFL, AINEX
+ move.l ETEMP(a6),FP_SCR1(a6) ;copy all 12 bytes of ETEMP to FP_SCR1
+ move.l ETEMP_HI(a6),FP_SCR1+4(a6)
+ move.l ETEMP_LO(a6),FP_SCR1+8(a6)
+*
+* Check for single or double round precision. If single, check if
+* the lower 40 bits of ETEMP are zero; if not, set inex2. If double,
+* check if the lower 11 bits are zero; if not, set inex2.
+*
+ move.b FPCR_MODE(a6),d0 ;get the FPCR mode byte
+ andi.b #$c0,d0 ;keep only the rounding precision bits
+ beq.w t_work ;if extended, finish ovfl processing
+ cmpi.b #$40,d0 ;test for single
+ bne.b t_dbl
+t_sgl:
+ tst.l ETEMP_LO(a6) ;all 32 low mantissa bits must be zero
+ bne.b t_setinx2
+ move.l ETEMP_HI(a6),d0
+ andi.l #$ff,d0 ;look at only lower 8 bits
+ bne.b t_setinx2
+ bra.w t_work ;lower 40 bits are zero: no inex2
+t_dbl:
+ move.l ETEMP_LO(a6),d0
+ andi.l #$7ff,d0 ;look at only lower 11 bits
+ beq.w t_work ;lower 11 bits are zero: no inex2
+t_setinx2:
+ or.l #inex2_mask,USER_FPSR(a6) ;low mantissa bits are non-zero: set INEX2
+ bra.b t_work
+*
+* t_ovfl --- OVFL exception
+*
+*** Note: the exc operand is returned in ETEMP.
+*
+t_ovfl:
+ or.l #ovfinx_mask,USER_FPSR(a6) ;set OVFL, INEX2, AOVFL, AINEX
+t_work:
+ btst.b #ovfl_bit,FPCR_ENABLE(a6) ;test FPCR for ovfl enabled
+ beq.b ovf_dis ;disabled: skip the frame fixups
+
+ovf_ena:
+ clr.l FP_SCR1(a6) ;set exceptional operand
+ clr.l FP_SCR1+4(a6)
+ clr.l FP_SCR1+8(a6)
+
+ bfclr STAG(a6){5:3} ;clear wbtm66,wbtm1,wbtm0
+ bclr.b #wbtemp15_bit,WB_BYTE(a6) ;clear wbtemp15
+ bset.b #sticky_bit,STICKY(a6) ;set sticky bit
+
+ bclr.b #E1,E_BYTE(a6) ;clear the E1 bit in the frame
+* ;fall through to disabled case
+
+* For disabled overflow call 'ovf_r_k'. This routine loads the
+* correct result based on the rounding precision, destination
+* format, rounding mode and sign.
+*
+ovf_dis:
+ bsr ovf_r_k ;returns unsigned ETEMP_EX
+* ;and sets FPSR_CC accordingly.
+ bfclr ETEMP_SGN(a6){0:8} ;fix sign
+ beq.b ovf_pos ;sign byte was zero: result is positive
+ bset.b #sign_bit,ETEMP_EX(a6) ;put the sign back on the result
+ bset.b #sign_bit,FP_SCR1(a6) ;set exceptional operand sign
+ovf_pos:
+ fmovem.x ETEMP(a6),fp0 ;move the result to fp0
+ rts
+
+
+*
+* INEX2 exception
+*
+* The inex2 and ainex bits are set.
+*
+t_inx2:
+ or.l #inx2a_mask,USER_FPSR(a6) ;set INEX2, AINEX
+ rts ;nothing else is changed
+
+*
+* Force Inex2
+*
+* This routine is called by the transcendental routines to force
+* the inex2 exception bits set in the FPSR. If the underflow bit
+* is set, but the underflow trap was not taken, the aunfl bit in
+* the FPSR must be set.
+*
+t_frcinx:
+ or.l #inx2a_mask,USER_FPSR(a6) ;set INEX2, AINEX
+ btst.b #unfl_bit,FPSR_EXCEPT(a6) ;test for unfl bit set
+ beq.b no_uacc1 ;if clear, do not set aunfl
+ bset.b #aunfl_bit,FPSR_AEXCEPT(a6) ;unfl was set: also set accrued unfl
+no_uacc1:
+ rts
+
+*
+* DST_NAN
+*
+* Determine if the destination nan is signalling or non-signalling,
+* and set the FPSR bits accordingly. See the MC68040 User's Manual
+* section 3.2.2.5 NOT-A-NUMBERS.
+*
+dst_nan:
+ btst.b #sign_bit,FPTEMP_EX(a6) ;test sign of nan
+ beq.b dst_pos ;if clr, it was positive
+ bset.b #neg_bit,FPSR_CC(a6) ;set N bit
+dst_pos:
+ btst.b #signan_bit,FPTEMP_HI(a6) ;check if signalling
+ beq.b dst_snan ;branch if signalling
+
+ fmove.l d1,fpcr ;restore user's rmode/prec
+ fmove.x FPTEMP(a6),fp0 ;return the non-signalling nan
+*
+* Check the source nan. If it is signalling, snan will be reported.
+*
+ move.b STAG(a6),d0 ;get source tag byte
+ andi.b #$e0,d0 ;keep only the tag bits {7:5}
+ cmpi.b #$60,d0 ;is the source tagged as a nan?
+ bne.b no_snan ;no: nothing more to report
+ btst.b #signan_bit,ETEMP_HI(a6) ;check if signalling
+ bne.b no_snan ;bit set: source nan is non-signalling
+ or.l #snaniop_mask,USER_FPSR(a6) ;set NAN, SNAN, AIOP
+no_snan:
+ rts
+
+dst_snan:
+ btst.b #snan_bit,FPCR_ENABLE(a6) ;check if trap enabled
+ beq.b dst_dis ;branch if disabled
+
+ or.b #nan_tag,DTAG(a6) ;set up dtag for nan
+ st.b STORE_FLG(a6) ;do not store a result
+ or.l #snaniop_mask,USER_FPSR(a6) ;set NAN, SNAN, AIOP
+ rts
+
+dst_dis:
+ bset.b #signan_bit,FPTEMP_HI(a6) ;set SNAN bit in the dest operand
+ fmove.l d1,fpcr ;restore user's rmode/prec
+ fmove.x FPTEMP(a6),fp0 ;load non-sign. nan
+ or.l #snaniop_mask,USER_FPSR(a6) ;set NAN, SNAN, AIOP
+ rts
+
+*
+* SRC_NAN
+*
+* Determine if the source nan is signalling or non-signalling,
+* and set the FPSR bits accordingly. See the MC68040 User's Manual
+* section 3.2.2.5 NOT-A-NUMBERS.
+*
+src_nan:
+ btst.b #sign_bit,ETEMP_EX(a6) ;test sign of nan
+ beq.b src_pos ;if clr, it was positive
+ bset.b #neg_bit,FPSR_CC(a6) ;set N bit
+src_pos:
+ btst.b #signan_bit,ETEMP_HI(a6) ;check if signalling
+ beq.b src_snan ;branch if signalling
+ fmove.l d1,fpcr ;restore user's rmode/prec
+ fmove.x ETEMP(a6),fp0 ;return the non-signalling nan
+ rts ;no exception bits are set on this path
+
+src_snan:
+ btst.b #snan_bit,FPCR_ENABLE(a6) ;check if trap enabled
+ beq.b src_dis ;branch if disabled
+ bset.b #signan_bit,ETEMP_HI(a6) ;set SNAN bit in sop
+ or.b #norm_tag,DTAG(a6) ;set up dtag for norm
+ or.b #nan_tag,STAG(a6) ;set up stag for nan
+ st.b STORE_FLG(a6) ;do not store a result
+ or.l #snaniop_mask,USER_FPSR(a6) ;set NAN, SNAN, AIOP
+ rts
+
+src_dis:
+ bset.b #signan_bit,ETEMP_HI(a6) ;set SNAN bit in sop
+ fmove.l d1,fpcr ;restore user's rmode/prec
+ fmove.x ETEMP(a6),fp0 ;load non-sign. nan
+ or.l #snaniop_mask,USER_FPSR(a6) ;set NAN, SNAN, AIOP
+ rts
+
+*
+* For all functions that have a denormalized input and that f(x)=x,
+* this is the entry point
+*
+t_extdnrm:
+ or.l #unfinx_mask,USER_FPSR(a6)
+* ;set UNFL, INEX2, AUNFL, AINEX
+ bra.b xdnrm_con
+*
+* Entry point for scale with extended denorm. The function does
+* not set inex2, aunfl, or ainex.
+*
+t_resdnrm:
+ or.l #unfl_mask,USER_FPSR(a6) ;set UNFL only
+
+xdnrm_con:
+ btst.b #unfl_bit,FPCR_ENABLE(a6) ;test FPCR for unfl enabled
+ beq.b xdnrm_dis ;disabled: skip the WBTEMP setup
+
+*
+* If exceptions are enabled, the additional task of setting up WBTEMP
+* is needed so that when the underflow exception handler is entered,
+* the user perceives no difference between what the 040 provides vs.
+* what the FPSP provides.
+*
+xdnrm_ena:
+ move.l a0,-(a7) ;save caller's operand pointer
+
+ move.l LOCAL_EX(a0),FP_SCR1(a6) ;copy the operand to FP_SCR1
+ move.l LOCAL_HI(a0),FP_SCR1+4(a6)
+ move.l LOCAL_LO(a0),FP_SCR1+8(a6)
+
+ lea FP_SCR1(a6),a0 ;work on the copy
+
+ bclr.b #sign_bit,LOCAL_EX(a0) ;strip sign; Z reflects the old sign bit
+ sne LOCAL_SGN(a0) ;convert to internal ext format
+ tst.w LOCAL_EX(a0) ;check if input is denorm
+ beq.b xdnrm_dn ;if so, skip nrm_set
+ bsr nrm_set ;normalize the result (exponent
+* ;will be negative)
+xdnrm_dn:
+ bclr.b #sign_bit,LOCAL_EX(a0) ;take off false sign
+ bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format
+ beq.b xdep ;sign byte was zero: copy is positive
+ bset.b #sign_bit,LOCAL_EX(a0) ;put the real sign back
+xdep:
+ bfclr STAG(a6){5:3} ;clear wbtm66,wbtm1,wbtm0
+ bset.b #wbtemp15_bit,WB_BYTE(a6) ;set wbtemp15
+ bclr.b #sticky_bit,STICKY(a6) ;clear sticky bit
+ bclr.b #E1,E_BYTE(a6) ;clear the E1 bit in the frame
+ move.l (a7)+,a0 ;restore caller's operand pointer
+xdnrm_dis:
+ bfextu FPCR_MODE(a6){0:2},d0 ;get round precision
+ bne.b not_ext ;if not round extended, store
+* ;IEEE defaults
+is_ext:
+ btst.b #sign_bit,LOCAL_EX(a0) ;check sign of the operand
+ beq.b xdnrm_store ;positive: store it unchanged
+
+ bset.b #neg_bit,FPSR_CC(a6) ;set N bit in FPSR_CC
+
+ bra.b xdnrm_store
+
+not_ext:
+ bclr.b #sign_bit,LOCAL_EX(a0) ;strip sign; Z reflects the old sign bit
+ sne LOCAL_SGN(a0) ;convert to internal ext format
+ bsr unf_sub ;returns IEEE result pointed by
+* ;a0; sets FPSR_CC accordingly
+ bfclr LOCAL_SGN(a0){0:8} ;convert back to IEEE ext format
+ beq.b xdnrm_store ;sign byte was zero: result is positive
+ bset.b #sign_bit,LOCAL_EX(a0) ;put the sign back on the result
+xdnrm_store:
+ fmovem.x (a0),fp0 ;store result in fp0
+ rts
+
+*
+* This subroutine is used for dyadic operations that use an extended
+* denorm within the kernel. The approach used is to capture the frame,
+* fix/restore.
+*
+ xdef t_avoid_unsupp
+t_avoid_unsupp:
+ link a2,#-LOCAL_SIZE ;so that a2 fpsp.h negative
+* ;offsets may be used
+ fsave -(a7) ;capture the fpu state frame on the stack
+ tst.b 1(a7) ;check if idle, exit if so
+ beq.w idle_end
+ btst.b #E1,E_BYTE(a2) ;check for an E1 exception if
+* ;enabled, there is an unsupp
+ beq.w end_avun ;else, exit
+ btst.b #7,DTAG(a2) ;check for denorm destination
+ beq.b src_den ;else, must be a source denorm
+*
+* handle destination denorm
+*
+ lea FPTEMP(a2),a0 ;point a0 to the destination operand
+ btst.b #sign_bit,LOCAL_EX(a0) ;test sign (bit is left in place here)
+ sne LOCAL_SGN(a0) ;convert to internal ext format
+ bclr.b #7,DTAG(a2) ;set DTAG to norm
+ bsr nrm_set ;normalize result, exponent
+* ;will become negative
+ bclr.b #sign_bit,LOCAL_EX(a0) ;get rid of fake sign
+ bfclr LOCAL_SGN(a0){0:8} ;convert back to IEEE ext format
+ beq.b ck_src_den ;check if source is also denorm
+ bset.b #sign_bit,LOCAL_EX(a0) ;put the real sign back
+ck_src_den:
+ btst.b #7,STAG(a2) ;is the source a denorm as well?
+ beq.b end_avun ;NOTE(review): this exit skips den_com - confirm intended
+src_den:
+ lea ETEMP(a2),a0 ;point a0 to the source operand
+ btst.b #sign_bit,LOCAL_EX(a0) ;test sign (bit is left in place here)
+ sne LOCAL_SGN(a0) ;convert to internal ext format
+ bclr.b #7,STAG(a2) ;set STAG to norm
+ bsr nrm_set ;normalize result, exponent
+* ;will become negative
+ bclr.b #sign_bit,LOCAL_EX(a0) ;get rid of fake sign
+ bfclr LOCAL_SGN(a0){0:8} ;convert back to IEEE ext format
+ beq.b den_com ;sign byte was zero: operand is positive
+ bset.b #sign_bit,LOCAL_EX(a0) ;put the real sign back
+den_com:
+ move.b #$fe,CU_SAVEPC(a2) ;set continue frame
+ clr.w NMNEXC(a2) ;clear NMNEXC
+ bclr.b #E1,E_BYTE(a2) ;clear the E1 bit in the frame
+* fmove.l FPSR,FPSR_SHADOW(a2)
+* bset.b #SFLAG,E_BYTE(a2)
+* bset.b #XFLAG,T_BYTE(a2)
+end_avun:
+ frestore (a7)+ ;reload the (possibly fixed) frame
+ unlk a2
+ rts
+idle_end:
+ add.l #4,a7 ;drop 4 bytes of the idle frame without frestore
+ unlk a2
+ rts
+ end
diff --git a/sys/arch/m68k/fpsp/l_fpsp.h b/sys/arch/m68k/fpsp/l_fpsp.h
new file mode 100644
index 00000000000..7737b1ce524
--- /dev/null
+++ b/sys/arch/m68k/fpsp/l_fpsp.h
@@ -0,0 +1,280 @@
+* $NetBSD: l_fpsp.h,v 1.2 1994/10/26 07:49:14 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* l_fpsp.h 1.2 5/1/91
+*
+
+* l_fpsp.h --- stack frame offsets for library version of FPSP
+*
+* This file is derived from fpsp.h. All equates that refer
+* to the fsave frame and its bits are removed with the
+* exception of ETEMP, WBTEMP, DTAG and STAG which are simulated
+* in the library version. Equates for the exception frame are
+* also not needed. Some of the equates that are only used in
+* the kernel version of the FPSP are left in to minimize the
+* differences between this file and the original.
+*
+* The library routines use the same source files as the regular
+* kernel mode code so they expect the same setup. That is, you
+* must create enough space on the stack for all save areas and
+* work variables that are needed, and save any registers that
+* your compiler does not treat as scratch registers on return
+* from function calls.
+*
+* The worst case setup is:
+*
+* link a6,#-LOCAL_SIZE
+* movem.l d0-d1/a0-a1,USER_DA(a6)
+* fmovem.x fp0-fp3,USER_FP0(a6)
+* fmovem.l fpsr/fpcr,USER_FPSR(a6)
+*
+* After initialization, the stack looks like this:
+*
+* A7 ---> +-------------------------------+
+* | |
+* | FPSP Local Variables |
+* | including |
+* | saved registers |
+* | |
+* +-------------------------------+
+* A6 ---> | Saved A6 |
+* +-------------------------------+
+* | Return PC |
+* +-------------------------------+
+* | Arguments to |
+* | an FPSP library |
+* | package |
+* | |
+*
+* Positive offsets from A6 refer to the input arguments. Negative
+* offsets refer to the Local Variable area.
+*
+* On exit, execute:
+*
+* movem.l USER_DA(a6),d0-d1/a0-a1
+* fmovem.x USER_FP0(a6),fp0-fp3
+* fmove.l USER_FPSR(a6),fpsr/fpcr
+* unlk a6
+* rts
+*
+* Many 68K C compilers treat a0/a1/d0/d1/fp0/fp1 as scratch so
+* a simplified setup/exit is possible:
+*
+* link a6,#-LOCAL_SIZE
+* fmovem.x fp2-fp3,USER_FP2(a6)
+* fmove.l fpsr/fpcr,USER_FPSR(a6)
+*
+* [call appropriate emulation routine]
+*
+* fmovem.x USER_FP2(a6),fp2-fp3
+* fmove.l USER_FPSR(a6),fpsr/fpcr
+* unlk a6
+* rts
+*
+* Note that you must still save fp2/fp3 because the FPSP emulation
+* routines expect fp0-fp3 as scratch registers. For all monadic
+* entry points, the caller should save the fpcr in d1 and zero the
+* real fpcr before calling the emulation routine. On return, the
+* monadic emulation code will place the value supplied in d1 back
+* into the fpcr and do a single floating point operation so that
+* the final result will be correctly rounded and any specified
+* exceptions will be generated.
+*
+*----------------------------------------------------------------------
+*
+* Local Variables on the stack
+*
+LOCAL_SIZE equ 228 ;bytes needed for local variables
+LV equ -LOCAL_SIZE ;convenient base value
+*
+USER_DA equ LV+0 ;save space for D0-D1,A0-A1
+USER_D0 equ LV+0 ;saved user D0
+USER_D1 equ LV+4 ;saved user D1
+USER_A0 equ LV+8 ;saved user A0
+USER_A1 equ LV+12 ;saved user A1
+USER_FP0 equ LV+16 ;saved user FP0
+USER_FP1 equ LV+28 ;saved user FP1
+USER_FP2 equ LV+40 ;saved user FP2
+USER_FP3 equ LV+52 ;saved user FP3
+USER_FPCR equ LV+64 ;saved user FPCR
+FPCR_ENABLE equ USER_FPCR+2 ; FPCR exception enable
+FPCR_MODE equ USER_FPCR+3 ; FPCR rounding mode control
+USER_FPSR equ LV+68 ;saved user FPSR
+FPSR_CC equ USER_FPSR+0 ; FPSR condition code
+FPSR_QBYTE equ USER_FPSR+1 ; FPSR quotient
+FPSR_EXCEPT equ USER_FPSR+2 ; FPSR exception
+FPSR_AEXCEPT equ USER_FPSR+3 ; FPSR accrued exception
+USER_FPIAR equ LV+72 ;saved user FPIAR
+FP_SCR1 equ LV+76 ;room for a temporary float value
+FP_SCR2 equ LV+92 ;room for a temporary float value
+L_SCR1 equ LV+108 ;room for a temporary long value
+L_SCR2 equ LV+112 ;room for a temporary long value
+STORE_FLG equ LV+116 ;set by kernel_ex.sa when no result is to be stored
+BINDEC_FLG equ LV+117 ;used in bindec
+DNRM_FLG equ LV+118 ;used in res_func
+RES_FLG equ LV+119 ;used in res_func
+DY_MO_FLG equ LV+120 ;dyadic/monadic flag
+UFLG_TMP equ LV+121 ;temporary for uflag errata
+CU_ONLY equ LV+122 ;cu-only flag
+VER_TMP equ LV+123 ;temp holding for version number
+L_SCR3 equ LV+124 ;room for a temporary long value
+FP_SCR3 equ LV+128 ;room for a temporary float value
+FP_SCR4 equ LV+144 ;room for a temporary float value
+FP_SCR5 equ LV+160 ;room for a temporary float value
+FP_SCR6 equ LV+176 ;16-byte slot like FP_SCR3-FP_SCR5 (ends at STAG)
+*
+*--------------------------------------------------------------------------
+*
+STAG equ LV+192 ;source tag (1 byte)
+*
+DTAG equ LV+193 ;dest tag (1 byte)
+*
+FPTEMP equ LV+196 ;fptemp (12 bytes)
+FPTEMP_EX equ FPTEMP ;fptemp sign and exponent (2 bytes)
+FPTEMP_HI equ FPTEMP+4 ;fptemp mantissa [63:32] (4 bytes)
+FPTEMP_LO equ FPTEMP+8 ;fptemp mantissa [31:00] (4 bytes)
+*
+FPTEMP_SGN equ FPTEMP+2 ;used to store sign
+*
+ETEMP equ LV+208 ;etemp (12 bytes)
+ETEMP_EX equ ETEMP ;etemp sign and exponent (2 bytes)
+ETEMP_HI equ ETEMP+4 ;etemp mantissa [63:32] (4 bytes)
+ETEMP_LO equ ETEMP+8 ;etemp mantissa [31:00] (4 bytes)
+*
+ETEMP_SGN equ ETEMP+2 ;used to store sign
+*
+*--------------------------------------------------------------------------
+*
+* FPSR/FPCR bits
+*
+neg_bit equ 3 negative result
+z_bit equ 2 zero result
+inf_bit equ 1 infinity result
+nan_bit equ 0 not-a-number result
+*
+q_sn_bit equ 7 sign bit of quotient byte
+*
+bsun_bit equ 7 branch on unordered
+snan_bit equ 6 signalling nan
+operr_bit equ 5 operand error
+ovfl_bit equ 4 overflow
+unfl_bit equ 3 underflow
+dz_bit equ 2 divide by zero
+inex2_bit equ 1 inexact result 2
+inex1_bit equ 0 inexact result 1
+*
+aiop_bit equ 7 accrued illegal operation
+aovfl_bit equ 6 accrued overflow
+aunfl_bit equ 5 accrued underflow
+adz_bit equ 4 accrued divide by zero
+ainex_bit equ 3 accrued inexact
+*
+* FPSR individual bit masks
+*
+neg_mask equ $08000000
+z_mask equ $04000000
+inf_mask equ $02000000
+nan_mask equ $01000000
+*
+bsun_mask equ $00008000
+snan_mask equ $00004000
+operr_mask equ $00002000
+ovfl_mask equ $00001000
+unfl_mask equ $00000800
+dz_mask equ $00000400
+inex2_mask equ $00000200
+inex1_mask equ $00000100
+*
+aiop_mask equ $00000080 accrued illegal operation
+aovfl_mask equ $00000040 accrued overflow
+aunfl_mask equ $00000020 accrued underflow
+adz_mask equ $00000010 accrued divide by zero
+ainex_mask equ $00000008 accrued inexact
+*
+* FPSR combinations used in the FPSP
+*
+dzinf_mask equ inf_mask+dz_mask+adz_mask
+opnan_mask equ nan_mask+operr_mask+aiop_mask
+nzi_mask equ $01ffffff clears N, Z, and I
+unfinx_mask equ unfl_mask+inex2_mask+aunfl_mask+ainex_mask
+unf2inx_mask equ unfl_mask+inex2_mask+ainex_mask
+ovfinx_mask equ ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
+inx1a_mask equ inex1_mask+ainex_mask
+inx2a_mask equ inex2_mask+ainex_mask
+snaniop_mask equ nan_mask+snan_mask+aiop_mask
+naniop_mask equ nan_mask+aiop_mask
+neginf_mask equ neg_mask+inf_mask
+infaiop_mask equ inf_mask+aiop_mask
+negz_mask equ neg_mask+z_mask
+opaop_mask equ operr_mask+aiop_mask
+unfl_inx_mask equ unfl_mask+aunfl_mask+ainex_mask
+ovfl_inx_mask equ ovfl_mask+aovfl_mask+ainex_mask
+*
+*--------------------------------------------------------------------------
+*
+* FPCR rounding modes
+*
+x_mode equ $00 round to extended
+s_mode equ $40 round to single
+d_mode equ $80 round to double
+*
+rn_mode equ $00 round nearest
+rz_mode equ $10 round to zero
+rm_mode equ $20 round to minus infinity
+rp_mode equ $30 round to plus infinity
+*
+*--------------------------------------------------------------------------
+*
+* Miscellaneous equates
+*
+signan_bit equ 6 signalling nan bit in mantissa
+sign_bit equ 7 ;sign bit in first byte of sign/exponent word
+*
+rnd_stky_bit equ 29 round/sticky bit of mantissa
+* this can only be used if in a data register
+LOCAL_EX equ 0 ;offset of sign and exponent word
+LOCAL_SGN equ 2 ;offset of byte used to store sign
+LOCAL_HI equ 4 ;offset of mantissa [63:32]
+LOCAL_LO equ 8 ;offset of mantissa [31:00]
+LOCAL_GRS equ 12 valid ONLY for FP_SCR1, FP_SCR2
+*
+*
+norm_tag equ $00 tag bits in {7:5} position
+zero_tag equ $20
+inf_tag equ $40
+nan_tag equ $60
+dnrm_tag equ $80
+*
+dbl_thresh equ $3C01
+sgl_thresh equ $3F81
+*
diff --git a/sys/arch/m68k/fpsp/l_support.sa b/sys/arch/m68k/fpsp/l_support.sa
new file mode 100644
index 00000000000..e704484b5a5
--- /dev/null
+++ b/sys/arch/m68k/fpsp/l_support.sa
@@ -0,0 +1,388 @@
+* $NetBSD: l_support.sa,v 1.3 1994/10/26 07:49:16 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* l_support.sa 1.2 5/1/91
+*
+
+L_SUPPORT IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+* Extended-precision constants, three longwords each: the sign/exponent
+* word sits in the upper half of the first longword, followed by the
+* two mantissa longwords.
+* mns_one/pls_one: exponent $3fff with only the top mantissa bit set
+mns_one dc.l $bfff0000,$80000000,$00000000
+pls_one dc.l $3fff0000,$80000000,$00000000
+* pls_inf: exponent $7fff, zero mantissa
+pls_inf dc.l $7fff0000,$00000000,$00000000
+* pls_huge/mns_huge: exponent $7ffe with an all-ones mantissa
+pls_huge dc.l $7ffe0000,$ffffffff,$ffffffff
+mns_huge dc.l $fffe0000,$ffffffff,$ffffffff
+* pls_tiny/mns_tiny: exponent 0 with only the top mantissa bit set
+pls_tiny dc.l $00000000,$80000000,$00000000
+mns_tiny dc.l $80000000,$80000000,$00000000
+* small: exponent $2000; added to pls_one to provoke an inexact result
+small dc.l $20000000,$80000000,$00000000
+pls_zero dc.l $00000000,$00000000,$00000000
+
+ include l_fpsp.h
+
+*
+* tag --- classify an extended precision operand
+*
+* The value returned is the tag the 68040 itself would have
+* assigned to the operand.
+*
+* Input: a0 points to operand
+*
+* Output d0.b = norm_tag ($00) norm
+* zero_tag ($20) zero
+* inf_tag ($40) inf
+* nan_tag ($60) nan
+* dnrm_tag ($80) denorm
+* d0.w is used as scratch; no other register is changed
+*
+ xdef tag
+tag:
+ move.w LOCAL_EX(a0),d0 ;sign/exponent word
+ andi.w #$7fff,d0 ;keep the exponent only
+ beq.b chk_zro ;exponent all zeros
+ cmpi.w #$7fff,d0
+ bne.b tag_nrm ;ordinary exponent
+chk_inf:
+ tst.l LOCAL_HI(a0) ;max exponent: any mantissa bit means NaN
+ bne.b tag_nan
+ tst.l LOCAL_LO(a0)
+ bne.b tag_nan
+tag_inf:
+ move.b #inf_tag,d0
+ rts
+chk_zro:
+ btst.b #7,LOCAL_HI(a0) ;check if J-bit is set
+ bne.b tag_nrm
+ tst.l LOCAL_HI(a0) ;any other mantissa bit means denorm
+ bne.b tag_dnrm
+ tst.l LOCAL_LO(a0)
+ bne.b tag_dnrm
+tag_zero:
+ move.b #zero_tag,d0
+ rts
+tag_nrm:
+ clr.b d0 ;norm_tag is $00
+ rts
+tag_nan:
+ move.b #nan_tag,d0
+ rts
+tag_dnrm:
+ move.b #dnrm_tag,d0
+ rts
+
+*
+* t_dz, t_dz2 --- divide by zero exception
+*
+* t_dz2 is used by monadic functions such as flogn (from do_func).
+* t_dz is used by monadic functions such as satanh (from the
+* transcendental function).
+*
+ xdef t_dz2
+* Unconditionally divides -1 by +0 after loading d1 into the FPCR.
+t_dz2:
+ fmovem.x mns_one,fp0 ;fp0 = -1
+ fmove.l d1,fpcr ;d1 supplies the FPCR for the divide
+ fdiv.x pls_zero,fp0 ;-1 / +0
+ rts
+
+ xdef t_dz
+* Divides +1 or -1 (chosen by the sign of ETEMP) by +0 after loading
+* d1 into the FPCR.
+t_dz:
+ btst.b #sign_bit,ETEMP_EX(a6) ;sign of the source operand
+ bne.b m_inf ;negative source
+p_inf:
+ fmovem.x pls_one,fp0
+ fmove.l d1,fpcr
+ fdiv.x pls_zero,fp0 ;+1 / +0
+ rts
+m_inf:
+ fmovem.x mns_one,fp0
+ fmove.l d1,fpcr
+ fdiv.x pls_zero,fp0 ;-1 / +0
+ rts
+*
+* t_operr --- Operand Error exception
+*
+ xdef t_operr
+* Multiplies +infinity by +0 after loading d1 into the FPCR.
+t_operr:
+ fmovem.x pls_inf,fp0 ;fp0 = +inf
+ fmove.l d1,fpcr ;d1 supplies the FPCR for the multiply
+ fmul.x pls_zero,fp0 ;inf * 0
+ rts
+
+*
+* t_unfl --- UNFL exception
+*
+ xdef t_unfl
+* Multiplies two tiny constants, signed after ETEMP, with d1 loaded
+* into the FPCR.
+t_unfl:
+ btst.b #sign_bit,ETEMP(a6) ;sign of the source operand
+ bne.b unf_neg
+unf_pos:
+ fmovem.x pls_tiny,fp0
+ fmove.l d1,fpcr
+ fmul.x fp0,fp0 ;tiny * tiny
+ rts
+
+unf_neg:
+ fmovem.x mns_tiny,fp0
+ fmove.l d1,fpcr
+ fmul.x pls_tiny,fp0 ;-tiny * tiny
+ rts
+*
+*
+* t_ovfl, t_ovfl2 --- OVFL exception
+*
+* t_ovfl is the exit used by monadic functions: it stores d1 in
+* USER_FPCR(a6) and selects ovfinx_mask. t_ovfl2 is the dyadic exit:
+* it relies on USER_FPCR(a6) as found and selects ovfl_inx_mask.
+* Both then multiply two huge constants (signed after ETEMP) under
+* USER_FPCR and OR the selected mask into the resulting FPSR.
+* d0 and d1 are overwritten.
+*
+ xdef t_ovfl
+ xdef t_ovfl2
+t_ovfl:
+ move.l d1,USER_FPCR(a6) ;user's control register
+ move.l #ovfinx_mask,d0
+ bra.b t_work
+t_ovfl2:
+ move.l #ovfl_inx_mask,d0
+t_work:
+ btst.b #sign_bit,ETEMP(a6) ;sign of the source operand
+ beq.b ovf_pos
+ovf_neg:
+ fmovem.x mns_huge,fp0
+ bra.b ovf_mul
+ovf_pos:
+ fmovem.x pls_huge,fp0
+ovf_mul:
+ fmove.l USER_FPCR(a6),fpcr
+ fmul.x pls_huge,fp0 ;huge * huge
+ fmove.l fpsr,d1 ;status left by the multiply
+ or.l d1,d0 ;merge with the mask chosen at entry
+ fmove.l d0,fpsr
+ rts
+*
+* t_inx2 --- INEX2 exception (correct fpcr is in USER_FPCR(a6))
+*
+ xdef t_inx2
+t_inx2:
+ fmove.l fpsr,USER_FPSR(a6) capture incoming fpsr
+ fmove.l USER_FPCR(a6),fpcr
+*
+* create an inex2 exception by adding two numbers with very different exponents
+* do the add in fp1 so as to not disturb the result sitting in fp0
+*
+ fmove.x pls_one,fp1
+ fadd.x small,fp1
+*
+* The FPSR captured before the add, plus INEX2/AINEX, is what gets
+* written back; the status produced by the add itself is discarded.
+*
+ or.l #inx2a_mask,USER_FPSR(a6) ;set INEX2, AINEX
+ fmove.l USER_FPSR(a6),fpsr
+ rts
+*
+* t_frcinx --- Force Inex2 (for monadic functions)
+*
+ xdef t_frcinx
+* Same scheme as t_inx2, except that the FPCR value comes from d1 and
+* the accrued underflow bit is also set when the underflow bit is set.
+t_frcinx:
+ fmove.l fpsr,USER_FPSR(a6) capture incoming fpsr
+ fmove.l d1,fpcr
+*
+* create an inex2 exception by adding two numbers with very different exponents
+* do the add in fp1 so as to not disturb the result sitting in fp0
+*
+ fmove.x pls_one,fp1
+ fadd.x small,fp1
+*
+ or.l #inx2a_mask,USER_FPSR(a6) ;set INEX2, AINEX
+* NOTE(review): FPSR_EXCEPT/FPSR_AEXCEPT are presumably the exception and
+* accrued-exception bytes inside USER_FPSR(a6) -- confirm in l_fpsp.h.
+ btst.b #unfl_bit,FPSR_EXCEPT(a6) ;test for unfl bit set
+ beq.b no_uacc1 ;if clear, do not set aunfl
+ bset.b #aunfl_bit,FPSR_AEXCEPT(a6)
+no_uacc1:
+ fmove.l USER_FPSR(a6),fpsr
+ rts
+*
+* dst_nan --- force result when destination is a NaN
+*
+ xdef dst_nan
+dst_nan:
+ fmove.l USER_FPCR(a6),fpcr ;run the move under the saved FPCR
+ fmove.x FPTEMP(a6),fp0 ;result = destination operand
+ rts
+
+*
+* src_nan --- force result when source is a NaN
+*
+ xdef src_nan
+src_nan:
+ fmove.l USER_FPCR(a6),fpcr ;run the move under the saved FPCR
+ fmove.x ETEMP(a6),fp0 ;result = source operand
+ rts
+*
+* mon_nan --- force result when source is a NaN (monadic version)
+*
+* This is the same as src_nan except that the user's fpcr comes
+* in via d1, not USER_FPCR(a6).
+*
+ xdef mon_nan
+mon_nan:
+ fmove.l d1,fpcr ;user's fpcr arrives in d1
+ fmove.x ETEMP(a6),fp0 ;result = source operand
+ rts
+*
+* t_extdnrm, t_resdnrm --- generate results for denorm inputs
+*
+* For all functions that have a denormalized input and that f(x)=x,
+* this is the entry point.
+*
+ xdef t_extdnrm
+* Input: a0 points to the operand, d1 = FPCR value to use.
+* Loads the operand into fp0 and ORs unfinx_mask into the FPSR.
+* d0 is overwritten.
+t_extdnrm:
+ fmove.l d1,fpcr
+ fmove.x LOCAL_EX(a0),fp0 ;result = the operand itself
+ fmove.l fpsr,d0
+ or.l #unfinx_mask,d0 ;UNFL, INEX2, AUNFL, AINEX
+ fmove.l d0,fpsr
+ rts
+
+ xdef t_resdnrm
+* Input: a0 points to the operand; the FPCR comes from USER_FPCR(a6).
+* Loads the operand into fp0 and ORs only unfl_mask into the FPSR.
+* d0 is overwritten.
+t_resdnrm:
+ fmove.l USER_FPCR(a6),fpcr
+ fmove.x LOCAL_EX(a0),fp0 ;result = the operand itself
+ fmove.l fpsr,d0
+ or.l #unfl_mask,d0 ;UNFL only
+ fmove.l d0,fpsr
+ rts
+*
+*
+*
+ xdef t_avoid_unsupp
+* Moves fp0 onto itself and returns.
+* NOTE(review): going by the name, presumably used so that no unsupported
+* data type is left in fp0 -- confirm with callers.
+t_avoid_unsupp:
+ fmove.x fp0,fp0
+ rts
+
+ xdef sto_cos
+* Input: a0 points to an extended precision value; it is loaded into fp1.
+sto_cos:
+ fmovem.x LOCAL_EX(a0),fp1
+ rts
+*
+* Native instruction support
+*
+* Some systems may need entry points even for 68040 native
+* instructions. These routines are provided for
+* convenience.
+*
+* Dyadic wrappers: fp0 = FPTEMP <op> ETEMP, executed with the FPCR
+* taken from USER_FPCR(a6).
+ xdef sadd
+sadd:
+ fmovem.x FPTEMP(a6),fp0 ;destination operand
+ fmove.l USER_FPCR(a6),fpcr
+ fadd.x ETEMP(a6),fp0
+ rts
+
+ xdef ssub
+ssub:
+ fmovem.x FPTEMP(a6),fp0 ;destination operand
+ fmove.l USER_FPCR(a6),fpcr
+ fsub.x ETEMP(a6),fp0
+ rts
+
+ xdef smul
+smul:
+ fmovem.x FPTEMP(a6),fp0 ;destination operand
+ fmove.l USER_FPCR(a6),fpcr
+ fmul.x ETEMP(a6),fp0
+ rts
+
+ xdef sdiv
+sdiv:
+ fmovem.x FPTEMP(a6),fp0 ;destination operand
+ fmove.l USER_FPCR(a6),fpcr
+ fdiv.x ETEMP(a6),fp0
+ rts
+
+* Monadic wrappers: fp0 = <op>(ETEMP), executed with the FPCR taken
+* from d1.
+ xdef sabs
+sabs:
+ fmovem.x ETEMP(a6),fp0 ;source operand
+ fmove.l d1,fpcr
+ fabs.x fp0
+ rts
+
+ xdef sneg
+sneg:
+ fmovem.x ETEMP(a6),fp0 ;source operand
+ fmove.l d1,fpcr
+ fneg.x fp0
+ rts
+
+ xdef ssqrt
+ssqrt:
+ fmovem.x ETEMP(a6),fp0 ;source operand
+ fmove.l d1,fpcr
+ fsqrt.x fp0
+ rts
+
+*
+* l_sint,l_sintrz,l_sintd --- special wrappers for fint and fintrz
+*
+* On entry, d1 holds the user's FPCR; it is stored in USER_FPCR(a6)
+* before the worker routine (sint, sintrz or sintd) is called.
+*
+* On return from the worker, the bits it left in USER_FPSR(a6)
+* (INEX2/AINEX) are ORed into the live FPSR. d0 is overwritten.
+*
+ xref sint
+ xref sintrz
+ xref sintd
+
+ xdef l_sint
+l_sint:
+ move.l d1,USER_FPCR(a6)
+ jsr sint
+ bra.b l_sint_fpsr
+
+ xdef l_sintrz
+l_sintrz:
+ move.l d1,USER_FPCR(a6)
+ jsr sintrz
+ bra.b l_sint_fpsr
+
+ xdef l_sintd
+l_sintd:
+ move.l d1,USER_FPCR(a6)
+ jsr sintd
+* common exit for all three wrappers
+l_sint_fpsr:
+ fmove.l fpsr,d0
+ or.l USER_FPSR(a6),d0 ;pick up the bits left in USER_FPSR
+ fmove.l d0,fpsr
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/netbsd.sa b/sys/arch/m68k/fpsp/netbsd.sa
new file mode 100644
index 00000000000..5dad0ef7779
--- /dev/null
+++ b/sys/arch/m68k/fpsp/netbsd.sa
@@ -0,0 +1,442 @@
+* $NetBSD: netbsd.sa,v 1.2 1994/10/26 07:49:19 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* skeleton.sa 3.2 4/26/91
+*
+* This file contains code that is system dependent and will
+* need to be modified to install the FPSP.
+*
+* Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'.
+* Put any target system specific handling that must be done immediately
+* before the jump instruction. If there is no handling necessary, then
+* the 'fpsp_xxxx' handler entry point should be placed in the exception
+* table so that the 'jmp' can be eliminated. If the FPSP determines that the
+* exception is one that must be reported then there will be a
+* return from the package by a 'jmp real_xxxx'. At that point
+* the machine state will be identical to the state before
+* the FPSP was entered. In particular, whatever condition
+* that caused the exception will still be pending when the FPSP
+* package returns. Thus, there will be system specific code
+* to handle the exception.
+*
+* If the exception was completely handled by the package, then
+* the return will be via a 'jmp fpsp_done'. Unless there is
+* OS specific work to be done (such as handling a context switch or
+* interrupt) the user program can be resumed via 'rte'.
+*
+* In the following skeleton code, some typical 'real_xxxx' handling
+* code is shown. This code may need to be moved to an appropriate
+* place in the target system, or rewritten.
+*
+
+SKELETON IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 15
+*
+* The following counters are used for standalone testing
+*
+
+ section 8
+
+ include fpsp.h
+
+ xref b1238_fix
+ xref _mmutype
+
+*
+* Divide by Zero exception
+*
+* All dz exceptions are 'real', hence no fpsp_dz entry point.
+*
+ xdef dz
+ xdef real_dz
+* NOTE(review): _mmutype == -2 presumably identifies the 68040 -- confirm
+* against the kernel's MMU type definitions. Any other value goes
+* straight to _fpfault.
+dz:
+ cmp.l #-2,_mmutype
+ bne.l _fpfault
+* Clear the pending E1 exception in the saved FPU frame, then hand the
+* fault to the kernel.
+real_dz:
+ link a6,#-LOCAL_SIZE
+ fsave -(sp)
+ bclr.b #E1,E_BYTE(a6)
+ frestore (sp)+
+ unlk a6
+ jmp _fpfault
+
+*
+* Inexact exception
+*
+* All inexact exceptions are real, but the 'real' handler
+* will probably want to clear the pending exception.
+* The provided code will clear the E3 exception (if pending),
+* otherwise clear the E1 exception. The frestore is not really
+* necessary for E1 exceptions.
+*
+* Code following the 'inex' label is to handle bug #1232. In this
+* bug, if an E1 snan, ovfl, or unfl occurred, and the process was
+* swapped out before taking the exception, the exception taken on
+* return was inex, rather than the correct exception. The snan, ovfl,
+* and unfl exception to be taken must not have been enabled. The
+* fix is to check for E1, and the existence of one of snan, ovfl,
+* or unfl bits set in the fpsr. If any of these are set, branch
+* to the appropriate handler for the exception in the fpsr. Note
+* that this fix is only for d43b parts, and is skipped if the
+* version number is not $40.
+*
+*
+ xdef real_inex
+ xdef inex
+inex:
+ cmp.l #-2,_mmutype
+ bne.l _fpfault
+ link a6,#-LOCAL_SIZE
+ fsave -(sp)
+ cmpi.b #VER_40,(sp) ;test version number
+ bne.b not_fmt40 ;frame is already saved, skip real_inex's link/fsave
+ fmove.l fpsr,-(sp) ;fpsr copy on the stack; byte 2(sp) is tested below
+ btst.b #E1,E_BYTE(a6) ;test for E1 set
+ beq.b not_b1232
+ btst.b #snan_bit,2(sp) ;test for snan
+ beq inex_ckofl
+ addq.l #4,sp ;drop the fpsr copy
+ frestore (sp)+
+ unlk a6
+ bra snan
+inex_ckofl:
+ btst.b #ovfl_bit,2(sp) ;test for ovfl
+ beq inex_ckufl
+ addq.l #4,sp ;drop the fpsr copy
+ frestore (sp)+
+ unlk a6
+ bra ovfl
+inex_ckufl:
+ btst.b #unfl_bit,2(sp) ;test for unfl
+ beq not_b1232
+ addq.l #4,sp ;drop the fpsr copy
+ frestore (sp)+
+ unlk a6
+ bra unfl
+
+*
+* We do not have the bug 1232 case. Clean up the stack and call
+* real_inex.
+*
+not_b1232:
+ addq.l #4,sp ;drop the fpsr copy
+ frestore (sp)+
+ unlk a6
+
+real_inex:
+ link a6,#-LOCAL_SIZE
+ fsave -(sp)
+not_fmt40:
+ bclr.b #E3,E_BYTE(a6) ;clear and test E3 flag
+ beq.b inex_cke1
+*
+* Clear dirty bit on dest register in the frame before branching
+* to b1238_fix.
+*
+ movem.l d0/d1,USER_DA(a6) ;d0/d1 are saved around the fix-up
+ bfextu CMDREG1B(a6){6:3},d0 ;get dest reg no
+ bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit
+ bsr.l b1238_fix ;test for bug1238 case
+ movem.l USER_DA(a6),d0/d1
+ bra.b inex_done
+inex_cke1:
+ bclr.b #E1,E_BYTE(a6)
+inex_done:
+ frestore (sp)+
+ unlk a6
+ jmp _fpfault
+
+*
+* Overflow exception
+*
+ xref fpsp_ovfl
+ xdef real_ovfl
+ xdef ovfl
+* Entry: go to the FPSP handler when _mmutype is -2, otherwise to _fpfault.
+ovfl:
+ cmp.l #-2,_mmutype
+ beq.l fpsp_ovfl
+ jmp _fpfault
+* Clear E3 if it was set, otherwise E1, in the saved frame, then take
+* the fault through _fpfault.
+real_ovfl:
+ link a6,#-LOCAL_SIZE
+ fsave -(sp)
+ bclr.b #E3,E_BYTE(a6) ;clear and test E3 flag
+ bne.b ovfl_done
+ bclr.b #E1,E_BYTE(a6)
+ovfl_done:
+ frestore (sp)+
+ unlk a6
+ jmp _fpfault
+
+*
+* Underflow exception
+*
+ xref fpsp_unfl
+ xdef real_unfl
+ xdef unfl
+* Entry: go to the FPSP handler when _mmutype is -2, otherwise to _fpfault.
+unfl:
+ cmp.l #-2,_mmutype
+ beq.l fpsp_unfl
+ jmp _fpfault
+* Clear E3 if it was set, otherwise E1, in the saved frame, then take
+* the fault through _fpfault.
+real_unfl:
+ link a6,#-LOCAL_SIZE
+ fsave -(sp)
+ bclr.b #E3,E_BYTE(a6) ;clear and test E3 flag
+ bne.b unfl_done
+ bclr.b #E1,E_BYTE(a6)
+unfl_done:
+ frestore (sp)+
+ unlk a6
+ jmp _fpfault
+
+*
+* Signalling NAN exception
+*
+ xref fpsp_snan
+ xdef real_snan
+ xdef snan
+* Entry: go to the FPSP handler when _mmutype is -2, otherwise to _fpfault.
+snan:
+ cmp.l #-2,_mmutype
+ beq.l fpsp_snan
+ jmp _fpfault
+* Clear E1 in the saved frame, then take the fault through _fpfault.
+real_snan:
+ link a6,#-LOCAL_SIZE
+ fsave -(sp)
+ bclr.b #E1,E_BYTE(a6) ;snan is always an E1 exception
+ frestore (sp)+
+ unlk a6
+ jmp _fpfault
+
+*
+* Operand Error exception
+*
+ xref fpsp_operr
+ xdef real_operr
+ xdef operr
+* Entry: go to the FPSP handler when _mmutype is -2, otherwise to _fpfault.
+operr:
+ cmp.l #-2,_mmutype
+ beq.l fpsp_operr
+ jmp _fpfault
+* Clear E1 in the saved frame, then take the fault through _fpfault.
+real_operr:
+ link a6,#-LOCAL_SIZE
+ fsave -(sp)
+ bclr.b #E1,E_BYTE(a6) ;operr is always an E1 exception
+ frestore (sp)+
+ unlk a6
+ jmp _fpfault
+
+*
+* BSUN exception
+*
+* This sample handler simply clears the nan bit in the FPSR.
+*
+ xref fpsp_bsun
+ xdef real_bsun
+ xdef bsun
+* Entry: go to the FPSP handler when _mmutype is -2, otherwise to _fpfault.
+bsun:
+ cmp.l #-2,_mmutype
+ beq.l fpsp_bsun
+ jmp _fpfault
+* Clear E1 in the saved frame and the nan bit in the FPSR, then take
+* the fault through _fpfault.
+real_bsun:
+ link a6,#-LOCAL_SIZE
+ fsave -(sp)
+ bclr.b #E1,E_BYTE(a6) ;bsun is always an E1 exception
+ fmove.l FPSR,-(sp) ;edit a copy of the FPSR on the stack
+ bclr.b #nan_bit,(sp) ;nan bit is in the first (most significant) byte
+ fmove.l (sp)+,FPSR
+ frestore (sp)+
+ unlk a6
+ jmp _fpfault
+
+*
+* F-line exception
+*
+* A 'real' F-line exception is one that the FPSP isn't supposed to
+* handle. E.g. an instruction with a co-processor ID that is not 1.
+*
+*
+ xref fpsp_fline
+ xdef real_fline
+ xdef fline
+* Entry: go to the FPSP handler when _mmutype is -2, otherwise to _fpfault.
+fline:
+ cmp.l #-2,_mmutype
+ beq.l fpsp_fline
+ jmp _fpfault
+* No frame clean-up is done for a real F-line exception.
+real_fline:
+ jmp _fpfault
+
+*
+* Unsupported data type exception
+*
+ xref fpsp_unsupp
+ xdef real_unsupp
+ xdef unsupp
+* Entry: go to the FPSP handler when _mmutype is -2, otherwise to _fpfault.
+unsupp:
+ cmp.l #-2,_mmutype
+ beq.l fpsp_unsupp
+ jmp _fpfault
+* Clear E1 in the saved frame, then take the fault through _fpfault.
+real_unsupp:
+ link a6,#-LOCAL_SIZE
+ fsave -(sp)
+ bclr.b #E1,E_BYTE(a6) ;unsupp is always an E1 exception
+ frestore (sp)+
+ unlk a6
+ jmp _fpfault
+
+*
+* Trace exception
+*
+ xdef real_trace
+* Returns from the exception at once; no trace handling is done here.
+real_trace:
+ rte
+
+*
+* fpsp_fmt_error --- exit point for frame format error
+*
+* The fpu stack frame does not match the frames existing
+* or planned at the time of this writing. The fpsp is
+* unable to handle frame sizes not in the following
+* version:size pairs:
+*
+* {4060, 4160} - busy frame
+* {4028, 4130} - unimp frame
+* {4000, 4100} - idle frame
+*
+* This entry point simply holds an f-line illegal value.
+* Replace this with a call to your kernel panic code or
+* code to handle future revisions of the fpu.
+*
+ xdef fpsp_fmt_error
+* Calls _panic with the address of the message at local label 1.
+* NOTE(review): _panic is presumably not expected to return; the
+* f-line illegal longword follows the call in case it does.
+fpsp_fmt_error:
+ pea 1f
+ jsr _panic
+ dc.l $f27f0000 ;f-line illegal
+1:
+ .asciz "bad floating point stack frame"
+ .even
+
+*
+* fpsp_done --- FPSP exit point
+*
+* The exception has been handled by the package and we are ready
+* to return to user mode, but there may be OS specific code
+* to execute before we do. If there is, do it now.
+*
+*
+ xref rei
+ xdef fpsp_done
+* Leaves the package through the external routine rei.
+fpsp_done:
+ jmp rei
+
+*
+* mem_write --- write to user or supervisor address space
+*
+* Writes to memory while in supervisor mode. copyout accomplishes
+* this via a 'moves' instruction. copyout is a UNIX SVR3 (and later) function.
+* If you don't have copyout, use the local copy of the function below.
+*
+* a0 - supervisor source address
+* a1 - user destination address
+* d0 - number of bytes to write (maximum count is 12)
+*
+* The supervisor source address is guaranteed to point into the supervisor
+* stack. The result is that a UNIX
+* process is allowed to sleep as a consequence of a page fault during
+* copyout. The probability of a page fault is exceedingly small because
+* the 68040 always reads the destination address and thus the page
+* faults should have already been handled.
+*
+* If the EXC_SR shows that the exception was from supervisor space,
+* then just do a dumb (and slow) memory move. In a UNIX environment
+* there shouldn't be any supervisor mode floating point exceptions.
+*
+* A count of zero copies nothing. (The byte loop decrements before it
+* tests, so without the guard a zero count would wrap and run through
+* 2^32 iterations.)
+*
+ xdef mem_write
+mem_write:
+ btst.b #5,EXC_SR(a6) ;check for supervisor state
+ beq.b user_write
+ tst.l d0 ;anything to copy?
+ beq.b mw_done ;no: leave with d0 = 0
+super_write:
+ move.b (a0)+,(a1)+
+ subq.l #1,d0
+ bne.b super_write
+mw_done:
+ rts
+user_write:
+ move.l d1,-(sp) ;preserve d1 just in case
+ move.l d0,-(sp) ;copyout arguments: count,
+ move.l a1,-(sp) ; destination,
+ move.l a0,-(sp) ; source
+ jsr _copyout
+ add.l #12,sp ;pop the three arguments
+ move.l (sp)+,d1
+ rts
+
+*
+* mem_read --- read from user or supervisor address space
+*
+* Reads from memory while in supervisor mode. copyin accomplishes
+* this via a 'moves' instruction. copyin is a UNIX SVR3 (and later) function.
+* If you don't have copyin, use the local copy of the function below.
+*
+* The FPSP calls mem_read to read the original F-line instruction in order
+* to extract the data register number when the 'Dn' addressing mode is
+* used.
+*
+*Input:
+* a0 - user source address
+* a1 - supervisor destination address
+* d0 - number of bytes to read (maximum count is 12)
+*
+* Like mem_write, mem_read always reads with a supervisor
+* destination address on the supervisor stack. Also like mem_write,
+* the EXC_SR is checked and a simple memory copy is done if reading
+* from supervisor space is indicated.
+*
+* A count of zero copies nothing. (The byte loop decrements before it
+* tests, so without the guard a zero count would wrap and run through
+* 2^32 iterations.)
+*
+ xdef mem_read
+mem_read:
+ btst.b #5,EXC_SR(a6) ;check for supervisor state
+ beq.b user_read
+ tst.l d0 ;anything to copy?
+ beq.b mr_done ;no: leave with d0 = 0
+super_read:
+ move.b (a0)+,(a1)+
+ subq.l #1,d0
+ bne.b super_read
+mr_done:
+ rts
+user_read:
+ move.l d1,-(sp) ;preserve d1 just in case
+ move.l d0,-(sp) ;copyin arguments: count,
+ move.l a1,-(sp) ; destination,
+ move.l a0,-(sp) ; source
+ jsr _copyin
+ add.l #12,sp ;pop the three arguments
+ move.l (sp)+,d1
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/res_func.sa b/sys/arch/m68k/fpsp/res_func.sa
new file mode 100644
index 00000000000..5c036b742fc
--- /dev/null
+++ b/sys/arch/m68k/fpsp/res_func.sa
@@ -0,0 +1,2065 @@
+* $NetBSD: res_func.sa,v 1.3 1994/10/26 07:49:22 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* res_func.sa 3.9 7/29/91
+*
+* Normalizes denormalized numbers if necessary and updates the
+* stack frame. The function is then restored back into the
+* machine and the 040 completes the operation. This routine
+* is only used by the unsupported data type/format handler.
+* (Exception vector 55).
+*
+* For packed move out (fmove.p fpm,<ea>) the operation is
+* completed here; data is packed and moved to user memory.
+* The stack is restored to the 040 only in the case of a
+* reportable exception in the conversion.
+*
+
+RES_FUNC IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+* Exponent bound tables for single (sp_bnds) and double (dp_bnds).
+* $3f81 and $3c01 are the same values the cu_nmrs/cu_nmrd code compares
+* exponents against.
+* NOTE(review): the remaining words are presumably upper and
+* lowest-denorm exponent limits -- confirm where the tables are read.
+sp_bnds: dc.w $3f81,$407e
+ dc.w $3f6a,$0000
+dp_bnds: dc.w $3c01,$43fe
+ dc.w $3bcd,$0000
+
+ xref mem_write
+ xref bindec
+ xref get_fline
+ xref round
+ xref denorm
+ xref dest_ext
+ xref dest_dbl
+ xref dest_sgl
+ xref unf_sub
+ xref nrm_set
+ xref dnrm_lp
+ xref ovf_res
+ xref reg_dest
+ xref t_ovfl
+ xref t_unfl
+
+ xdef res_func
+ xdef p_move
+
+* Entry: clear the working flags, then normalize a denormalized
+* destination operand (dyadic instructions only) before falling into
+* the source operand handling at monadic.
+res_func:
+ clr.b DNRM_FLG(a6)
+ clr.b RES_FLG(a6)
+ clr.b CU_ONLY(a6)
+ tst.b DY_MO_FLG(a6) ;zero means monadic
+ beq.b monadic
+dyadic:
+ btst.b #7,DTAG(a6) ;if dop = norm=000, zero=001,
+* ;inf=010 or nan=011
+ beq.b monadic ;then branch
+* ;else denorm
+* HANDLE DESTINATION DENORM HERE
+* ;set dtag to norm
+* ;write the tag & fpte15 to the fstack
+ lea.l FPTEMP(a6),a0
+
+ bclr.b #sign_bit,LOCAL_EX(a0) ;move the sign out of the exponent word
+ sne LOCAL_SGN(a0) ; into the LOCAL_SGN byte
+
+ bsr nrm_set ;normalize number (exp will go negative)
+ bclr.b #sign_bit,LOCAL_EX(a0) ;get rid of false sign
+ bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format
+ beq.b dpos ;sign byte was zero: positive
+ bset.b #sign_bit,LOCAL_EX(a0)
+dpos:
+ bfclr DTAG(a6){0:4} ;set tag to normalized, FPTE15 = 0
+ bset.b #4,DTAG(a6) ;set FPTE15
+ or.b #$0f,DNRM_FLG(a6) ;low nibble records a destination denorm
+monadic:
+ lea.l ETEMP(a6),a0 ;a0 = source operand from here on
+ btst.b #direction_bit,CMDREG1B(a6) ;check direction
+ bne.w opclass3 ;it is a mv out
+*
+* At this point, only opclass 0 and 2 possible
+*
+ btst.b #7,STAG(a6) ;if sop = norm=000, zero=001,
+* ;inf=010 or nan=011
+ bne.w mon_dnrm ;else denorm
+ tst.b DY_MO_FLG(a6) ;all cases of dyadic instructions would
+ bne.w normal ;require normalization of denorm
+
+* At this point:
+* monadic instructions: fabs = $18 fneg = $1a ftst = $3a
+* fmove = $00 fsmove = $40 fdmove = $44
+* fsqrt = $05* fssqrt = $41 fdsqrt = $45
+* (*fsqrt reencoded to $05)
+*
+ move.w CMDREG1B(a6),d0 ;get command register
+ andi.l #$7f,d0 ;strip to only command word
+*
+* At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
+* fdsqrt are possible.
+* For cases fabs, fneg, fsmove, fdmove, and ftst fall through to cu_norm
+* For cases fsqrt, fssqrt, and fdsqrt (bit 0 set) goto normal
+*
+ btst.l #0,d0
+ bne.w normal ;weed out fsqrt instructions
+*
+* cu_norm handles fmove in instructions with normalized inputs.
+* The routine round is used to correctly round the input for the
+* destination precision and mode.
+*
+cu_norm:
+ st CU_ONLY(a6) ;set cu-only inst flag
+ move.w CMDREG1B(a6),d0
+ andi.b #$3b,d0 ;isolate bits to select inst
+ tst.b d0
+ beq.l cu_nmove ;if zero, it is an fmove
+ cmpi.b #$18,d0
+ beq.l cu_nabs ;if $18, it is fabs
+ cmpi.b #$1a,d0
+ beq.l cu_nneg ;if $1a, it is fneg
+*
+* Inst is ftst. Check the source operand and set the cc's accordingly.
+* No write is done, so simply rts.
+*
+cu_ntst:
+ move.w LOCAL_EX(a0),d0
+ bclr.l #15,d0 ;strip the sign; Z reflects its old value
+ sne LOCAL_SGN(a0) ;record the sign (cc's are not changed)
+ beq.b cu_ntpo ;sign was clear: positive
+ or.l #neg_mask,USER_FPSR(a6) ;set N
+cu_ntpo:
+ cmpi.w #$7fff,d0 ;test for inf/nan
+ bne.b cu_ntcz
+ tst.l LOCAL_HI(a0) ;any mantissa bit set means nan
+ bne.b cu_ntn
+ tst.l LOCAL_LO(a0)
+ bne.b cu_ntn
+ or.l #inf_mask,USER_FPSR(a6)
+ rts
+cu_ntn:
+ or.l #nan_mask,USER_FPSR(a6)
+ move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for
+* ;snan handler
+
+ rts
+cu_ntcz:
+ tst.l LOCAL_HI(a0) ;zero mantissa sets Z
+ bne.l cu_ntsx
+ tst.l LOCAL_LO(a0)
+ bne.l cu_ntsx
+ or.l #z_mask,USER_FPSR(a6)
+cu_ntsx:
+ rts
+*
+* Inst is fabs. Execute the absolute value function on the input.
+* Branch to the fmove code. If the operand is NaN or zero, do nothing.
+*
+cu_nabs:
+ move.b STAG(a6),d0
+ btst.l #5,d0 ;test for NaN or zero
+ bne wr_etemp ;if either, simply write it
+ bclr.b #7,LOCAL_EX(a0) ;do abs
+ bra.b cu_nmove ;fmove code will finish
+*
+* Inst is fneg. Execute the negate value function on the input.
+* Fall through to the fmove code. If the operand is NaN or zero, do nothing.
+*
+cu_nneg:
+ move.b STAG(a6),d0
+ btst.l #5,d0 ;test for NaN or zero
+ bne wr_etemp ;if either, simply write it
+ bchg.b #7,LOCAL_EX(a0) ;do neg
+*
+* Inst is fmove. This code also handles all result writes.
+* If bit 2 is set, round is forced to double. If it is clear,
+* and bit 6 is set, round is forced to single. If both are clear,
+* the round precision is found in the fpcr. If the rounding precision
+* is double or single, round the result before the write.
+*
+cu_nmove:
+ move.b STAG(a6),d0
+ andi.b #$e0,d0 ;isolate stag bits
+ bne wr_etemp ;if not norm, simply write it
+ btst.b #2,CMDREG1B+1(a6) ;check for rd
+ bne cu_nmrd
+ btst.b #6,CMDREG1B+1(a6) ;check for rs
+ bne cu_nmrs
+*
+* The move or operation is not with forced precision. Test for
+* nan or inf as the input; if so, simply write it to FPn. Use the
+* FPCR_MODE byte to get rounding on norms and zeros.
+*
+cu_nmnr:
+ bfextu FPCR_MODE(a6){0:2},d0 ;top two bits = rounding precision
+ tst.b d0 ;check for extended
+ beq cu_wrexn ;if so, just write result
+ cmpi.b #1,d0 ;check for single
+ beq cu_nmrs ;fall through to double
+*
+* The move is fdmove or round precision is double.
+*
+cu_nmrd:
+ move.l #2,d0 ;set up the size for denorm
+ move.w LOCAL_EX(a0),d1 ;compare exponent to double threshold
+ and.w #$7fff,d1
+ cmp.w #$3c01,d1
+ bls cu_nunfl ;at or below threshold: underflow path
+ bfextu FPCR_MODE(a6){2:2},d1 ;get rmode
+ or.l #$00020000,d1 ;or in rprec (double)
+ clr.l d0 ;clear g,r,s for round
+ bclr.b #sign_bit,LOCAL_EX(a0) ;convert to internal format
+ sne LOCAL_SGN(a0)
+ bsr.l round
+ bfclr LOCAL_SGN(a0){0:8} ;back to IEEE ext format
+ beq.b cu_nmrdc
+ bset.b #sign_bit,LOCAL_EX(a0)
+cu_nmrdc:
+ move.w LOCAL_EX(a0),d1 ;check for overflow
+ and.w #$7fff,d1
+ cmp.w #$43ff,d1
+ bge cu_novfl ;take care of overflow case
+ bra.w cu_wrexn
+*
+* The move is fsmove or round precision is single.
+*
+cu_nmrs:
+ move.l #1,d0 ;set up the size for denorm
+ move.w LOCAL_EX(a0),d1 ;compare exponent to single threshold
+ and.w #$7fff,d1
+ cmp.w #$3f81,d1
+ bls cu_nunfl ;at or below threshold: underflow path
+ bfextu FPCR_MODE(a6){2:2},d1 ;get rmode
+ or.l #$00010000,d1 ;or in rprec (single)
+ clr.l d0 ;clear g,r,s for round
+ bclr.b #sign_bit,LOCAL_EX(a0) ;convert to internal format
+ sne LOCAL_SGN(a0)
+ bsr.l round
+ bfclr LOCAL_SGN(a0){0:8} ;back to IEEE ext format
+ beq.b cu_nmrsc
+ bset.b #sign_bit,LOCAL_EX(a0)
+cu_nmrsc:
+ move.w LOCAL_EX(a0),d1 ;check for overflow
+ and.w #$7FFF,d1
+ cmp.w #$407f,d1
+ blt cu_wrexn ;in range: write the result
+*
+* The operand is above precision boundaries. Use t_ovfl to
+* generate the correct value.
+*
+cu_novfl:
+ bsr t_ovfl
+ bra cu_wrexn
+*
+* The operand is below precision boundaries. Use denorm to
+* generate the correct value.
+*
+* d0 holds the size selector loaded by cu_nmrd (2) or cu_nmrs (1).
+*
+cu_nunfl:
+ bclr.b #sign_bit,LOCAL_EX(a0) ;convert to internal format
+ sne LOCAL_SGN(a0)
+ bsr denorm
+ bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format
+ beq.b cu_nucont
+ bset.b #sign_bit,LOCAL_EX(a0)
+cu_nucont:
+* Build d1 for round: rmode in the low word, rprec in the high word.
+ bfextu FPCR_MODE(a6){2:2},d1
+ btst.b #2,CMDREG1B+1(a6) ;check for rd
+ bne inst_d
+ btst.b #6,CMDREG1B+1(a6) ;check for rs
+ bne inst_s
+ swap d1 ;no forced precision: take rprec
+ move.b FPCR_MODE(a6),d1 ; from the top two bits of FPCR_MODE
+ lsr.b #6,d1
+ swap d1
+ bra inst_sd
+inst_d:
+ or.l #$00020000,d1
+ bra inst_sd
+inst_s:
+ or.l #$00010000,d1
+inst_sd:
+ bclr.b #sign_bit,LOCAL_EX(a0) ;convert to internal format
+ sne LOCAL_SGN(a0)
+ bsr.l round
+ bfclr LOCAL_SGN(a0){0:8} ;back to IEEE ext format
+ beq.b cu_nuflp
+ bset.b #sign_bit,LOCAL_EX(a0)
+cu_nuflp:
+ btst.b #inex2_bit,FPSR_EXCEPT(a6)
+ beq.b cu_nuninx
+ or.l #aunfl_mask,USER_FPSR(a6) ;if the round was inex, set AUNFL
+cu_nuninx:
+ tst.l LOCAL_HI(a0) ;test for zero
+ bne.b cu_nunzro
+ tst.l LOCAL_LO(a0)
+ bne.b cu_nunzro
+*
+* The mantissa is zero from the denorm loop. Check sign and rmode
+* to see if rounding should have occurred which would leave the lsb.
+*
+ move.l USER_FPCR(a6),d0
+ andi.l #$30,d0 ;isolate rmode
+ cmpi.l #$20,d0
+ blt.b cu_nzro ;rn ($00) or rz ($10): result stays zero
+ bne.b cu_nrp ;$30 = rp; fall through for rm ($20)
+cu_nrm:
+ tst.w LOCAL_EX(a0) ;if negative, set lsb
+ bge.b cu_nzro
+ btst.b #7,FPCR_MODE(a6) ;check for double
+ beq.b cu_nincs
+ bra.b cu_nincd
+cu_nrp:
+ tst.w LOCAL_EX(a0) ;if positive, set lsb
+ blt.b cu_nzro
+ btst.b #7,FPCR_MODE(a6) ;check for double
+ beq.b cu_nincs
+cu_nincd:
+ or.l #$800,LOCAL_LO(a0) ;inc for double
+ bra cu_nunzro
+cu_nincs:
+ or.l #$100,LOCAL_HI(a0) ;inc for single
+ bra cu_nunzro
+cu_nzro:
+ or.l #z_mask,USER_FPSR(a6)
+ move.b STAG(a6),d0
+ andi.b #$e0,d0
+* NOTE(review): in the tag encoding (norm=000, zero=001, inf=010 in
+* bits 7:5) $40 is the inf tag and zero is $20, so the constant and
+* the comment on the next line disagree -- confirm which was intended.
+ cmpi.b #$40,d0 ;check if input was tagged zero
+ beq.b cu_numv
+cu_nunzro:
+ or.l #unfl_mask,USER_FPSR(a6) ;set unfl
+cu_numv:
+ move.l (a0),ETEMP(a6) ;copy the 12-byte result into ETEMP
+ move.l 4(a0),ETEMP_HI(a6)
+ move.l 8(a0),ETEMP_LO(a6)
+*
+* Write the result to memory, setting the fpsr cc bits. NaN and Inf
+* bypass cu_wrexn.
+*
+* Z is set when the sign/exponent word is $0000 or $8000; N is set
+* when that word is negative. The write itself is done by wr_etemp.
+*
+cu_wrexn:
+ tst.w LOCAL_EX(a0) ;test for zero
+ beq.b cu_wrzero
+ cmp.w #$8000,LOCAL_EX(a0) ;test for negative zero
+ bne.b cu_wreon
+cu_wrzero:
+ or.l #z_mask,USER_FPSR(a6) ;set Z bit
+cu_wreon:
+ tst.w LOCAL_EX(a0)
+ bpl wr_etemp
+ or.l #neg_mask,USER_FPSR(a6) ;set N bit
+ bra wr_etemp
+
+*
+* HANDLE SOURCE DENORM HERE
+*
+* ;clear denorm stag to norm
+* ;write the new tag & ete15 to the fstack
+mon_dnrm:
+*
+* At this point, check for the cases in which normalizing the
+* denorm produces incorrect results.
+*
+ tst.b DY_MO_FLG(a6) ;all cases of dyadic instructions would
+ bne.b nrm_src ;require normalization of denorm
+
+* At this point:
+* monadic instructions: fabs = $18 fneg = $1a ftst = $3a
+* fmove = $00 fsmove = $40 fdmove = $44
+* fsqrt = $05* fssqrt = $41 fdsqrt = $45
+* (*fsqrt reencoded to $05)
+*
+ move.w CMDREG1B(a6),d0 ;get command register
+ andi.l #$7f,d0 ;strip to only command word
+*
+* At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
+* fdsqrt are possible.
+* For cases fabs, fneg, fsmove, fdmove, and ftst goto cu_dnrm (do not normalize)
+* For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
+*
+ btst.l #0,d0
+ bne.b nrm_src ;weed out fsqrt instructions
+ st CU_ONLY(a6) ;set cu-only inst flag
+ bra cu_dnrm ;fmove, fabs, fneg, ftst
+* ;cases go to cu_dnrm
+nrm_src:
+ bclr.b #sign_bit,LOCAL_EX(a0) ;convert to internal format
+ sne LOCAL_SGN(a0)
+ bsr nrm_set ;normalize number (exponent will go
+* ; negative)
+ bclr.b #sign_bit,LOCAL_EX(a0) ;get rid of false sign
+
+ bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format
+ beq.b spos
+ bset.b #sign_bit,LOCAL_EX(a0)
+spos:
+ bfclr STAG(a6){0:4} ;set tag to normalized, ETE15 = 0
+ bset.b #4,STAG(a6) ;set ETE15
+ or.b #$f0,DNRM_FLG(a6) ;high nibble records a source denorm
+normal:
+ tst.b DNRM_FLG(a6) ;check if any of the ops were denorms
+ bne ck_wrap ;if so, check if it is a potential
+* ;wrap-around case
+fix_stk:
+ move.b #$fe,CU_SAVEPC(a6)
+ bclr.b #E1,E_BYTE(a6)
+
+ clr.w NMNEXC(a6)
+
+ st.b RES_FLG(a6) ;indicate that a restore is needed
+ rts
+
+*
+* cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
+* ftst) completely in software without an frestore to the 040.
+*
+*
+* cu_dnrm: dispatch on the command bits (masked with $3b) to the
+* fmove, fabs or fneg code; anything else falls through to cu_dtst.
+* a0 points at the operand, a6 at the exception frame.
+*
+cu_dnrm:
+ st.b CU_ONLY(a6)
+ move.w CMDREG1B(a6),d0
+ andi.b #$3b,d0 ;isolate bits to select inst
+ tst.b d0
+ beq.l cu_dmove ;if zero, it is an fmove
+ cmpi.b #$18,d0
+ beq.l cu_dabs ;if $18, it is fabs
+ cmpi.b #$1a,d0
+ beq.l cu_dneg ;if $1a, it is fneg
+*
+* Inst is ftst. Check the source operand and set the cc's accordingly.
+* No write is done, so simply rts.
+*
+cu_dtst:
+ move.w LOCAL_EX(a0),d0
+ bclr.l #15,d0 ;strip sign from d0; Z = sign was clear
+ sne LOCAL_SGN(a0) ;record sign (Scc leaves the CCR alone)
+ beq.b cu_dtpo ;still testing Z from the bclr above
+ or.l #neg_mask,USER_FPSR(a6) ;set N
+cu_dtpo:
+ cmpi.w #$7fff,d0 ;test for inf/nan
+ bne.b cu_dtcz
+ tst.l LOCAL_HI(a0) ;max exponent: any mantissa bit => nan
+ bne.b cu_dtn
+ tst.l LOCAL_LO(a0)
+ bne.b cu_dtn
+ or.l #inf_mask,USER_FPSR(a6) ;mantissa all zero: set I
+ rts
+cu_dtn:
+ or.l #nan_mask,USER_FPSR(a6)
+ move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for
+* ;snan handler
+ rts
+cu_dtcz:
+ tst.l LOCAL_HI(a0) ;set Z only if the whole mantissa is zero
+ bne.l cu_dtsx
+ tst.l LOCAL_LO(a0)
+ bne.l cu_dtsx
+ or.l #z_mask,USER_FPSR(a6)
+cu_dtsx:
+ rts
+*
+* Inst is fabs. Execute the absolute value function on the input.
+* Branch to the fmove code.
+*
+cu_dabs:
+ bclr.b #7,LOCAL_EX(a0) ;do abs
+ bra.b cu_dmove ;fmove code will finish
+*
+* Inst is fneg. Execute the negate value function on the input.
+* Fall through to the fmove code.
+*
+cu_dneg:
+ bchg.b #7,LOCAL_EX(a0) ;do neg
+*
+* Inst is fmove. This code also handles all result writes.
+* If bit 2 is set, round is forced to double. If it is clear,
+* and bit 6 is set, round is forced to single. If both are clear,
+* the round precision is found in the fpcr. If the rounding precision
+* is double or single, the result is zero, and the mode is checked
+* to determine if the lsb of the result should be set.
+*
+cu_dmove:
+ btst.b #2,CMDREG1B+1(a6) ;check for rd
+ bne cu_dmrd
+ btst.b #6,CMDREG1B+1(a6) ;check for rs
+ bne cu_dmrs
+*
+* The move or operation is not with forced precision. Use the
+* FPCR_MODE byte to get rounding.
+*
+cu_dmnr:
+ bfextu FPCR_MODE(a6){0:2},d0 ;d0 = rounding precision field
+ tst.b d0 ;check for extended
+ beq cu_wrexd ;if so, just write result
+ cmpi.b #1,d0 ;check for single
+ beq cu_dmrs ;fall through to double
+*
+* The move is fdmove or round precision is double. Result is zero.
+* Check rmode for rp or rm and set lsb accordingly.
+*
+cu_dmrd:
+ bfextu FPCR_MODE(a6){2:2},d1 ;get rmode
+ tst.w LOCAL_EX(a0) ;check sign
+ blt.b cu_dmdn
+ cmpi.b #3,d1 ;check for rp
+ bne cu_dpd ;load double pos zero
+ bra cu_dpdr ;load double pos zero w/lsb
+cu_dmdn:
+ cmpi.b #2,d1 ;check for rm
+ bne cu_dnd ;load double neg zero
+ bra cu_dndr ;load double neg zero w/lsb
+*
+* The move is fsmove or round precision is single. Result is zero.
+* Check for rp or rm and set lsb accordingly.
+*
+cu_dmrs:
+ bfextu FPCR_MODE(a6){2:2},d1 ;get rmode
+ tst.w LOCAL_EX(a0) ;check sign
+ blt.b cu_dmsn
+ cmpi.b #3,d1 ;check for rp
+ bne cu_spd ;load single pos zero
+ bra cu_spdr ;load single pos zero w/lsb
+cu_dmsn:
+ cmpi.b #2,d1 ;check for rm
+ bne cu_snd ;load single neg zero
+ bra cu_sndr ;load single neg zero w/lsb
+*
+* The precision is extended, so the result in etemp is correct.
+* Simply set unfl (not inex2 or aunfl) and write the result to
+* the correct fp register.
+* N is skipped only when the sign/exponent word is exactly $0000
+* (positive, zero exponent); any other value sets N.
+cu_wrexd:
+ or.l #unfl_mask,USER_FPSR(a6)
+ tst.w LOCAL_EX(a0)
+ beq wr_etemp
+ or.l #neg_mask,USER_FPSR(a6)
+ bra wr_etemp
+*
+* These routines write +/- zero in double format. The routines
+* cu_dpdr and cu_dndr set the double lsb.
+*
+*
+* Each routine overwrites the operand at (a0) with a fixed value,
+* ORs the matching condition/exception bits into USER_FPSR and exits
+* through wr_etemp. Sign/exponent word: $3c01 positive, $bc01 negative.
+*
+cu_dpd:
+ move.l #$3c010000,LOCAL_EX(a0) ;force pos double zero
+ clr.l LOCAL_HI(a0)
+ clr.l LOCAL_LO(a0)
+ or.l #z_mask,USER_FPSR(a6)
+ or.l #unfinx_mask,USER_FPSR(a6)
+ bra wr_etemp
+cu_dpdr:
+ move.l #$3c010000,LOCAL_EX(a0) ;force pos double zero
+ clr.l LOCAL_HI(a0)
+ move.l #$800,LOCAL_LO(a0) ;with lsb set
+ or.l #unfinx_mask,USER_FPSR(a6) ;no Z: result is not zero
+ bra wr_etemp
+cu_dnd:
+ move.l #$bc010000,LOCAL_EX(a0) ;force neg double zero
+ clr.l LOCAL_HI(a0)
+ clr.l LOCAL_LO(a0)
+ or.l #z_mask,USER_FPSR(a6)
+ or.l #neg_mask,USER_FPSR(a6)
+ or.l #unfinx_mask,USER_FPSR(a6)
+ bra wr_etemp
+cu_dndr:
+ move.l #$bc010000,LOCAL_EX(a0) ;force neg double zero
+ clr.l LOCAL_HI(a0)
+ move.l #$800,LOCAL_LO(a0) ;with lsb set
+ or.l #neg_mask,USER_FPSR(a6)
+ or.l #unfinx_mask,USER_FPSR(a6)
+ bra wr_etemp
+*
+* These routines write +/- zero in single format. The routines
+* cu_spdr and cu_sndr set the single lsb.
+*
+*
+* Single-format counterparts of the double zero writers: same
+* structure, sign/exponent word $3f81 (positive) or $bf81 (negative),
+* and the lsb variant sets $100 in LOCAL_HI instead of $800 in LOCAL_LO.
+*
+cu_spd:
+ move.l #$3f810000,LOCAL_EX(a0) ;force pos single zero
+ clr.l LOCAL_HI(a0)
+ clr.l LOCAL_LO(a0)
+ or.l #z_mask,USER_FPSR(a6)
+ or.l #unfinx_mask,USER_FPSR(a6)
+ bra wr_etemp
+cu_spdr:
+ move.l #$3f810000,LOCAL_EX(a0) ;force pos single zero
+ move.l #$100,LOCAL_HI(a0) ;with lsb set
+ clr.l LOCAL_LO(a0)
+ or.l #unfinx_mask,USER_FPSR(a6) ;no Z: result is not zero
+ bra wr_etemp
+cu_snd:
+ move.l #$bf810000,LOCAL_EX(a0) ;force neg single zero
+ clr.l LOCAL_HI(a0)
+ clr.l LOCAL_LO(a0)
+ or.l #z_mask,USER_FPSR(a6)
+ or.l #neg_mask,USER_FPSR(a6)
+ or.l #unfinx_mask,USER_FPSR(a6)
+ bra wr_etemp
+cu_sndr:
+ move.l #$bf810000,LOCAL_EX(a0) ;force neg single zero
+ move.l #$100,LOCAL_HI(a0) ;with lsb set
+ clr.l LOCAL_LO(a0)
+ or.l #neg_mask,USER_FPSR(a6)
+ or.l #unfinx_mask,USER_FPSR(a6)
+ bra wr_etemp
+
+*
+* This code checks for 16-bit overflow conditions on dyadic
+* operations which are not restorable into the floating-point
+* unit and must be completed in software. Basically, this
+* condition exists with a very large norm and a denorm. One
+* of the operands must be denormalized to enter this code.
+*
+* Flags used:
+* DY_MO_FLG contains 0 for monadic op, $ff for dyadic
+* DNRM_FLG contains $00 for neither op denormalized
+* $0f for the destination op denormalized
+* $f0 for the source op denormalized
+* $ff for both ops denormalized
+*
+* The wrap-around condition occurs for add, sub, div, and cmp
+* when
+*
+* abs(dest_exp - src_exp) >= $8000
+*
+* and for mul when
+*
+* (dest_exp + src_exp) < $0
+*
+* we must process the operation here if this case is true.
+*
+* The rts following the frcfpn routine is the exit from res_func
+* for this condition. The restore flag (RES_FLG) is left clear.
+* No frestore is done unless an exception is to be reported.
+*
+* For fadd:
+* if(sign_of(dest) != sign_of(src))
+* replace exponent of src with $3fff (keep sign)
+* use fpu to perform dest+new_src (user's rmode and X)
+* clr sticky
+* else
+* set sticky
+* call round with user's precision and mode
+* move result to fpn and wbtemp
+*
+* For fsub:
+* if(sign_of(dest) == sign_of(src))
+* replace exponent of src with $3fff (keep sign)
+* use fpu to perform dest-new_src (user's rmode and X)
+* clr sticky
+* else
+* set sticky
+* call round with user's precision and mode
+* move result to fpn and wbtemp
+*
+* For fdiv/fsgldiv:
+* if(both operands are denorm)
+* restore_to_fpu;
+* if(dest is norm)
+* force_ovf;
+* else(dest is denorm)
+* force_unf:
+*
+* For fcmp:
+* if(dest is norm)
+* N = sign_of(dest);
+* else(dest is denorm)
+* N = ~sign_of(src);
+*
+* For fmul:
+* if(both operands are denorm)
+* force_unf;
+* if((dest_exp + src_exp) < 0)
+* force_unf:
+* else
+* restore_to_fpu;
+*
+* local equates:
+* Command values after masking CMDREG1B with $3b. divcode is never
+* compared in ck_wrap: anything that matches none of the other four
+* falls through to wrap_div.
+addcode equ $22
+subcode equ $28
+mulcode equ $23
+divcode equ $20
+cmpcode equ $38
+*
+* ck_wrap: reached from "normal" when DNRM_FLG is nonzero. Monadic
+* ops go straight to fix_stk; dyadic ops are dispatched by command.
+*
+ck_wrap:
+ tst.b DY_MO_FLG(a6) ;check for fsqrt
+ beq fix_stk ;if zero, it is fsqrt
+ move.w CMDREG1B(a6),d0
+ andi.w #$3b,d0 ;strip to command bits
+ cmpi.w #addcode,d0
+ beq wrap_add
+ cmpi.w #subcode,d0
+ beq wrap_sub
+ cmpi.w #mulcode,d0
+ beq wrap_mul
+ cmpi.w #cmpcode,d0
+ beq wrap_cmp
+*
+* Inst is fdiv.
+*
+wrap_div:
+ cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm,
+ beq fix_stk ;restore to fpu
+*
+* One of the ops is denormalized. Test for wrap condition
+* and force the result.
+*
+ cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm
+ bne.b div_srcd
+div_destd:
+ bsr.l ckinf_ns ;d0 != 0 if src is not a plain norm
+ bne fix_stk
+ bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos)
+ bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg)
+ sub.l d1,d0 ;subtract dest from src
+* NOTE(review): threshold here is $7fff; the other wrap tests in this
+* file compare against $8000 - confirm this difference is intended.
+ cmp.l #$7fff,d0
+ blt fix_stk ;if less, not wrap case
+ clr.b WBTEMP_SGN(a6)
+ move.w ETEMP_EX(a6),d0 ;find the sign of the result
+ move.w FPTEMP_EX(a6),d1
+ eor.w d1,d0 ;bit 15 = src sign xor dest sign
+ andi.w #$8000,d0
+ beq force_unf
+ st.b WBTEMP_SGN(a6) ;(force_unf recomputes WBTEMP_SGN itself)
+ bra force_unf
+
+*
+* ckinf_ns / ckinf_nd: classify the source (STAG) or destination
+* (DTAG) tag for the wrap-around checks.
+* Returns d0 = 0 with Z set when the two tag bits ($60) are both
+* clear, so the caller may go on to test for the wrap case.
+* Returns d0 = -1 with Z clear for tag $20 (zero), $40 (inf) or
+* $60 (nan); callers then branch to fix_stk.
+*
+ckinf_ns:
+ move.b STAG(a6),d0 ;classify the source tag
+ bra ck_in_com
+ckinf_nd:
+ move.b DTAG(a6),d0 ;classify the destination tag
+ck_in_com:
+ andi.b #$60,d0 ;isolate tag bits; Z set only for tag 0
+ bne nan_or_inf ;zero, inf or nan: not a wrap case
+ clr.l d0 ;neither zero, inf nor nan:
+ rts ;caller checks the wrap case
+nan_or_inf:
+ moveq.l #-1,d0
+ rts
+
+
+
+*
+* div_srcd: fdiv with a denormalized source. If the destination is
+* an ordinary norm and the exponent gap reaches $8000, compute the
+* result sign into WBTEMP_SGN and force an overflow result.
+*
+div_srcd:
+ bsr.l ckinf_nd ;d0 != 0 if dest is not a plain norm
+ bne fix_stk
+ bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos)
+ bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg)
+ sub.l d1,d0 ;subtract src from dest
+ cmp.l #$8000,d0
+ blt fix_stk ;if less, not wrap case
+ clr.b WBTEMP_SGN(a6)
+ move.w ETEMP_EX(a6),d0 ;find the sign of the result
+ move.w FPTEMP_EX(a6),d1
+ eor.w d1,d0 ;bit 15 = src sign xor dest sign
+ andi.w #$8000,d0
+ beq.b force_ovf
+ st.b WBTEMP_SGN(a6)
+*
+* This code handles the case of the instruction resulting in
+* an overflow condition.
+* On entry WBTEMP_SGN holds the result sign. d0 is loaded with the
+* rounding precision (1 = single, 2 = double, else the fpcr field)
+* and a0 with the address of WBTEMP before calling ovf_res.
+*
+force_ovf:
+ bclr.b #E1,E_BYTE(a6)
+ or.l #ovfl_inx_mask,USER_FPSR(a6)
+ clr.w NMNEXC(a6)
+ lea.l WBTEMP(a6),a0 ;point a0 to memory location
+ move.w CMDREG1B(a6),d0
+ btst.l #6,d0 ;test for forced precision
+ beq.b frcovf_fpcr
+ btst.l #2,d0 ;check for double
+ bne.b frcovf_dbl
+ move.l #$1,d0 ;inst is forced single
+ bra.b frcovf_rnd
+frcovf_dbl:
+ move.l #$2,d0 ;inst is forced double
+ bra.b frcovf_rnd
+frcovf_fpcr:
+ bfextu FPCR_MODE(a6){0:2},d0 ;inst not forced - use fpcr prec
+frcovf_rnd:
+
+* The 881/882 does not set inex2 for the following case, so the
+* line is commented out to be compatible with 881/882
+* tst.b d0
+* beq.b frcovf_x
+* or.l #inex2_mask,USER_FPSR(a6) ;if prec is s or d, set inex2
+
+*frcovf_x:
+ bsr.l ovf_res ;get correct result based on
+* ;round precision/mode. This
+* ;sets FPSR_CC correctly
+* ;returns in external format
+ bfclr WBTEMP_SGN(a6){0:8} ;clear sign byte; Z = it was zero
+ beq frcfpn
+ bset.b #sign_bit,WBTEMP_EX(a6) ;negative: put sign back in exponent word
+ bra frcfpn
+*
+* Inst is fadd.
+*
+*
+* wrap_add: fadd with at least one denormalized operand. Goes to
+* fix_stk unless exactly one operand is a denorm, the other is an
+* ordinary norm, and the exponent gap is at least $8000; otherwise
+* continues at add_wrap.
+*
+wrap_add:
+ cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm,
+ beq fix_stk ;restore to fpu
+*
+* One of the ops is denormalized. Test for wrap condition
+* and complete the instruction.
+*
+ cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm
+ bne.b add_srcd
+add_destd:
+ bsr.l ckinf_ns ;d0 != 0 if src is zero, inf or nan
+ bne fix_stk
+ bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos)
+ bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg)
+ sub.l d1,d0 ;subtract dest from src
+ cmp.l #$8000,d0
+ blt fix_stk ;if less, not wrap case
+ bra add_wrap
+add_srcd:
+ bsr.l ckinf_nd ;d0 != 0 if dest is zero, inf or nan
+ bne fix_stk
+ bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos)
+ bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg)
+ sub.l d1,d0 ;subtract src from dest
+ cmp.l #$8000,d0
+ blt fix_stk ;if less, not wrap case
+*
+* Check the signs of the operands. If they are unlike, the fpu
+* can be used to add the norm and 1.0 with the sign of the
+* denorm and it will correctly generate the result in extended
+* precision. We can then call round with no sticky and the result
+* will be correct for the user's rounding mode and precision. If
+* the signs are the same, we call round with the sticky bit set
+* and the result will be correct for the user's rounding mode and
+* precision.
+*
+*
+* add_wrap: wrap-case fadd. Like signs go to add_same. For unlike
+* signs the denorm's exponent is replaced by $3fff (sign kept), the
+* fpu adds the operands, and the sum stored in WBTEMP is passed to
+* round with a0 = WBTEMP, d0 = 0 (no sticky) and d1 = precision in
+* the upper word / rounding mode in the lower word.
+*
+add_wrap:
+ move.w ETEMP_EX(a6),d0
+ move.w FPTEMP_EX(a6),d1
+ eor.w d1,d0 ;bit 15 = src sign xor dest sign
+ andi.w #$8000,d0
+ beq add_same
+*
+* The signs are unlike.
+*
+ cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm?
+ bne.b add_u_srcd
+ move.w FPTEMP_EX(a6),d0
+ andi.w #$8000,d0 ;keep only the sign
+ or.w #$3fff,d0 ;force the exponent to +/- 1
+ move.w d0,FPTEMP_EX(a6) ;in the denorm
+ move.l USER_FPCR(a6),d0
+ andi.l #$30,d0 ;keep only the rounding-mode bits
+ fmove.l d0,fpcr ;set up users rmode and X
+ fmove.x ETEMP(a6),fp0
+ fadd.x FPTEMP(a6),fp0
+ lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame
+ fmove.l fpsr,d1
+ or.l d1,USER_FPSR(a6) ;capture cc's and inex from fadd
+ fmove.x fp0,WBTEMP(a6) ;write result to memory
+ lsr.l #4,d0 ;put rmode in lower 2 bits
+ move.l USER_FPCR(a6),d1
+ andi.l #$c0,d1
+ lsr.l #6,d1 ;put precision in upper word
+ swap d1
+ or.l d0,d1 ;set up for round call
+ clr.l d0 ;force sticky to zero
+ bclr.b #sign_bit,WBTEMP_EX(a6)
+ sne WBTEMP_SGN(a6) ;use internal format for round
+ bsr.l round ;round result to users rmode & prec
+ bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
+ beq frcfpnr
+ bset.b #sign_bit,WBTEMP_EX(a6)
+ bra frcfpnr
+*
+* Same sequence as above, but the source (ETEMP) is the denorm.
+*
+add_u_srcd:
+ move.w ETEMP_EX(a6),d0
+ andi.w #$8000,d0 ;keep only the sign
+ or.w #$3fff,d0 ;force the exponent to +/- 1
+ move.w d0,ETEMP_EX(a6) ;in the denorm
+ move.l USER_FPCR(a6),d0
+ andi.l #$30,d0 ;keep only the rounding-mode bits
+ fmove.l d0,fpcr ;set up users rmode and X
+ fmove.x ETEMP(a6),fp0
+ fadd.x FPTEMP(a6),fp0
+ fmove.l fpsr,d1
+ or.l d1,USER_FPSR(a6) ;capture cc's and inex from fadd
+ lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame
+ fmove.x fp0,WBTEMP(a6) ;write result to memory
+ lsr.l #4,d0 ;put rmode in lower 2 bits
+ move.l USER_FPCR(a6),d1
+ andi.l #$c0,d1
+ lsr.l #6,d1 ;put precision in upper word
+ swap d1
+ or.l d0,d1 ;set up for round call
+ clr.l d0 ;force sticky to zero
+ bclr.b #sign_bit,WBTEMP_EX(a6)
+ sne WBTEMP_SGN(a6) ;use internal format for round
+ bsr.l round ;round result to users rmode & prec
+ bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
+ beq frcfpnr
+ bset.b #sign_bit,WBTEMP_EX(a6)
+ bra frcfpnr
+*
+* Signs are alike:
+*
+*
+* add_same: wrap-case fadd with like signs. The norm operand is
+* rounded in place with the sticky bit set (d0 = $20000000), copied
+* to WBTEMP, N is set from its sign, and add_ckovf checks whether
+* rounding pushed the exponent to $7fff.
+*
+add_same:
+ cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm?
+ bne.b add_s_srcd
+add_s_destd:
+ lea.l ETEMP(a6),a0 ;dest is the denorm: round the source
+ move.l USER_FPCR(a6),d0
+ andi.l #$30,d0
+ lsr.l #4,d0 ;put rmode in lower 2 bits
+ move.l USER_FPCR(a6),d1
+ andi.l #$c0,d1
+ lsr.l #6,d1 ;put precision in upper word
+ swap d1
+ or.l d0,d1 ;set up for round call
+ move.l #$20000000,d0 ;set sticky for round
+ bclr.b #sign_bit,ETEMP_EX(a6)
+ sne ETEMP_SGN(a6) ;use internal format for round
+ bsr.l round ;round result to users rmode & prec
+ bfclr ETEMP_SGN(a6){0:8} ;convert back to IEEE ext format
+ beq.b add_s_dclr
+ bset.b #sign_bit,ETEMP_EX(a6)
+add_s_dclr:
+ lea.l WBTEMP(a6),a0
+ move.l ETEMP(a6),(a0) ;write result to wbtemp
+ move.l ETEMP_HI(a6),4(a0)
+ move.l ETEMP_LO(a6),8(a0)
+ tst.w ETEMP_EX(a6)
+ bgt add_ckovf ;positive: leave N clear
+ or.l #neg_mask,USER_FPSR(a6)
+ bra add_ckovf
+add_s_srcd:
+ lea.l FPTEMP(a6),a0 ;src is the denorm: round the dest
+ move.l USER_FPCR(a6),d0
+ andi.l #$30,d0
+ lsr.l #4,d0 ;put rmode in lower 2 bits
+ move.l USER_FPCR(a6),d1
+ andi.l #$c0,d1
+ lsr.l #6,d1 ;put precision in upper word
+ swap d1
+ or.l d0,d1 ;set up for round call
+ move.l #$20000000,d0 ;set sticky for round
+ bclr.b #sign_bit,FPTEMP_EX(a6)
+ sne FPTEMP_SGN(a6) ;use internal format for round
+ bsr.l round ;round result to users rmode & prec
+ bfclr FPTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
+ beq.b add_s_sclr
+ bset.b #sign_bit,FPTEMP_EX(a6)
+add_s_sclr:
+ lea.l WBTEMP(a6),a0
+ move.l FPTEMP(a6),(a0) ;write result to wbtemp
+ move.l FPTEMP_HI(a6),4(a0)
+ move.l FPTEMP_LO(a6),8(a0)
+ tst.w FPTEMP_EX(a6)
+ bgt add_ckovf ;positive: leave N clear
+ or.l #neg_mask,USER_FPSR(a6)
+add_ckovf:
+ move.w WBTEMP_EX(a6),d0
+ andi.w #$7fff,d0 ;drop the sign, keep the exponent
+ cmpi.w #$7fff,d0
+ bne frcfpnr
+*
+* The result has overflowed to $7fff exponent. Set I, ovfl,
+* and aovfl, and clr the mantissa (incorrectly set by the
+* round routine.)
+*
+ or.l #inf_mask+ovfl_inx_mask,USER_FPSR(a6)
+ clr.l 4(a0) ;a0 = WBTEMP; clears the high mantissa long
+ bra frcfpnr
+*
+* Inst is fsub.
+*
+*
+* wrap_sub: fsub with at least one denormalized operand. Same
+* screening as for fadd: goes to fix_stk unless exactly one operand
+* is a denorm, the other is an ordinary norm, and the exponent gap
+* is at least $8000; otherwise continues at sub_wrap.
+*
+wrap_sub:
+ cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm,
+ beq fix_stk ;restore to fpu
+*
+* One of the ops is denormalized. Test for wrap condition
+* and complete the instruction.
+*
+ cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm
+ bne.b sub_srcd
+sub_destd:
+ bsr.l ckinf_ns ;d0 != 0 if src is zero, inf or nan
+ bne fix_stk
+ bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos)
+ bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg)
+ sub.l d1,d0 ;subtract dest from src
+ cmp.l #$8000,d0
+ blt fix_stk ;if less, not wrap case
+ bra sub_wrap
+sub_srcd:
+ bsr.l ckinf_nd ;d0 != 0 if dest is zero, inf or nan
+ bne fix_stk
+ bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos)
+ bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg)
+ sub.l d1,d0 ;subtract src from dest
+ cmp.l #$8000,d0
+ blt fix_stk ;if less, not wrap case
+*
+* Check the signs of the operands. If they are alike, the fpu
+* can be used to subtract from the norm 1.0 with the sign of the
+* denorm and it will correctly generate the result in extended
+* precision. We can then call round with no sticky and the result
+* will be correct for the user's rounding mode and precision. If
+* the signs are unlike, we call round with the sticky bit set
+* and the result will be correct for the user's rounding mode and
+* precision.
+*
+*
+* sub_wrap: wrap-case fsub. Unlike signs go to sub_diff. For like
+* signs the denorm's exponent is replaced by $3fff (sign kept), the
+* fpu computes dest - src (FPTEMP - ETEMP), and the difference stored
+* in WBTEMP is passed to round with a0 = WBTEMP, d0 = 0 (no sticky)
+* and d1 = precision in the upper word / rounding mode in the lower.
+*
+sub_wrap:
+ move.w ETEMP_EX(a6),d0
+ move.w FPTEMP_EX(a6),d1
+ eor.w d1,d0 ;bit 15 = src sign xor dest sign
+ andi.w #$8000,d0
+ bne sub_diff
+*
+* The signs are alike.
+*
+ cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm?
+ bne.b sub_u_srcd
+ move.w FPTEMP_EX(a6),d0
+ andi.w #$8000,d0 ;keep only the sign
+ or.w #$3fff,d0 ;force the exponent to +/- 1
+ move.w d0,FPTEMP_EX(a6) ;in the denorm
+ move.l USER_FPCR(a6),d0
+ andi.l #$30,d0 ;keep only the rounding-mode bits
+ fmove.l d0,fpcr ;set up users rmode and X
+ fmove.x FPTEMP(a6),fp0
+ fsub.x ETEMP(a6),fp0
+ fmove.l fpsr,d1
+ or.l d1,USER_FPSR(a6) ;capture cc's and inex from fsub
+ lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame
+ fmove.x fp0,WBTEMP(a6) ;write result to memory
+ lsr.l #4,d0 ;put rmode in lower 2 bits
+ move.l USER_FPCR(a6),d1
+ andi.l #$c0,d1
+ lsr.l #6,d1 ;put precision in upper word
+ swap d1
+ or.l d0,d1 ;set up for round call
+ clr.l d0 ;force sticky to zero
+ bclr.b #sign_bit,WBTEMP_EX(a6)
+ sne WBTEMP_SGN(a6) ;use internal format for round
+ bsr.l round ;round result to users rmode & prec
+ bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
+ beq frcfpnr
+ bset.b #sign_bit,WBTEMP_EX(a6)
+ bra frcfpnr
+*
+* Same sequence as above, but the source (ETEMP) is the denorm.
+*
+sub_u_srcd:
+ move.w ETEMP_EX(a6),d0
+ andi.w #$8000,d0 ;keep only the sign
+ or.w #$3fff,d0 ;force the exponent to +/- 1
+ move.w d0,ETEMP_EX(a6) ;in the denorm
+ move.l USER_FPCR(a6),d0
+ andi.l #$30,d0 ;keep only the rounding-mode bits
+ fmove.l d0,fpcr ;set up users rmode and X
+ fmove.x FPTEMP(a6),fp0
+ fsub.x ETEMP(a6),fp0
+ fmove.l fpsr,d1
+ or.l d1,USER_FPSR(a6) ;capture cc's and inex from fsub
+ lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame
+ fmove.x fp0,WBTEMP(a6) ;write result to memory
+ lsr.l #4,d0 ;put rmode in lower 2 bits
+ move.l USER_FPCR(a6),d1
+ andi.l #$c0,d1
+ lsr.l #6,d1 ;put precision in upper word
+ swap d1
+ or.l d0,d1 ;set up for round call
+ clr.l d0 ;force sticky to zero
+ bclr.b #sign_bit,WBTEMP_EX(a6)
+ sne WBTEMP_SGN(a6) ;use internal format for round
+ bsr.l round ;round result to users rmode & prec
+ bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
+ beq frcfpnr
+ bset.b #sign_bit,WBTEMP_EX(a6)
+ bra frcfpnr
+*
+* Signs are unlike:
+*
+*
+* sub_diff: wrap-case fsub with unlike signs. The norm operand is
+* rounded in place with the sticky bit set (d0 = $20000000) and
+* copied to WBTEMP. When the dest is the denorm the source's sign is
+* flipped first. sub_ckovf then checks for an exponent of $7fff.
+*
+sub_diff:
+ cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm?
+ bne.b sub_s_srcd
+sub_s_destd:
+ lea.l ETEMP(a6),a0 ;dest is the denorm: round the source
+ move.l USER_FPCR(a6),d0
+ andi.l #$30,d0
+ lsr.l #4,d0 ;put rmode in lower 2 bits
+ move.l USER_FPCR(a6),d1
+ andi.l #$c0,d1
+ lsr.l #6,d1 ;put precision in upper word
+ swap d1
+ or.l d0,d1 ;set up for round call
+ move.l #$20000000,d0 ;set sticky for round
+*
+* Since the dest is the denorm, the sign is the opposite of the
+* norm sign.
+*
+ eori.w #$8000,ETEMP_EX(a6) ;flip sign on result
+ tst.w ETEMP_EX(a6)
+ bgt.b sub_s_dwr ;positive: leave N clear
+ or.l #neg_mask,USER_FPSR(a6)
+sub_s_dwr:
+ bclr.b #sign_bit,ETEMP_EX(a6)
+ sne ETEMP_SGN(a6) ;use internal format for round
+ bsr.l round ;round result to users rmode & prec
+ bfclr ETEMP_SGN(a6){0:8} ;convert back to IEEE ext format
+ beq.b sub_s_dclr
+ bset.b #sign_bit,ETEMP_EX(a6)
+sub_s_dclr:
+ lea.l WBTEMP(a6),a0
+ move.l ETEMP(a6),(a0) ;write result to wbtemp
+ move.l ETEMP_HI(a6),4(a0)
+ move.l ETEMP_LO(a6),8(a0)
+ bra sub_ckovf
+sub_s_srcd:
+ lea.l FPTEMP(a6),a0 ;src is the denorm: round the dest
+ move.l USER_FPCR(a6),d0
+ andi.l #$30,d0
+ lsr.l #4,d0 ;put rmode in lower 2 bits
+ move.l USER_FPCR(a6),d1
+ andi.l #$c0,d1
+ lsr.l #6,d1 ;put precision in upper word
+ swap d1
+ or.l d0,d1 ;set up for round call
+ move.l #$20000000,d0 ;set sticky for round
+ bclr.b #sign_bit,FPTEMP_EX(a6)
+ sne FPTEMP_SGN(a6) ;use internal format for round
+ bsr.l round ;round result to users rmode & prec
+ bfclr FPTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
+ beq.b sub_s_sclr
+ bset.b #sign_bit,FPTEMP_EX(a6)
+sub_s_sclr:
+ lea.l WBTEMP(a6),a0
+ move.l FPTEMP(a6),(a0) ;write result to wbtemp
+ move.l FPTEMP_HI(a6),4(a0)
+ move.l FPTEMP_LO(a6),8(a0)
+ tst.w FPTEMP_EX(a6)
+ bgt sub_ckovf ;positive: leave N clear
+ or.l #neg_mask,USER_FPSR(a6)
+sub_ckovf:
+ move.w WBTEMP_EX(a6),d0
+ andi.w #$7fff,d0 ;drop the sign, keep the exponent
+ cmpi.w #$7fff,d0
+ bne frcfpnr
+*
+* The result has overflowed to $7fff exponent. Set I, ovfl,
+* and aovfl, and clr the mantissa (incorrectly set by the
+* round routine.)
+*
+ or.l #inf_mask+ovfl_inx_mask,USER_FPSR(a6)
+ clr.l 4(a0) ;a0 = WBTEMP; clears the high mantissa long
+ bra frcfpnr
+*
+* Inst is fcmp.
+*
+*
+* wrap_cmp: fcmp with at least one denormalized operand. In the wrap
+* case nothing is written back; only N in USER_FPSR may be set and
+* the routine returns directly (RES_FLG is not touched on that path).
+*
+wrap_cmp:
+ cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm,
+ beq fix_stk ;restore to fpu
+*
+* One of the ops is denormalized. Test for wrap condition
+* and complete the instruction.
+*
+ cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm
+ bne.b cmp_srcd
+cmp_destd:
+ bsr.l ckinf_ns ;d0 != 0 if src is zero, inf or nan
+ bne fix_stk
+ bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos)
+ bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg)
+ sub.l d1,d0 ;subtract dest from src
+ cmp.l #$8000,d0
+ blt fix_stk ;if less, not wrap case
+ tst.w ETEMP_EX(a6) ;set N to ~sign_of(src)
+ bge cmp_setn ;src non-negative: set N
+ rts
+cmp_srcd:
+ bsr.l ckinf_nd ;d0 != 0 if dest is zero, inf or nan
+ bne fix_stk
+ bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos)
+ bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg)
+ sub.l d1,d0 ;subtract src from dest
+ cmp.l #$8000,d0
+ blt fix_stk ;if less, not wrap case
+ tst.w FPTEMP_EX(a6) ;set N to sign_of(dest)
+ blt cmp_setn ;dest negative: set N
+ rts
+cmp_setn:
+ or.l #neg_mask,USER_FPSR(a6)
+ rts
+
+*
+* Inst is fmul.
+*
+*
+* wrap_mul: fmul with at least one denormalized operand. Two denorms
+* always force an underflow. With one denorm, the two exponents are
+* added: a positive sum goes to fix_stk, otherwise execution reaches
+* force_unf (mul_srcd does so by falling through).
+*
+wrap_mul:
+ cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm,
+ beq force_unf ;force an underflow (really!)
+*
+* One of the ops is denormalized. Test for wrap condition
+* and complete the instruction.
+*
+ cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm
+ bne.b mul_srcd
+mul_destd:
+ bsr.l ckinf_ns ;d0 != 0 if src is zero, inf or nan
+ bne fix_stk
+ bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos)
+ bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg)
+ add.l d1,d0 ;add dest exp to src exp
+ bgt fix_stk ;sum > 0: not a wrap case
+ bra force_unf
+mul_srcd:
+ bsr.l ckinf_nd ;d0 != 0 if dest is zero, inf or nan
+ bne fix_stk
+ bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos)
+ bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg)
+ add.l d1,d0 ;add src exp to dest exp
+ bgt fix_stk ;sum > 0: not a wrap case
+
+*
+* This code handles the case of the instruction resulting in
+* an underflow condition.
+*
+*
+* force_unf: sets the underflow/inexact bits, recomputes the result
+* sign (src sign xor dest sign) into WBTEMP_SGN, loads d0 with the
+* rounding precision (1 = single, 2 = double, else the fpcr field)
+* and a0 with the address of WBTEMP, calls unf_sub, then writes the
+* result through frcfpn.
+*
+force_unf:
+ bclr.b #E1,E_BYTE(a6)
+ or.l #unfinx_mask,USER_FPSR(a6)
+ clr.w NMNEXC(a6)
+ clr.b WBTEMP_SGN(a6)
+ move.w ETEMP_EX(a6),d0 ;find the sign of the result
+ move.w FPTEMP_EX(a6),d1
+ eor.w d1,d0 ;bit 15 = src sign xor dest sign
+ andi.w #$8000,d0
+ beq.b frcunfcont
+ st.b WBTEMP_SGN(a6)
+frcunfcont:
+ lea WBTEMP(a6),a0 ;point a0 to memory location
+ move.w CMDREG1B(a6),d0
+ btst.l #6,d0 ;test for forced precision
+ beq.b frcunf_fpcr
+ btst.l #2,d0 ;check for double
+ bne.b frcunf_dbl
+ move.l #$1,d0 ;inst is forced single
+ bra.b frcunf_rnd
+frcunf_dbl:
+ move.l #$2,d0 ;inst is forced double
+ bra.b frcunf_rnd
+frcunf_fpcr:
+ bfextu FPCR_MODE(a6){0:2},d0 ;inst not forced - use fpcr prec
+frcunf_rnd:
+ bsr.l unf_sub ;get correct result based on
+* ;round precision/mode. This
+* ;sets FPSR_CC correctly
+ bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
+ beq.b frcfpn
+ bset.b #sign_bit,WBTEMP_EX(a6)
+ bra frcfpn
+
+*
+* Write the result to the user's fpn. All results must be HUGE to be
+* written; otherwise the results would have overflowed or underflowed.
+* If the rounding precision is single or double, the ovf_res routine
+* is needed to correctly supply the max value.
+*
+*
+* frcfpnr: entry used by the fadd/fsub wrap code. With extended
+* precision the value already in WBTEMP is written as is. With
+* single or double precision (forced by the opcode or taken from the
+* fpcr) ovf_res is called and the overflow/inexact bits are set.
+* a0 is expected to point at WBTEMP on entry.
+*
+frcfpnr:
+ move.w CMDREG1B(a6),d0
+ btst.l #6,d0 ;test for forced precision
+ beq.b frcfpn_fpcr
+ btst.l #2,d0 ;check for double
+ bne.b frcfpn_dbl
+ move.l #$1,d0 ;inst is forced single
+ bra.b frcfpn_rnd
+frcfpn_dbl:
+ move.l #$2,d0 ;inst is forced double
+ bra.b frcfpn_rnd
+frcfpn_fpcr:
+ bfextu FPCR_MODE(a6){0:2},d0 ;inst not forced - use fpcr prec
+ tst.b d0
+ beq.b frcfpn ;if extended, write what you got
+frcfpn_rnd:
+ bclr.b #sign_bit,WBTEMP_EX(a6)
+ sne WBTEMP_SGN(a6) ;internal format: sign kept in WBTEMP_SGN
+ bsr.l ovf_res ;get correct result based on
+* ;round precision/mode. This
+* ;sets FPSR_CC correctly
+ bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format
+ beq.b frcfpn_clr
+ bset.b #sign_bit,WBTEMP_EX(a6)
+frcfpn_clr:
+ or.l #ovfinx_mask,USER_FPSR(a6)
+*
+* Perform the write.
+* Destinations fp0-fp3 are written to the USER_FPn save slots in the
+* frame; fp4-fp7 are loaded directly with fmovem using a one-bit
+* register mask built in d0 (bit 7-n selects fpn).
+*
+frcfpn:
+ bfextu CMDREG1B(a6){6:3},d0 ;extract fp destination register
+ cmpi.b #3,d0
+ ble.b frc0123 ;check if dest is fp0-fp3
+ move.l #7,d1
+ sub.l d0,d1 ;d1 = 7 - register number
+ clr.l d0
+ bset.l d1,d0 ;d0 = dynamic register mask
+ fmovem.x WBTEMP(a6),d0
+ rts
+frc0123:
+ tst.b d0
+ beq.b frc0_dst
+ cmpi.b #1,d0
+ beq.b frc1_dst
+ cmpi.b #2,d0
+ beq.b frc2_dst
+frc3_dst:
+ move.l WBTEMP_EX(a6),USER_FP3(a6)
+ move.l WBTEMP_HI(a6),USER_FP3+4(a6)
+ move.l WBTEMP_LO(a6),USER_FP3+8(a6)
+ rts
+frc2_dst:
+ move.l WBTEMP_EX(a6),USER_FP2(a6)
+ move.l WBTEMP_HI(a6),USER_FP2+4(a6)
+ move.l WBTEMP_LO(a6),USER_FP2+8(a6)
+ rts
+frc1_dst:
+ move.l WBTEMP_EX(a6),USER_FP1(a6)
+ move.l WBTEMP_HI(a6),USER_FP1+4(a6)
+ move.l WBTEMP_LO(a6),USER_FP1+8(a6)
+ rts
+frc0_dst:
+ move.l WBTEMP_EX(a6),USER_FP0(a6)
+ move.l WBTEMP_HI(a6),USER_FP0+4(a6)
+ move.l WBTEMP_LO(a6),USER_FP0+8(a6)
+ rts
+
+*
+* Write etemp to fpn.
+* A check is made on enabled and signalled snan exceptions,
+* and the destination is not overwritten if this condition exists.
+* This code is designed to make fmoveins of unsupported data types
+* faster.
+*
+*
+* wr_etemp: if an snan exception is both signalled (FPSR_EXCEPT) and
+* enabled (FPCR_ENABLE), the destination is left untouched: etemp's
+* sign/exponent is copied to fptemp for the snan handler, N is set
+* when the operand is negative, and the routine returns. In every
+* other case control passes to fmoveinc, which performs the write.
+* a6 points at the exception frame.
+*
+wr_etemp:
+ btst.b #snan_bit,FPSR_EXCEPT(a6) ;if snan is set, and
+ beq.b fmoveinc ;enabled, force restore
+ btst.b #snan_bit,FPCR_ENABLE(a6) ;and don't overwrite
+ beq.b fmoveinc ;the dest
+ move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for
+* ;snan handler
+ tst.b ETEMP(a6) ;check for negative
+ blt.b snan_neg
+ rts
+*
+* The N condition code must be ORed in with neg_mask, the long-word
+* mask used at every other N-setting site in this file. The former
+* operand, neg_bit, would only be right if it were such a mask; by
+* its name, and the way the other _bit symbols are used with btst,
+* it is a bit number, so N was never set.
+*
+snan_neg:
+ or.l #neg_mask,USER_FPSR(a6) ;snan is negative; set N
+ rts
+*
+* fmoveinc: write etemp to the destination fp register. The source
+* tag is checked first so that I, NaN or Z (and N, from the sign of
+* the operand at (a0)) are set in USER_FPSR for inf, nan and zero
+* operands. fp4-fp7 are loaded directly; fp0-fp3 go through fp0123.
+*
+fmoveinc:
+ clr.w NMNEXC(a6)
+ bclr.b #E1,E_BYTE(a6)
+ move.b STAG(a6),d0 ;check if stag is inf
+ andi.b #$e0,d0 ;keep the top three tag bits
+ cmpi.b #$40,d0
+ bne.b fminc_cnan
+ or.l #inf_mask,USER_FPSR(a6) ;if inf, nothing yet has set I
+ tst.w LOCAL_EX(a0) ;check sign
+ bge.b fminc_con
+ or.l #neg_mask,USER_FPSR(a6)
+ bra fminc_con
+fminc_cnan:
+ cmpi.b #$60,d0 ;check if stag is NaN
+ bne.b fminc_czero
+ or.l #nan_mask,USER_FPSR(a6) ;if nan, nothing yet has set NaN
+ move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for
+* ;snan handler
+ tst.w LOCAL_EX(a0) ;check sign
+ bge.b fminc_con
+ or.l #neg_mask,USER_FPSR(a6)
+ bra fminc_con
+fminc_czero:
+ cmpi.b #$20,d0 ;check if zero
+ bne.b fminc_con
+ or.l #z_mask,USER_FPSR(a6) ;if zero, set Z
+ tst.w LOCAL_EX(a0) ;check sign
+ bge.b fminc_con
+ or.l #neg_mask,USER_FPSR(a6)
+fminc_con:
+ bfextu CMDREG1B(a6){6:3},d0 ;extract fp destination register
+ cmpi.b #3,d0
+ ble.b fp0123 ;check if dest is fp0-fp3
+ move.l #7,d1
+ sub.l d0,d1 ;d1 = 7 - register number
+ clr.l d0
+ bset.l d1,d0 ;d0 = dynamic register mask
+ fmovem.x ETEMP(a6),d0
+ rts
+
+*
+* fp0123: destination is fp0-fp3 (number in d0). Copy the three
+* long words of etemp into the matching USER_FPn slot of the frame.
+*
+fp0123:
+ tst.b d0
+ beq.b fp0_dst
+ cmpi.b #1,d0
+ beq.b fp1_dst
+ cmpi.b #2,d0
+ beq.b fp2_dst
+fp3_dst:
+ move.l ETEMP_EX(a6),USER_FP3(a6)
+ move.l ETEMP_HI(a6),USER_FP3+4(a6)
+ move.l ETEMP_LO(a6),USER_FP3+8(a6)
+ rts
+fp2_dst:
+ move.l ETEMP_EX(a6),USER_FP2(a6)
+ move.l ETEMP_HI(a6),USER_FP2+4(a6)
+ move.l ETEMP_LO(a6),USER_FP2+8(a6)
+ rts
+fp1_dst:
+ move.l ETEMP_EX(a6),USER_FP1(a6)
+ move.l ETEMP_HI(a6),USER_FP1+4(a6)
+ move.l ETEMP_LO(a6),USER_FP1+8(a6)
+ rts
+fp0_dst:
+ move.l ETEMP_EX(a6),USER_FP0(a6)
+ move.l ETEMP_HI(a6),USER_FP0+4(a6)
+ move.l ETEMP_LO(a6),USER_FP0+8(a6)
+ rts
+
+*
+* opclass3: move-out entry. Marks the instruction as cu-only, then
+* routes packed-format moves to pack_out and everything else to
+* mv_out.
+*
+opclass3:
+ st.b CU_ONLY(a6)
+ move.w CMDREG1B(a6),d0 ;check if packed moveout
+ andi.w #$0c00,d0 ;isolate last 2 bits of size field
+ cmpi.w #$0c00,d0 ;if size is 011 or 111, it is packed
+ beq.w pack_out ;else it is norm or denorm
+ bra.w mv_out
+
+
+*
+* MOVE OUT
+*
+
+*
+* mv_tbl: jump table indexed by the 3-bit field that mv_out extracts
+* from CMDREG1B. Entries 3 and 7 are the packed sizes, which opclass3
+* has already sent to pack_out.
+*
+mv_tbl:
+ dc.l li
+ dc.l sgp
+ dc.l xp
+ dc.l mvout_end ;should never be taken
+ dc.l wi
+ dc.l dp
+ dc.l bi
+ dc.l mvout_end ;should never be taken
+mv_out:
+ bfextu CMDREG1B(a6){3:3},d1 ;put source specifier in d1
+ lea.l mv_tbl,a0
+ move.l (a0,d1*4),a0 ;fetch handler address (4 bytes/entry)
+ jmp (a0)
+
+*
+* This exit is for move-out to memory. The aunfl bit is
+* set if the result is inex and unfl is signalled.
+*
+mvout_end:
+ btst.b #inex2_bit,FPSR_EXCEPT(a6)
+ beq.b no_aufl ;not inexact: no aunfl
+ btst.b #unfl_bit,FPSR_EXCEPT(a6)
+ beq.b no_aufl ;no underflow: no aunfl
+ bset.b #aunfl_bit,FPSR_AEXCEPT(a6) ;both set: accrue underflow
+no_aufl:
+ clr.w NMNEXC(a6)
+ bclr.b #E1,E_BYTE(a6)
+ fmove.l #0,FPSR ;clear any cc bits from res_func
+*
+* Return ETEMP to extended format from internal extended format so
+* that gen_except will have a correctly signed value for ovfl/unfl
+* handlers.
+*
+ bfclr ETEMP_SGN(a6){0:8} ;clear sign byte; Z = it was zero
+ beq.b mvout_con
+ bset.b #sign_bit,ETEMP_EX(a6) ;negative: restore sign bit
+mvout_con:
+ rts
+*
+* This exit is for move-out to int register. The aunfl bit is
+* not set in any case for this move.
+*
+mvouti_end:
+ clr.w NMNEXC(a6)
+ bclr.b #E1,E_BYTE(a6)
+ fmove.l #0,FPSR ;clear any cc bits from res_func
+*
+* Return ETEMP to extended format from internal extended format so
+* that gen_except will have a correctly signed value for ovfl/unfl
+* handlers.
+*
+ bfclr ETEMP_SGN(a6){0:8} ;clear sign byte; Z = it was zero
+ beq.b mvouti_con
+ bset.b #sign_bit,ETEMP_EX(a6) ;negative: restore sign bit
+mvouti_con:
+ rts
+*
+* li is used to handle a long integer source specifier
+*
+
+*
+* li: move-out to a long integer. d0 is set to the byte count (4).
+* Denorm sources go to int_dnrm. Values strictly between the two
+* limits below are converted by the fpu into L_SCR1 under the user's
+* rounding mode; values at or beyond a limit are saturated and then
+* classified as exact, inexact or operr by a second compare.
+*
+li:
+ moveq.l #4,d0 ;set byte count
+
+ btst.b #7,STAG(a6) ;check for extended denorm
+ bne.w int_dnrm ;if so, branch
+
+ fmovem.x ETEMP(a6),fp0
+ fcmp.d #:41dfffffffc00000,fp0
+* 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec
+ fbge.w lo_plrg
+ fcmp.d #:c1e0000000000000,fp0
+* c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec
+ fble.w lo_nlrg
+*
+* at this point, the answer is between the largest pos and neg values
+*
+ move.l USER_FPCR(a6),d1 ;use user's rounding mode
+ andi.l #$30,d1
+ fmove.l d1,fpcr
+ fmove.l fp0,L_SCR1(a6) ;let the 040 perform conversion
+ fmove.l fpsr,d1
+ or.l d1,USER_FPSR(a6) ;capture inex2/ainex if set
+ bra.w int_wrt
+
+
+lo_plrg:
+ move.l #$7fffffff,L_SCR1(a6) ;answer is largest positive int
+ fbeq.w int_wrt ;exact answer
+ fcmp.d #:41dfffffffe00000,fp0
+* 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec
+ fbge.w int_operr ;set operr
+ bra.w int_inx ;set inexact
+
+lo_nlrg:
+ move.l #$80000000,L_SCR1(a6) ;answer is most negative int
+ fbeq.w int_wrt ;exact answer
+ fcmp.d #:c1e0000000100000,fp0
+* c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec
+ fblt.w int_operr ;set operr
+ bra.w int_inx ;set inexact
+
+*
+* wi is used to handle a word integer source specifier
+*
+
+*
+* wi: move-out to a word integer. d0 is set to the byte count (2).
+* Same structure as li, with single-precision limit constants and a
+* word-sized conversion/saturation into L_SCR1.
+*
+wi:
+ moveq.l #2,d0 ;set byte count
+
+ btst.b #7,STAG(a6) ;check for extended denorm
+ bne.w int_dnrm ;branch if so
+
+ fmovem.x ETEMP(a6),fp0
+ fcmp.s #:46fffe00,fp0
+* 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec
+ fbge.w wo_plrg
+ fcmp.s #:c7000000,fp0
+* c7000000 in sgl prec = c00e00008000000000000000 in ext prec
+ fble.w wo_nlrg
+
+*
+* at this point, the answer is between the largest pos and neg values
+*
+ move.l USER_FPCR(a6),d1 ;use user's rounding mode
+ andi.l #$30,d1
+ fmove.l d1,fpcr
+ fmove.w fp0,L_SCR1(a6) ;let the 040 perform conversion
+ fmove.l fpsr,d1
+ or.l d1,USER_FPSR(a6) ;capture inex2/ainex if set
+ bra.w int_wrt
+
+wo_plrg:
+ move.w #$7fff,L_SCR1(a6) ;answer is largest positive int
+ fbeq.w int_wrt ;exact answer
+ fcmp.s #:46ffff00,fp0
+* 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec
+ fbge.w int_operr ;set operr
+ bra.w int_inx ;set inexact
+
+wo_nlrg:
+ move.w #$8000,L_SCR1(a6) ;answer is most negative int
+ fbeq.w int_wrt ;exact answer
+ fcmp.s #:c7000080,fp0
+* c7000080 in sgl prec = c00e00008000800000000000 in ext prec
+ fblt.w int_operr ;set operr
+ bra.w int_inx ;set inexact
+
+*
+* bi is used to handle a byte integer source specifier
+*
+
+*
+* bi: move-out to a byte integer. d0 is set to the byte count (1).
+* Same structure as li and wi, with a byte-sized
+* conversion/saturation into L_SCR1.
+*
+bi:
+ moveq.l #1,d0 ;set byte count
+
+ btst.b #7,STAG(a6) ;check for extended denorm
+ bne.w int_dnrm ;branch if so
+
+ fmovem.x ETEMP(a6),fp0
+ fcmp.s #:42fe0000,fp0
+* 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec
+ fbge.w by_plrg
+ fcmp.s #:c3000000,fp0
+* c3000000 in sgl prec = c00600008000000000000000 in ext prec
+ fble.w by_nlrg
+
+*
+* at this point, the answer is between the largest pos and neg values
+*
+ move.l USER_FPCR(a6),d1 ;use user's rounding mode
+ andi.l #$30,d1
+ fmove.l d1,fpcr
+ fmove.b fp0,L_SCR1(a6) ;let the 040 perform conversion
+ fmove.l fpsr,d1
+ or.l d1,USER_FPSR(a6) ;capture inex2/ainex if set
+ bra.w int_wrt
+
+by_plrg:
+ move.b #$7f,L_SCR1(a6) ;answer is largest positive int
+ fbeq.w int_wrt ;exact answer
+ fcmp.s #:42ff0000,fp0
+* 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec
+ fbge.w int_operr ;set operr
+ bra.w int_inx ;set inexact
+
+by_nlrg:
+ move.b #$80,L_SCR1(a6) ;answer is most negative int
+ fbeq.w int_wrt ;exact answer
+ fcmp.s #:c3008000,fp0
+* c3008000 in sgl prec = c00600008080000000000000 in ext prec
+ fblt.w int_operr ;set operr
+ bra.w int_inx ;set inexact
+
+*
+* Common integer routines
+*
+* int_dnrm---account for possible nonzero result for round up with positive
+* operand and round down for negative answer. In the first case (result = 1)
+* byte-width (stored in d0) of result must be honored. In the second case,
+* -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out).
+
+int_dnrm:
+ clr.l L_SCR1(a6) ; initialize result to 0
+ bfextu FPCR_MODE(a6){2:2},d1 ; d1 is the rounding mode
+ cmp.b #2,d1 ; modes below 2 are RN/RZ, 2 is RM, 3 is RP
+ bmi.b int_inx ; if RN or RZ, done
+ bne.b int_rp ; if RP, go to int_rp; else RM falls through
+ tst.w ETEMP(a6) ; RM: store -1 in L_SCR1 if src is negative
+ bpl.b int_inx ; otherwise result is 0
+ move.l #-1,L_SCR1(a6) ; all ones is -1 for byte, word or long
+ bra.b int_inx
+int_rp:
+ tst.w ETEMP(a6) ; RP: store +1 of proper width in L_SCR1 if
+* ; source is greater than 0
+ bmi.b int_inx ; otherwise, result is 0
+ lea L_SCR1(a6),a1 ; a1 is address of L_SCR1
+ adda.l d0,a1 ; offset by destination width -1
+ suba.l #1,a1 ; a1 = last (least significant) result byte
+ bset.b #0,(a1) ; set low bit at a1 address
+int_inx:
+ ori.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex
+ bra.b int_wrt
+int_operr:
+ fmovem.x fp0,FPTEMP(a6) ;FPTEMP must contain the extended
+* ;precision source that needs to be
+* ;converted to integer; this is required
+* ;if the operr exception is enabled.
+* ;set operr/aiop (no inex2 on int ovfl)
+
+ ori.l #opaop_mask,USER_FPSR(a6)
+* ;fall through to perform int_wrt
+int_wrt:
+ move.l EXC_EA(a6),a1 ;load destination address
+ tst.l a1 ;check to see if it is a dest register
+ beq.b wrt_dn ;zero address: write data register
+ lea L_SCR1(a6),a0 ;point to supervisor source address
+ bsr.l mem_write ;d0 = byte count, a0 = source, a1 = dest
+ bra.w mvouti_end
+
+wrt_dn:
+ move.l d0,-(sp) ;d0 currently contains the size to write
+ bsr.l get_fline ;get_fline returns Dn in d0
+ andi.w #$7,d0 ;isolate register
+ move.l (sp)+,d1 ;get size
+ cmpi.l #4,d1 ;most frequent case
+ beq.b sz_long
+ cmpi.l #2,d1 ;size 1 (byte) adds no size bits
+ bne.b sz_con
+ or.l #8,d0 ;add 'word' size to register#
+ bra.b sz_con
+sz_long:
+ or.l #$10,d0 ;add 'long' size to register#
+sz_con:
+ move.l d0,d1 ;reg_dest expects size:reg in d1
+ bsr.l reg_dest ;load proper data register
+ bra.w mvouti_end
+xp:
+ lea ETEMP(a6),a0 ;a0 = source operand
+ bclr.b #sign_bit,LOCAL_EX(a0) ;strip sign from exponent word
+ sne LOCAL_SGN(a0) ;LOCAL_SGN = $ff if sign was set, else 0
+ btst.b #7,STAG(a6) ;check for extended denorm
+ bne.w xdnrm ;branch if so
+ clr.l d0 ;set destination format to extended
+ bra.b do_fp ;do normal case
+sgp:
+ lea ETEMP(a6),a0 ;a0 = source operand
+ bclr.b #sign_bit,LOCAL_EX(a0) ;strip sign from exponent word
+ sne LOCAL_SGN(a0) ;LOCAL_SGN = $ff if sign was set, else 0
+ btst.b #7,STAG(a6) ;check for extended denorm
+ bne.w sp_catas ;branch if so
+ move.w LOCAL_EX(a0),d0 ;d0 = unsigned exponent
+ lea sp_bnds,a1 ;a1 = single precision exponent bounds
+ cmp.w (a1),d0 ;below first bound?
+ blt.w sp_under ;yes: underflow handling
+ cmp.w 2(a1),d0 ;above second bound?
+ bgt.w sp_over ;yes: overflow handling
+ move.l #1,d0 ;set destination format to single
+ bra.b do_fp ;do normal case
+dp:
+ lea ETEMP(a6),a0 ;a0 = source operand
+ bclr.b #sign_bit,LOCAL_EX(a0) ;strip sign from exponent word
+ sne LOCAL_SGN(a0) ;LOCAL_SGN = $ff if sign was set, else 0
+
+ btst.b #7,STAG(a6) ;check for extended denorm
+ bne.w dp_catas ;branch if so
+
+ move.w LOCAL_EX(a0),d0 ;d0 = unsigned exponent
+ lea dp_bnds,a1 ;a1 = double precision exponent bounds
+
+ cmp.w (a1),d0 ;below first bound?
+ blt.w dp_under ;yes: underflow handling
+ cmp.w 2(a1),d0 ;above second bound?
+ bgt.w dp_over ;yes: overflow handling
+
+ move.l #2,d0 ;set destination format to double
+* ;fall through to do_fp
+*
+do_fp:
+ bfextu FPCR_MODE(a6){2:2},d1 ;rnd mode in d1
+ swap d0 ;rnd prec in upper word
+ add.l d0,d1 ;d1 has PREC/MODE info
+
+ clr.l d0 ;clear g,r,s
+
+ bsr.l round ;round the operand at (a0) in place
+
+ move.l a0,a1 ;a1 = rounded operand
+ move.l EXC_EA(a6),a0 ;a0 = destination pointer
+
+ bfextu CMDREG1B(a6){3:3},d1 ;extract destination format
+* ;at this point only the dest
+* ;formats sgl, dbl, ext are
+* ;possible
+ cmp.b #2,d1
+ bgt.b ddbl ;double=5, extended=2, single=1
+ bne.b dsgl ;not 2 and not greater: single
+* ;fall through to dext
+dext:
+ bsr.l dest_ext ;store as extended
+ bra.w mvout_end
+dsgl:
+ bsr.l dest_sgl ;store as single
+ bra.w mvout_end
+ddbl:
+ bsr.l dest_dbl ;store as double
+ bra.w mvout_end
+
+*
+* Handle possible denorm or catastrophic underflow cases here
+*
+xdnrm:
+ bsr.w set_xop ;initialize WBTEMP from operand at (a0)
+ bset.b #wbtemp15_bit,WB_BYTE(a6) ;set wbtemp15
+
+ move.l a0,a1 ;a1 has the operand input
+ move.l EXC_EA(a6),a0 ;a0 has the destination pointer
+ bsr.l dest_ext ;store to memory
+ bset.b #unfl_bit,FPSR_EXCEPT(a6) ;report underflow
+ bra.w mvout_end
+
+sp_under:
+ bset.b #etemp15_bit,STAG(a6)
+
+ cmp.w 4(a1),d0 ;third bound entry (a1 = sp_bnds from sgp)
+ blt.b sp_catas ;catastrophic underflow case
+
+ move.l #1,d0 ;load in round precision
+ move.l #sgl_thresh,d1 ;load in single denorm threshold
+ bsr.l dpspdnrm ;expects d1 to have the proper
+* ;denorm threshold
+ bsr.l dest_sgl ;stores value to destination
+ bset.b #unfl_bit,FPSR_EXCEPT(a6) ;report underflow
+ bra.w mvout_end ;exit
+
+dp_under:
+ bset.b #etemp15_bit,STAG(a6)
+
+ cmp.w 4(a1),d0 ;third bound entry (a1 = dp_bnds from dp)
+ blt.b dp_catas ;catastrophic underflow case
+
+ move.l #dbl_thresh,d1 ;load in double precision threshold
+ move.l #2,d0 ;load in round precision
+ bsr.l dpspdnrm ;expects d1 to have proper
+* ;denorm threshold
+* ;expects d0 to have round precision
+ bsr.l dest_dbl ;store value to destination
+ bset.b #unfl_bit,FPSR_EXCEPT(a6) ;report underflow
+ bra.w mvout_end ;exit
+
+*
+* Handle catastrophic underflow cases here
+*
+sp_catas:
+* Temp fix for z bit set in unf_sub
+ move.l USER_FPSR(a6),-(a7) ;save status across unf_sub
+
+ move.l #1,d0 ;set round precision to sgl
+
+ bsr.l unf_sub ;a0 points to result
+
+ move.l (a7)+,USER_FPSR(a6) ;discard status changes made by unf_sub
+
+ move.l #1,d0
+ sub.w d0,LOCAL_EX(a0) ;account for difference between
+* ;denorm/norm bias
+
+ move.l a0,a1 ;a1 has the operand input
+ move.l EXC_EA(a6),a0 ;a0 has the destination pointer
+
+ bsr.l dest_sgl ;store the result
+ ori.l #unfinx_mask,USER_FPSR(a6) ;set the bits in unfinx_mask
+ bra.w mvout_end
+
+dp_catas:
+* Temp fix for z bit set in unf_sub
+ move.l USER_FPSR(a6),-(a7) ;save status across unf_sub
+
+ move.l #2,d0 ;set round precision to dbl
+ bsr.l unf_sub ;a0 points to result
+
+ move.l (a7)+,USER_FPSR(a6) ;discard status changes made by unf_sub
+
+ move.l #1,d0
+ sub.w d0,LOCAL_EX(a0) ;account for difference between
+* ;denorm/norm bias
+
+ move.l a0,a1 ;a1 has the operand input
+ move.l EXC_EA(a6),a0 ;a0 has the destination pointer
+
+ bsr.l dest_dbl ;store the result
+ ori.l #unfinx_mask,USER_FPSR(a6) ;set the bits in unfinx_mask
+ bra.w mvout_end
+
+*
+* Handle catastrophic overflow cases here
+*
+sp_over:
+* Temp fix for z bit set in ovf_res
+ move.l USER_FPSR(a6),-(a7) ;save status across ovf_res
+
+ move.l #1,d0 ;set round precision to sgl
+ lea.l FP_SCR1(a6),a0 ;use FP_SCR1 for creating result
+ move.l ETEMP_EX(a6),(a0) ;copy the source operand into FP_SCR1
+ move.l ETEMP_HI(a6),4(a0)
+ move.l ETEMP_LO(a6),8(a0)
+ bsr.l ovf_res
+
+ move.l (a7)+,USER_FPSR(a6) ;discard status changes made by ovf_res
+
+ move.l a0,a1 ;a1 has the operand input
+ move.l EXC_EA(a6),a0 ;a0 has the destination pointer
+ bsr.l dest_sgl ;store the result
+ or.l #ovfinx_mask,USER_FPSR(a6) ;set the bits in ovfinx_mask
+ bra.w mvout_end
+
+dp_over:
+* Temp fix for z bit set in ovf_res
+ move.l USER_FPSR(a6),-(a7) ;save status across ovf_res
+
+ move.l #2,d0 ;set round precision to dbl
+ lea.l FP_SCR1(a6),a0 ;use FP_SCR1 for creating result
+ move.l ETEMP_EX(a6),(a0) ;copy the source operand into FP_SCR1
+ move.l ETEMP_HI(a6),4(a0)
+ move.l ETEMP_LO(a6),8(a0)
+ bsr.l ovf_res
+
+ move.l (a7)+,USER_FPSR(a6) ;discard status changes made by ovf_res
+
+ move.l a0,a1 ;a1 has the operand input
+ move.l EXC_EA(a6),a0 ;a0 has the destination pointer
+ bsr.l dest_dbl ;store the result
+ or.l #ovfinx_mask,USER_FPSR(a6) ;set the bits in ovfinx_mask
+ bra.w mvout_end
+
+*
+* DPSPDNRM
+*
+* This subroutine takes an extended normalized number and denormalizes
+* it to the given round precision. This subroutine also decrements
+* the input operand's exponent by 1 to account for the fact that
+* dest_sgl or dest_dbl expects a normalized number's bias.
+*
+* Input: a0 points to a normalized number in internal extended format
+* d0 is the round precision (=1 for sgl; =2 for dbl)
+* d1 is the single precision or double precision
+* denorm threshold
+*
+* Output: (In the format for dest_sgl or dest_dbl)
+* a0 points to the destination
+* a1 points to the operand
+*
+* Exceptions: Reports inexact 2 exception by setting USER_FPSR bits
+*
+dpspdnrm:
+ move.l d0,-(a7) ;save round precision
+ clr.l d0 ;clear initial g,r,s
+ bsr.l dnrm_lp ;careful with d0, it's needed by round
+
+ bfextu FPCR_MODE(a6){2:2},d1 ;get rounding mode
+ swap d1 ;park mode in the upper word
+ move.w 2(a7),d1 ;set rounding precision (low word of saved d0)
+ swap d1 ;at this point d1 has PREC/MODE info
+ bsr.l round ;round result, sets the inex bit in
+* ;USER_FPSR if needed
+
+ move.w #1,d0
+ sub.w d0,LOCAL_EX(a0) ;account for difference in denorm
+* ;vs norm bias
+
+ move.l a0,a1 ;a1 has the operand input
+ move.l EXC_EA(a6),a0 ;a0 has the destination pointer
+ addq.l #4,a7 ;pop saved round precision
+ rts
+*
+* SET_XOP initializes WBTEMP with the value pointed to by a0
+* input: a0 points to input operand in the internal extended format
+*
+set_xop:
+ move.l LOCAL_EX(a0),WBTEMP_EX(a6) ;copy exponent long (incl. sgn byte)
+ move.l LOCAL_HI(a0),WBTEMP_HI(a6) ;copy ms mantissa
+ move.l LOCAL_LO(a0),WBTEMP_LO(a6) ;copy ls mantissa
+ bfclr WBTEMP_SGN(a6){0:8} ;clear sgn byte; flags reflect old value
+ beq.b sxop ;old sgn byte was zero: positive
+ bset.b #sign_bit,WBTEMP_EX(a6) ;negative: put sign back in exponent
+sxop:
+ bfclr STAG(a6){5:4} ;clear wbtm66,wbtm1,wbtm0,sbit
+ rts
+*
+* P_MOVE --- packed decimal move out, dispatched on the source tag
+*
+p_movet:
+ dc.l p_move ;tag 0
+ dc.l p_movez ;tag 1 (zero, going by the handler name)
+ dc.l p_movei ;tag 2 (infinity, going by the handler name)
+ dc.l p_moven ;tag 3 (nan, going by the handler name)
+ dc.l p_move ;tag 4
+p_regd:
+ dc.l p_dyd0 ;dynamic k-factor in d0
+ dc.l p_dyd1 ;dynamic k-factor in d1
+ dc.l p_dyd2 ;dynamic k-factor in d2
+ dc.l p_dyd3 ;dynamic k-factor in d3
+ dc.l p_dyd4 ;dynamic k-factor in d4
+ dc.l p_dyd5 ;dynamic k-factor in d5
+ dc.l p_dyd6 ;dynamic k-factor in d6
+ dc.l p_dyd7 ;dynamic k-factor in d7
+
+pack_out:
+ lea.l p_movet,a0 ;load jmp table address
+ move.w STAG(a6),d0 ;get source tag
+ bfextu d0{16:3},d0 ;isolate source bits (top 3 bits of the word)
+ move.l (a0,d0.w*4),a0 ;load a0 with routine label for tag
+ jmp (a0) ;go to the routine
+
+p_write:
+ move.l #$0c,d0 ;get byte count (12 bytes)
+ move.l EXC_EA(a6),a1 ;get the destination address
+ bsr mem_write ;write the user's destination from (a0)
+ clr.b CU_SAVEPC(a6) ;set the cu save pc to all 0's
+
+*
+* Also note that the dtag must be set to norm here - this is because
+* the 040 uses the dtag to execute the correct microcode.
+*
+ bfclr DTAG(a6){0:3} ;set dtag to norm
+
+ rts
+
+* Notes on handling of special case (zero, inf, and nan) inputs:
+* 1. Operr is not signalled if the k-factor is greater than 18.
+* 2. Per the manual, status bits are not set.
+*
+
+p_move:
+ move.w CMDREG1B(a6),d0 ;d0 = command word
+ btst.l #kfact_bit,d0 ;test for dynamic k-factor
+ beq.b statick ;if clear, k-factor is static
+dynamick:
+ bfextu d0{25:3},d0 ;isolate register for dynamic k-factor
+ lea p_regd,a0 ;a0 = table of per-register readers
+ move.l (a0,d0*4),a0
+ jmp (a0) ;reader loads d0, then branches to statick
+statick:
+ andi.w #$007f,d0 ;get k-factor (low 7 bits)
+ bfexts d0{25:7},d0 ;sign extend d0 for bindec
+ lea.l ETEMP(a6),a0 ;a0 will point to the packed decimal
+ bsr.l bindec ;perform the convert; data at a6
+ lea.l FP_SCR1(a6),a0 ;load a0 with result address
+ bra.l p_write ;go write the result
+p_movez:
+ lea.l ETEMP(a6),a0 ;a0 will point to the packed decimal
+ clr.w 2(a0) ;clear lower word of exp
+ clr.l 4(a0) ;load second lword of ZERO
+ clr.l 8(a0) ;load third lword of ZERO
+ bra.w p_write ;go write results
+p_movei:
+ fmove.l #0,FPSR ;clear aiop
+ lea.l ETEMP(a6),a0 ;a0 will point to the packed decimal
+ clr.w 2(a0) ;clear lower word of exp
+ bra.w p_write ;go write the result
+p_moven:
+ lea.l ETEMP(a6),a0 ;a0 will point to the packed decimal
+ clr.w 2(a0) ;clear lower word of exp
+ bra.w p_write ;go write the result (mantissa left as is)
+
+*
+* Routines to read the dynamic k-factor from Dn.
+* d0 and d1 are read from the saved user copies; d2-d7 are read
+* directly from the live registers.
+p_dyd0:
+ move.l USER_D0(a6),d0 ;saved user d0
+ bra.b statick
+p_dyd1:
+ move.l USER_D1(a6),d0 ;saved user d1
+ bra.b statick
+p_dyd2:
+ move.l d2,d0 ;live d2
+ bra.b statick
+p_dyd3:
+ move.l d3,d0 ;live d3
+ bra.b statick
+p_dyd4:
+ move.l d4,d0 ;live d4
+ bra.b statick
+p_dyd5:
+ move.l d5,d0 ;live d5
+ bra.b statick
+p_dyd6:
+ move.l d6,d0 ;live d6
+ bra.w statick
+p_dyd7:
+ move.l d7,d0 ;live d7
+ bra.w statick
+
+ end
diff --git a/sys/arch/m68k/fpsp/round.sa b/sys/arch/m68k/fpsp/round.sa
new file mode 100644
index 00000000000..ebd02d11e25
--- /dev/null
+++ b/sys/arch/m68k/fpsp/round.sa
@@ -0,0 +1,673 @@
+* $NetBSD: round.sa,v 1.3 1994/10/26 07:49:24 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* round.sa 3.4 7/29/91
+*
+* handle rounding and normalization tasks
+*
+
+ROUND IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+*
+* round --- round result according to precision/mode
+*
+* a0 points to the input operand in the internal extended format
+* d1(high word) contains rounding precision:
+* ext = $0000xxxx
+* sgl = $0001xxxx
+* dbl = $0002xxxx
+* d1(low word) contains rounding mode:
+* RN = $xxxx0000
+* RZ = $xxxx0001
+* RM = $xxxx0002
+* RP = $xxxx0003
+* d0{31:29} contains the g,r,s bits (extended)
+*
+* On return the value pointed to by a0 is correctly rounded,
+* a0 is preserved and the g-r-s bits in d0 are cleared.
+* The result is not typed - the tag field is invalid. The
+* result is still in the internal extended format.
+*
+* The INEX bit of USER_FPSR will be set if the rounded result was
+* inexact (i.e. if any of the g-r-s bits were set).
+*
+
+ xdef round
+round:
+* If g=r=s=0 then result is exact and round is done, else set
+* the inex flag in status reg and continue.
+*
+ bsr.b ext_grs ;this subroutine looks at the
+* ;rounding precision and sets
+* ;the appropriate g-r-s bits.
+ tst.l d0 ;if grs are zero, go force
+ bne.w rnd_cont ;lower bits to zero for size
+
+ swap d1 ;set up d1.w for round prec.
+ bra.w truncate ;exact: only clear bits beyond the precision
+
+rnd_cont:
+*
+* Use rounding mode as an index into a jump table for these modes.
+*
+ or.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex
+ lea mode_tab,a1
+ move.l (a1,d1.w*4),a1 ;d1.w = rounding mode
+ jmp (a1)
+*
+* Jump table indexed by rounding mode in d1.w. All following assumes
+* grs != 0.
+*
+mode_tab:
+ dc.l rnd_near ;mode 0: RN
+ dc.l rnd_zero ;mode 1: RZ
+ dc.l rnd_mnus ;mode 2: RM
+ dc.l rnd_plus ;mode 3: RP
+*
+* ROUND PLUS INFINITY
+*
+* If sign of fp number = 0 (positive), then add 1 to l.
+*
+rnd_plus:
+ swap d1 ;set up d1 for round prec.
+ tst.b LOCAL_SGN(a0) ;check for sign
+ bmi.w truncate ;if negative then truncate
+ move.l #$ffffffff,d0 ;force g,r,s to be all f's
+ lea add_to_l,a1 ;nonzero d0 keeps add_xxx from clearing l
+ move.l (a1,d1.w*4),a1 ;d1.w = rounding precision
+ jmp (a1)
+*
+* ROUND MINUS INFINITY
+*
+* If sign of fp number = 1 (negative), then add 1 to l.
+*
+rnd_mnus:
+ swap d1 ;set up d1 for round prec.
+ tst.b LOCAL_SGN(a0) ;check for sign
+ bpl.w truncate ;if positive then truncate
+ move.l #$ffffffff,d0 ;force g,r,s to be all f's
+ lea add_to_l,a1 ;nonzero d0 keeps add_xxx from clearing l
+ move.l (a1,d1.w*4),a1 ;d1.w = rounding precision
+ jmp (a1)
+*
+* ROUND ZERO
+*
+* Always truncate, regardless of sign.
+rnd_zero:
+ swap d1 ;set up d1 for round prec.
+ bra.w truncate
+*
+*
+* ROUND NEAREST
+*
+* If (g=1), then add 1 to l and if (r=s=0), then clear l
+* Note that this will round to even in case of a tie.
+*
+rnd_near:
+ swap d1 ;set up d1 for round prec.
+ add.l d0,d0 ;shift g-bit to c-bit; d0 now holds r,s
+ bcc.w truncate ;g=0: truncate; g=1: add 1 to l
+ lea add_to_l,a1
+ move.l (a1,d1.w*4),a1 ;d1.w = rounding precision
+ jmp (a1)
+
+*
+* ext_grs --- extract guard, round and sticky bits
+*
+* Input: d1 = PREC:ROUND, a0 = operand, d0{31:29} = incoming g,r,s
+* Output: d0{31:29}= guard, round, sticky
+*
+* ext_grs extracts the guard/round/sticky bits according to the
+* selected rounding precision. It is called by the round subroutine
+* only. All registers except d0 are kept intact. d0 becomes an
+* updated guard,round,sticky in d0{31:29}
+*
+* Notes: ext_grs uses the round PREC, and therefore has to swap d1
+* prior to usage, and needs to restore d1 to original.
+*
+ext_grs:
+ swap d1 ;have d1.w point to round precision
+ tst.w d1 ;extended precision?
+ bne.b sgl_or_dbl ;no: derive g,r,s from the mantissa
+ bra.b end_ext_grs ;yes: incoming d0 is already the g,r,s
+
+sgl_or_dbl:
+ movem.l d2/d3,-(a7) ;make some temp registers
+ cmpi.w #1,d1 ;single precision?
+ bne.b grs_dbl ;no: double
+grs_sgl:
+ bfextu LOCAL_HI(a0){24:2},d3 ;sgl prec. g-r are 2 bits right
+ move.l #30,d2 ;of the sgl prec. limits
+ lsl.l d2,d3 ;shift g-r bits to MSB of d3
+ move.l LOCAL_HI(a0),d2 ;get word 2 for s-bit test
+ andi.l #$0000003f,d2 ;s bit is the or of all other
+ bne.b st_stky ;bits to the right of g-r
+ tst.l LOCAL_LO(a0) ;test lower mantissa
+ bne.b st_stky ;if any are set, set sticky
+ tst.l d0 ;test original g,r,s
+ bne.b st_stky ;if any are set, set sticky
+ bra.b end_sd ;nothing below g-r is set, exit
+grs_dbl:
+ bfextu LOCAL_LO(a0){21:2},d3 ;dbl-prec. g-r are 2 bits right
+ move.l #30,d2 ;of the dbl prec. limits
+ lsl.l d2,d3 ;shift g-r bits to the MSB of d3
+ move.l LOCAL_LO(a0),d2 ;get lower mantissa for s-bit test
+ andi.l #$000001ff,d2 ;s bit is the or-ing of all
+ bne.b st_stky ;other bits to the right of g-r
+ tst.l d0 ;test original g,r,s
+ bne.b st_stky ;if any are set, set sticky
+ bra.b end_sd ;if clear, exit
+st_stky:
+ bset #rnd_stky_bit,d3 ;set sticky in the result
+end_sd:
+ move.l d3,d0 ;return grs to d0
+ movem.l (a7)+,d2/d3 ;restore scratch registers
+end_ext_grs:
+ swap d1 ;restore d1 to original
+ rts
+
+******************** Local Equates
+ad_1_sgl equ $00000100 constant to add 1 to l-bit in sgl prec
+ad_1_dbl equ $00000800 constant to add 1 to l-bit in dbl prec
+
+
+*Jump table for adding 1 to the l-bit indexed by rnd prec (d1.w)
+
+add_to_l:
+ dc.l add_ext ;prec 0: extended
+ dc.l add_sgl ;prec 1: single
+ dc.l add_dbl ;prec 2: double
+ dc.l add_dbl ;prec 3: treated the same as double
+*
+* ADD SINGLE
+*
+add_sgl:
+ add.l #ad_1_sgl,LOCAL_HI(a0) ;add 1 to the sgl l-bit
+ bcc.b scc_clr ;no mantissa overflow
+ roxr.w LOCAL_HI(a0) ;shift v-bit back in
+ roxr.w LOCAL_HI+2(a0) ;shift v-bit back in
+ add.w #$1,LOCAL_EX(a0) ;and incr exponent
+scc_clr:
+ tst.l d0 ;test for rs = 0
+ bne.b sgl_done ;rs != 0: keep the l-bit
+ andi.w #$fe00,LOCAL_HI+2(a0) ;clear the l-bit
+sgl_done:
+ andi.l #$ffffff00,LOCAL_HI(a0) ;truncate bits beyond sgl limit
+ clr.l LOCAL_LO(a0) ;clear ls mantissa
+ rts
+
+*
+* ADD EXTENDED
+*
+add_ext:
+ addq.l #1,LOCAL_LO(a0) ;add 1 to l-bit
+ bcc.b xcc_clr ;test for carry out
+ addq.l #1,LOCAL_HI(a0) ;propagate carry
+ bcc.b xcc_clr ;no mantissa overflow
+ roxr.w LOCAL_HI(a0) ;mant is 0 so restore v-bit
+ roxr.w LOCAL_HI+2(a0) ;mant is 0 so restore v-bit
+ roxr.w LOCAL_LO(a0) ;continue the 1-bit right shift
+ roxr.w LOCAL_LO+2(a0) ;through the ls mantissa
+ add.w #$1,LOCAL_EX(a0) ;and inc exp
+xcc_clr:
+ tst.l d0 ;test rs = 0
+ bne.b add_ext_done ;rs != 0: keep the l-bit
+ andi.b #$fe,LOCAL_LO+3(a0) ;clear the l bit
+add_ext_done:
+ rts
+*
+* ADD DOUBLE
+*
+add_dbl:
+ add.l #ad_1_dbl,LOCAL_LO(a0) ;add 1 to the dbl l-bit
+ bcc.b dcc_clr ;no carry out of ls mantissa
+ addq.l #1,LOCAL_HI(a0) ;propagate carry
+ bcc.b dcc_clr ;no mantissa overflow
+ roxr.w LOCAL_HI(a0) ;mant is 0 so restore v-bit
+ roxr.w LOCAL_HI+2(a0) ;mant is 0 so restore v-bit
+ roxr.w LOCAL_LO(a0) ;continue the 1-bit right shift
+ roxr.w LOCAL_LO+2(a0) ;through the ls mantissa
+ add.w #$1,LOCAL_EX(a0) ;incr exponent
+dcc_clr:
+ tst.l d0 ;test for rs = 0
+ bne.b dbl_done ;rs != 0: keep the l-bit
+ andi.w #$f000,LOCAL_LO+2(a0) ;clear the l-bit
+
+dbl_done:
+ andi.l #$fffff800,LOCAL_LO(a0) ;truncate bits beyond dbl limit
+ rts
+
+error: ;not referenced or exported anywhere in round.sa
+ rts
+*
+* Truncate all other bits; table indexed by rnd prec in d1.w
+*
+trunct:
+ dc.l end_rnd ;prec 0: extended, nothing to clear
+ dc.l sgl_done ;prec 1: single
+ dc.l dbl_done ;prec 2: double
+ dc.l dbl_done ;prec 3: treated the same as double
+
+truncate:
+ lea trunct,a1
+ move.l (a1,d1.w*4),a1 ;d1.w = rounding precision
+ jmp (a1)
+
+end_rnd:
+ rts
+
+*
+* NORMALIZE
+*
+* These routines (nrm_zero & nrm_set) normalize the unnorm. This
+* is done by shifting the mantissa left while decrementing the
+* exponent.
+*
+* NRM_SET shifts and decrements until there is a 1 set in the integer
+* bit of the mantissa (msb in d1).
+*
+* NRM_ZERO shifts and decrements until there is a 1 set in the integer
+* bit of the mantissa (msb in d1) unless this would mean the exponent
+* would go less than 0. In that case the number becomes a denorm - the
+* exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
+* normalized.
+*
+* Note that both routines have been optimized (for the worst case) and
+* therefore do not have the easy to follow decrement/shift loop.
+*
+* NRM_ZERO
+*
+* Distance to first 1 bit in mantissa = X
+* Distance to 0 from exponent = Y
+* If X < Y
+* Then
+* nrm_set
+* Else
+* shift mantissa by Y
+* set exponent = 0
+*
+*input:
+* a0 -> operand (e.g. FP_SCR1): exponent, ms mantissa part, ls mantissa part
+*output:
+* L_SCR1{4} = fpte15 or ete15 bit
+*
+ xdef nrm_zero
+nrm_zero:
+ move.w LOCAL_EX(a0),d0 ;d0 = exponent (Y)
+ cmp.w #64,d0 ;see if exp >= 64
+ bmi.b d0_less
+ bsr nrm_set ;exp >= 64 so exp won't go below 0
+ rts
+d0_less:
+ movem.l d2/d3/d5/d6,-(a7)
+ move.l LOCAL_HI(a0),d1 ;d1 = ms mant
+ move.l LOCAL_LO(a0),d2 ;d2 = ls mant
+
+ bfffo d1{0:32},d3 ;get the distance to the first 1
+* ;in ms mant
+ beq.b ms_clr ;branch if no bits were set
+ cmp.w d3,d0 ;if X>Y
+ bmi.b greater ;then exp will go past 0 (neg) if
+* ;it is just shifted
+ bsr nrm_set ;else exp won't go past 0
+ movem.l (a7)+,d2/d3/d5/d6
+ rts
+greater: ;NOTE(review): from ms_clr d0 can be >= 32; 32-bit shifts by >= 32 drop the ls mant bits -- confirm
+ move.l d2,d6 ;save ls mant in d6
+ lsl.l d0,d2 ;shift ls mant by count
+ lsl.l d0,d1 ;shift ms mant by count
+ move.l #32,d5
+ sub.l d0,d5 ;make op a denorm by shifting bits
+ lsr.l d5,d6 ;by the number in the exp, then
+* ;set exp = 0.
+ or.l d6,d1 ;shift the ls mant bits into the ms mant
+ clr.l d0 ;same as if decremented exp to 0
+* ;while shifting
+ move.w d0,LOCAL_EX(a0) ;store exp = 0
+ move.l d1,LOCAL_HI(a0) ;store ms mant
+ move.l d2,LOCAL_LO(a0) ;store ls mant
+ movem.l (a7)+,d2/d3/d5/d6
+ rts
+ms_clr:
+ bfffo d2{0:32},d3 ;check if any bits set in ls mant
+ beq.b all_clr ;branch if none set
+ add.w #32,d3 ;X counts the 32 zero bits of ms mant too
+ cmp.w d3,d0 ;if X>Y
+ bmi.b greater ;then branch
+ bsr nrm_set ;else exp won't go past 0
+ movem.l (a7)+,d2/d3/d5/d6
+ rts
+all_clr:
+ clr.w LOCAL_EX(a0) ;no mantissa bits set. Set exp = 0.
+ movem.l (a7)+,d2/d3/d5/d6
+ rts
+*
+* NRM_SET
+*
+ xdef nrm_set
+nrm_set:
+ move.l d7,-(a7)
+ bfffo LOCAL_HI(a0){0:32},d7 ;find first 1 in ms mant (offset to d7)
+ beq.b lower ;branch if ms mant is all 0's
+
+ move.l d6,-(a7)
+
+ sub.w d7,LOCAL_EX(a0) ;sub exponent by count
+ move.l LOCAL_HI(a0),d0 ;d0 has ms mant
+ move.l LOCAL_LO(a0),d1 ;d1 has ls mant
+
+ lsl.l d7,d0 ;shift first 1 to j bit position
+ move.l d1,d6 ;copy ls mant into d6
+ lsl.l d7,d6 ;shift ls mant by count
+ move.l d6,LOCAL_LO(a0) ;store ls mant into memory
+ moveq.l #32,d6
+ sub.l d7,d6 ;continue shift
+ lsr.l d6,d1 ;shift off all bits but those that will
+* ;be shifted into ms mant
+ or.l d1,d0 ;shift the ls mant bits into the ms mant
+ move.l d0,LOCAL_HI(a0) ;store ms mant into memory
+ movem.l (a7)+,d7/d6 ;restore registers
+ rts
+
+*
+* We get here if ms mant was = 0, and we assume ls mant has bits
+* set (otherwise this would have been tagged a zero not a denorm).
+*
+lower:
+ move.w LOCAL_EX(a0),d0 ;d0 has exponent
+ move.l LOCAL_LO(a0),d1 ;d1 has ls mant
+ sub.w #32,d0 ;account for ms mant being all zeros
+ bfffo d1{0:32},d7 ;find first 1 in ls mant (offset to d7)
+ sub.w d7,d0 ;subtract shift count from exp
+ lsl.l d7,d1 ;shift first 1 to integer bit in ms mant
+ move.w d0,LOCAL_EX(a0) ;store exp
+ move.l d1,LOCAL_HI(a0) ;store ms mant
+ clr.l LOCAL_LO(a0) ;clear ls mant
+ move.l (a7)+,d7
+ rts
+*
+* denorm --- denormalize an intermediate result
+*
+* Used by underflow.
+*
+* Input:
+* a0 points to the operand to be denormalized
+* (in the internal extended format)
+*
+* d0: rounding precision
+* Output:
+* a0 points to the denormalized result
+* (in the internal extended format)
+*
+* d0 is guard,round,sticky
+*
+* d0 comes into this routine with the rounding precision. It
+* is then loaded with the denormalized exponent threshold for the
+* rounding precision.
+*
+
+ xdef denorm
+denorm:
+ btst.b #6,LOCAL_EX(a0) ;check for exponents between $7fff-$4000
+ beq.b no_sgn_ext
+ bset.b #7,LOCAL_EX(a0) ;sign extend if it is so
+no_sgn_ext:
+
+ tst.b d0 ;if 0 then extended precision
+ bne.b not_ext ;else branch
+
+ clr.l d1 ;load d1 with ext threshold
+ clr.l d0 ;clear the initial g,r,s
+ bsr dnrm_lp ;denormalize the number
+ tst.b d1 ;check for inex
+ beq.w no_inex ;if clr, no inex
+ bra.b dnrm_inex ;if set, set inex
+
+not_ext:
+ cmpi.l #1,d0 ;if 1 then single precision
+ beq.b load_sgl ;else must be 2, double prec
+
+load_dbl:
+ move.w #dbl_thresh,d1 ;put copy of threshold in d1
+ move.l d1,d0 ;copy d1 into d0
+ sub.w LOCAL_EX(a0),d0 ;diff = threshold - exp
+ cmp.w #67,d0 ;if diff >= 67 (mant + grs bits)
+ bpl.b chk_stky ;then branch (all bits would be
+* ; shifted off in denorm routine)
+ clr.l d0 ;else clear the initial g,r,s
+ bsr dnrm_lp ;denormalize the number
+ tst.b d1 ;check flag
+ beq.b no_inex ;if clr, no inex
+ bra.b dnrm_inex ;if set, set inex
+
+load_sgl:
+ move.w #sgl_thresh,d1 ;put copy of threshold in d1
+ move.l d1,d0 ;copy d1 into d0
+ sub.w LOCAL_EX(a0),d0 ;diff = threshold - exp
+ cmp.w #67,d0 ;if diff >= 67 (mant + grs bits)
+ bpl.b chk_stky ;then branch (all bits would be
+* ; shifted off in denorm routine)
+ clr.l d0 ;else clear the initial g,r,s
+ bsr dnrm_lp ;denormalize the number
+ tst.b d1 ;check flag
+ beq.b no_inex ;if clr, no inex
+ bra.b dnrm_inex ;if set, set inex
+
+chk_stky:
+ tst.l LOCAL_HI(a0) ;check for any bits set
+ bne.b set_stky
+ tst.l LOCAL_LO(a0) ;check for any bits set
+ bne.b set_stky
+ bra.b clr_mant ;NOTE(review): d0 still holds diff here, not g,r,s
+set_stky:
+ or.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex
+ move.l #$20000000,d0 ;set sticky bit in return value
+clr_mant:
+ move.w d1,LOCAL_EX(a0) ;load exp with threshold
+ clr.l LOCAL_HI(a0) ;clear ms mantissa
+ clr.l LOCAL_LO(a0) ;clear ls mantissa
+ rts
+dnrm_inex:
+ or.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex
+no_inex:
+ rts
+
+*
+* dnrm_lp --- denormalize exponent/mantissa to specified threshold
+*
+* Input:
+* a0 points to the operand to be denormalized
+* d0{31:29} initial guard,round,sticky
+* d1{15:0} denormalization threshold
+* Output:
+* a0 points to the denormalized operand
+* d0{31:29} final guard,round,sticky
+* d1.b inexact flag: all ones means inexact result
+*
+* The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
+* so that bfext can be used to extract the new low part of the mantissa.
+* Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
+* is no LOCAL_GRS scratch word following it on the fsave frame.
+*
+ xdef dnrm_lp
+dnrm_lp:
+ move.l d2,-(sp) ;save d2 for temp use
+ btst.b #E3,E_BYTE(a6) ;test for type E3 exception
+ beq.b not_E3 ;not type E3 exception
+ bfextu WBTEMP_GRS(a6){6:3},d2 ;extract guard,round, sticky bit
+ move.l #29,d0
+ lsl.l d0,d2 ;shift g,r,s to their positions
+ move.l d2,d0 ;E3: initial g,r,s come from WBTEMP_GRS
+not_E3:
+ move.l (sp)+,d2 ;restore d2
+ move.l LOCAL_LO(a0),FP_SCR2+LOCAL_LO(a6) ;scratch copy of ls mant
+ move.l d0,FP_SCR2+LOCAL_GRS(a6) ;scratch copy of initial g,r,s
+ move.l d1,d0 ;copy the denorm threshold
+ sub.w LOCAL_EX(a0),d1 ;d1 = threshold - uns exponent
+ ble.b no_lp ;d1 <= 0
+ cmp.w #32,d1
+ blt.b case_1 ;0 < d1 < 32
+ cmp.w #64,d1
+ blt.b case_2 ;32 <= d1 < 64
+ bra.w case_3 ;d1 >= 64
+*
+* No denormalization necessary
+*
+no_lp:
+ clr.b d1 ;set no inex2 reported
+ move.l FP_SCR2+LOCAL_GRS(a6),d0 ;restore original g,r,s
+ rts
+*
+* case (0<d1<32): shift the mantissa right by d1 bits
+*
+case_1:
+ move.l d2,-(sp) ;save d2 (scratch)
+ move.w d0,LOCAL_EX(a0) ;exponent = denorm threshold
+ move.l #32,d0
+ sub.w d1,d0 ;d0 = 32 - d1
+ bfextu LOCAL_EX(a0){d0:32},d2 ;field straddling LOCAL_EX/LOCAL_HI
+ bfextu d2{d1:d0},d2 ;d2 = new LOCAL_HI
+ bfextu LOCAL_HI(a0){d0:32},d1 ;d1 = new LOCAL_LO
+ bfextu FP_SCR2+LOCAL_LO(a6){d0:32},d0 ;d0 = new G,R,S
+ move.l d2,LOCAL_HI(a0) ;store new LOCAL_HI
+ move.l d1,LOCAL_LO(a0) ;store new LOCAL_LO
+ clr.b d1 ;assume no inexact for now
+ bftst d0{2:30} ;any bits set below g,r?
+ beq.b c1nstky ;no: leave sticky alone
+ bset.l #rnd_stky_bit,d0 ;yes: set sticky
+ st.b d1 ;and flag inexact
+c1nstky:
+ move.l FP_SCR2+LOCAL_GRS(a6),d2 ;restore original g,r,s
+ andi.l #$e0000000,d2 ;clear all but G,R,S
+ tst.l d2 ;test if original G,R,S are clear
+ beq.b grs_clear
+ or.l #$20000000,d0 ;set sticky bit in d0
+grs_clear:
+ andi.l #$e0000000,d0 ;clear all but G,R,S
+ move.l (sp)+,d2 ;restore d2
+ rts
+*
+* case (32<=d1<64): ms mant moves into ls mant, new ms mant is 0
+*
+case_2:
+ move.l d2,-(sp) ;save d2 (scratch)
+ move.w d0,LOCAL_EX(a0) ;unsigned exponent = threshold
+ sub.w #32,d1 ;d1 now between 0 and 31
+ move.l #32,d0
+ sub.w d1,d0 ;d0 = 32 - d1
+ bfextu LOCAL_EX(a0){d0:32},d2 ;field straddling LOCAL_EX/LOCAL_HI
+ bfextu d2{d1:d0},d2 ;d2 = new LOCAL_LO
+ bfextu LOCAL_HI(a0){d0:32},d1 ;d1 = new G,R,S
+ bftst d1{2:30} ;any bits set below g,r?
+ bne.b c2_sstky ;bra if sticky bit to be set
+ bftst FP_SCR2+LOCAL_LO(a6){d0:32} ;any bits shifted out of old ls mant?
+ bne.b c2_sstky ;bra if sticky bit to be set
+ move.l d1,d0 ;d0 = new g,r,s
+ clr.b d1 ;no inexact reported
+ bra.b end_c2
+c2_sstky:
+ move.l d1,d0 ;d0 = new g,r,s
+ bset.l #rnd_stky_bit,d0 ;set sticky
+ st.b d1 ;flag inexact
+end_c2:
+ clr.l LOCAL_HI(a0) ;store LOCAL_HI = 0
+ move.l d2,LOCAL_LO(a0) ;store LOCAL_LO
+ move.l FP_SCR2+LOCAL_GRS(a6),d2 ;restore original g,r,s
+ andi.l #$e0000000,d2 ;clear all but G,R,S
+ tst.l d2 ;test if original G,R,S are clear
+ beq.b clear_grs
+ or.l #$20000000,d0 ;set sticky bit in d0
+clear_grs:
+ andi.l #$e0000000,d0 ;get rid of all but G,R,S
+ move.l (sp)+,d2 ;restore d2
+ rts
+*
+* d1 >= 64 Force the exponent to be the denorm threshold with the
+* correct sign.
+*
+case_3:
+ move.w d0,LOCAL_EX(a0) ;exponent = denorm threshold
+ tst.w LOCAL_SGN(a0) ;is the sgn byte negative?
+ bge.b c3con ;no: leave exponent as is
+c3neg:
+ or.l #$80000000,LOCAL_EX(a0) ;yes: set the sign bit in the exponent
+c3con:
+ cmp.w #64,d1
+ beq.b sixty_four ;shift of exactly 64
+ cmp.w #65,d1
+ beq.b sixty_five ;shift of exactly 65
+*
+* Shift value is out of range. Set d1 for inex2 flag and
+* return a zero with the given threshold.
+*
+ clr.l LOCAL_HI(a0)
+ clr.l LOCAL_LO(a0)
+ move.l #$20000000,d0 ;only sticky is set
+ st.b d1 ;flag inexact
+ rts
+
+sixty_four:
+ move.l LOCAL_HI(a0),d0
+ bfextu d0{2:30},d1 ;d1 = ms mant bits below the top two
+ andi.l #$c0000000,d0 ;top two ms mant bits become g,r
+ bra.b c3com
+
+sixty_five:
+ move.l LOCAL_HI(a0),d0
+ bfextu d0{1:31},d1 ;d1 = ms mant bits below the top one
+ andi.l #$80000000,d0 ;keep only the top ms mant bit
+ lsr.l #1,d0 ;shift high bit into R bit
+
+c3com:
+ tst.l d1 ;any remaining ms mant bits?
+ bne.b c3ssticky
+ tst.l LOCAL_LO(a0) ;any ls mant bits?
+ bne.b c3ssticky
+ tst.b FP_SCR2+LOCAL_GRS(a6) ;any original g,r,s bits?
+ bne.b c3ssticky
+ clr.b d1 ;no inexact reported
+ bra.b c3end
+
+c3ssticky:
+ bset.l #rnd_stky_bit,d0 ;set sticky
+ st.b d1 ;flag inexact
+c3end:
+ clr.l LOCAL_HI(a0) ;mantissa is entirely shifted out
+ clr.l LOCAL_LO(a0)
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/sacos.sa b/sys/arch/m68k/fpsp/sacos.sa
new file mode 100644
index 00000000000..7a904741823
--- /dev/null
+++ b/sys/arch/m68k/fpsp/sacos.sa
@@ -0,0 +1,140 @@
+* $NetBSD: sacos.sa,v 1.3 1994/10/26 07:49:27 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* sacos.sa 3.3 12/19/90
+*
+* Description: The entry point sAcos computes the inverse cosine of
+* an input argument; sAcosd does the same except for denormalized
+* input.
+*
+* Input: Double-extended number X in location pointed to
+* by address register a0.
+*
+* Output: The value arccos(X) returned in floating-point register Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 3 ulps in
+* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+* result is subsequently rounded to double precision. The
+* result is provably monotonic in double precision.
+*
+* Speed: The program sACOS takes approximately 310 cycles.
+*
+* Algorithm:
+*
+* ACOS
+* 1. If |X| >= 1, go to 3.
+*
+* 2. (|X| < 1) Calculate acos(X) by
+* z := (1-X) / (1+X)
+* acos(X) = 2 * atan( sqrt(z) ).
+* Exit.
+*
+* 3. If |X| > 1, go to 5.
+*
+* 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.
+*
+* 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
+* Exit.
+*
+
+SACOS IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+PI DC.L $40000000,$C90FDAA2,$2168C235,$00000000
+PIBY2 DC.L $3FFF0000,$C90FDAA2,$2168C235,$00000000
+
+ xref t_operr
+ xref t_frcinx
+ xref satan
+
+ xdef sacosd
+sacosd:
+*--ACOS(X) = PI/2 FOR DENORMALIZED X
+ fmove.l d1,fpcr ...load user's rounding mode/precision
+ FMOVE.X PIBY2,FP0 ;fp0 = PI/2 from the PIBY2 constant
+ bra t_frcinx ;finish in the external t_frcinx routine
+
+ xdef sacos
+sacos:
+ FMOVE.X (a0),FP0 ...LOAD INPUT
+
+ move.l (a0),d0 ...pack exponent with upper 16 fraction
+ move.w 4(a0),d0 ;d0 = sign/exponent word : top 16 mantissa bits
+ ANDI.L #$7FFFFFFF,D0 ;drop the sign bit, leaving |X| in compact form
+ CMPI.L #$3FFF8000,D0 ;$3FFF8000 is 1.0 in compact form
+ BGE.B ACOSBIG ;|X| >= 1 is handled separately
+
+*--THIS IS THE USUAL CASE, |X| < 1
+*--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
+
+ FMOVE.S #:3F800000,FP1 ;fp1 = 1.0 (single-precision immediate)
+ FADD.X FP0,FP1 ...1+X
+ FNEG.X FP0 ... -X
+ FADD.S #:3F800000,FP0 ...1-X
+ FDIV.X FP1,FP0 ...(1-X)/(1+X)
+ FSQRT.X FP0 ...SQRT((1-X)/(1+X))
+ fmovem.x fp0,(a0) ...overwrite input
+ move.l d1,-(sp) ;save original users fpcr
+ clr.l d1 ;satan loads d1 into FPCR, so pass it 0 for this intermediate step
+ bsr satan ...ATAN(SQRT([1-X]/[1+X]))
+ fMOVE.L (sp)+,fpcr ;restore users exceptions
+ FADD.X FP0,FP0 ...2 * ATAN( STUFF )
+ bra t_frcinx
+
+ACOSBIG:
+ FABS.X FP0 ;fp0 = |X|
+ FCMP.S #:3F800000,FP0 ;compare |X| with 1.0
+ fbgt t_operr ;cause an operr exception
+
+*--|X| = 1, ACOS(X) = 0 OR PI
+ move.l (a0),d0 ...pack exponent with upper 16 fraction
+ move.w 4(a0),d0
+ TST.L D0 ;D0 has original exponent+fraction
+ BGT.B ACOSP1 ;sign bit clear, so X = +1
+
+*--X = -1
+*Returns PI and inexact exception
+ FMOVE.X PI,FP0
+ FMOVE.L d1,FPCR ;switch to the FPCR value passed in d1 before the final add
+ FADD.S #:00800000,FP0 ;cause an inexact exception to be put
+* ;into the 040 - will not trap until next
+* ;fp inst.
+ bra t_frcinx
+
+ACOSP1:
+ FMOVE.L d1,FPCR ;switch to the FPCR value passed in d1
+ FMOVE.S #:00000000,FP0 ;fp0 = +0.0
+ rts ;Facos of +1 is exact
+
+ end
diff --git a/sys/arch/m68k/fpsp/sasin.sa b/sys/arch/m68k/fpsp/sasin.sa
new file mode 100644
index 00000000000..99e2b88d9e0
--- /dev/null
+++ b/sys/arch/m68k/fpsp/sasin.sa
@@ -0,0 +1,129 @@
+* $NetBSD: sasin.sa,v 1.2 1994/10/26 07:49:29 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* sasin.sa 3.3 12/19/90
+*
+* Description: The entry point sAsin computes the inverse sine of
+* an input argument; sAsind does the same except for denormalized
+* input.
+*
+* Input: Double-extended number X in location pointed to
+* by address register a0.
+*
+* Output: The value arcsin(X) returned in floating-point register Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 3 ulps in
+* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+* result is subsequently rounded to double precision. The
+* result is provably monotonic in double precision.
+*
+* Speed: The program sASIN takes approximately 310 cycles.
+*
+* Algorithm:
+*
+* ASIN
+* 1. If |X| >= 1, go to 3.
+*
+* 2. (|X| < 1) Calculate asin(X) by
+* z := sqrt( [1-X][1+X] )
+* asin(X) = atan( x / z ).
+* Exit.
+*
+* 3. If |X| > 1, go to 5.
+*
+* 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.
+*
+* 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
+* Exit.
+*
+
+SASIN IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+PIBY2 DC.L $3FFF0000,$C90FDAA2,$2168C235,$00000000
+
+ xref t_operr
+ xref t_frcinx
+ xref t_extdnrm
+ xref satan
+
+ xdef sasind
+sasind:
+*--ASIN(X) = X FOR DENORMALIZED X
+
+ bra t_extdnrm ;handled entirely by the external t_extdnrm routine
+
+ xdef sasin
+sasin:
+ FMOVE.X (a0),FP0 ...LOAD INPUT
+
+ move.l (a0),d0 ;d0 upper word = sign/exponent of X
+ move.w 4(a0),d0 ;d0 lower word = top 16 mantissa bits
+ ANDI.L #$7FFFFFFF,D0 ;drop the sign bit, leaving |X| in compact form
+ CMPI.L #$3FFF8000,D0 ;$3FFF8000 is 1.0 in compact form
+ BGE.B asinbig ;|X| >= 1 is handled separately
+
+*--THIS IS THE USUAL CASE, |X| < 1
+*--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
+
+ FMOVE.S #:3F800000,FP1 ;fp1 = 1.0 (single-precision immediate)
+ FSUB.X FP0,FP1 ...1-X
+ fmovem.x fp2,-(a7) ;fp2 is borrowed as scratch; save it on the stack
+ FMOVE.S #:3F800000,FP2 ;fp2 = 1.0
+ FADD.X FP0,FP2 ...1+X
+ FMUL.X FP2,FP1 ...(1+X)(1-X)
+ fmovem.x (a7)+,fp2 ;restore fp2
+ FSQRT.X FP1 ...SQRT([1-X][1+X])
+ FDIV.X FP1,FP0 ...X/SQRT([1-X][1+X])
+ fmovem.x fp0,(a0) ;overwrite the input with the atan argument
+ bsr satan ;d1 is passed through unchanged; satan loads it into FPCR itself
+ bra t_frcinx
+
+asinbig:
+ FABS.X FP0 ...|X|
+ FCMP.S #:3F800000,FP0 ;compare |X| with 1.0
+ fbgt t_operr ;cause an operr exception
+
+*--|X| = 1, ASIN(X) = +- PI/2.
+
+ FMOVE.X PIBY2,FP0 ;fp0 = PI/2
+ move.l (a0),d0 ;reload the sign/exponent longword of X
+ ANDI.L #$80000000,D0 ...SIGN BIT OF X
+ ORI.L #$3F800000,D0 ...+-1 IN SGL FORMAT
+ MOVE.L D0,-(sp) ...push SIGN(X) IN SGL-FMT
+ FMOVE.L d1,FPCR ;switch to the FPCR value passed in d1 before the final multiply
+ FMUL.S (sp)+,FP0 ;fp0 = (+-1) * PI/2; also pops the pushed operand
+ bra t_frcinx
+
+ end
diff --git a/sys/arch/m68k/fpsp/satan.sa b/sys/arch/m68k/fpsp/satan.sa
new file mode 100644
index 00000000000..a865043197b
--- /dev/null
+++ b/sys/arch/m68k/fpsp/satan.sa
@@ -0,0 +1,503 @@
+* $NetBSD: satan.sa,v 1.3 1994/10/26 07:49:31 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* satan.sa 3.3 12/19/90
+*
+* The entry point satan computes the arctangent of an
+* input value. satand does the same except the input value is a
+* denormalized number.
+*
+* Input: Double-extended value in memory location pointed to by address
+* register a0.
+*
+* Output: Arctan(X) returned in floating-point register Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 2 ulps in
+* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+* result is subsequently rounded to double precision. The
+* result is provably monotonic in double precision.
+*
+* Speed: The program satan takes approximately 160 cycles for input
+* argument X such that 1/16 < |X| < 16. For the other arguments,
+* the program will run no worse than 10% slower.
+*
+* Algorithm:
+* Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
+*
+* Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
+* Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
+* of X with a bit-1 attached at the 6-th bit position. Define u
+* to be u = (X-F) / (1 + X*F).
+*
+* Step 3. Approximate arctan(u) by a polynomial poly.
+*
+* Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
+* calculated beforehand. Exit.
+*
+* Step 5. If |X| >= 16, go to Step 7.
+*
+* Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
+*
+* Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
+* Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
+*
+
+satan IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+BOUNDS1 DC.L $3FFB8000,$4002FFFF
+
+ONE DC.L $3F800000
+
+ DC.L $00000000
+
+ATANA3 DC.L $BFF6687E,$314987D8
+ATANA2 DC.L $4002AC69,$34A26DB3
+
+ATANA1 DC.L $BFC2476F,$4E1DA28E
+ATANB6 DC.L $3FB34444,$7F876989
+
+ATANB5 DC.L $BFB744EE,$7FAF45DB
+ATANB4 DC.L $3FBC71C6,$46940220
+
+ATANB3 DC.L $BFC24924,$921872F9
+ATANB2 DC.L $3FC99999,$99998FA9
+
+ATANB1 DC.L $BFD55555,$55555555
+ATANC5 DC.L $BFB70BF3,$98539E6A
+
+ATANC4 DC.L $3FBC7187,$962D1D7D
+ATANC3 DC.L $BFC24924,$827107B8
+
+ATANC2 DC.L $3FC99999,$9996263E
+ATANC1 DC.L $BFD55555,$55555536
+
+PPIBY2 DC.L $3FFF0000,$C90FDAA2,$2168C235,$00000000
+NPIBY2 DC.L $BFFF0000,$C90FDAA2,$2168C235,$00000000
+PTINY DC.L $00010000,$80000000,$00000000,$00000000
+NTINY DC.L $80010000,$80000000,$00000000,$00000000
+
+ATANTBL:
+ DC.L $3FFB0000,$83D152C5,$060B7A51,$00000000
+ DC.L $3FFB0000,$8BC85445,$65498B8B,$00000000
+ DC.L $3FFB0000,$93BE4060,$17626B0D,$00000000
+ DC.L $3FFB0000,$9BB3078D,$35AEC202,$00000000
+ DC.L $3FFB0000,$A3A69A52,$5DDCE7DE,$00000000
+ DC.L $3FFB0000,$AB98E943,$62765619,$00000000
+ DC.L $3FFB0000,$B389E502,$F9C59862,$00000000
+ DC.L $3FFB0000,$BB797E43,$6B09E6FB,$00000000
+ DC.L $3FFB0000,$C367A5C7,$39E5F446,$00000000
+ DC.L $3FFB0000,$CB544C61,$CFF7D5C6,$00000000
+ DC.L $3FFB0000,$D33F62F8,$2488533E,$00000000
+ DC.L $3FFB0000,$DB28DA81,$62404C77,$00000000
+ DC.L $3FFB0000,$E310A407,$8AD34F18,$00000000
+ DC.L $3FFB0000,$EAF6B0A8,$188EE1EB,$00000000
+ DC.L $3FFB0000,$F2DAF194,$9DBE79D5,$00000000
+ DC.L $3FFB0000,$FABD5813,$61D47E3E,$00000000
+ DC.L $3FFC0000,$8346AC21,$0959ECC4,$00000000
+ DC.L $3FFC0000,$8B232A08,$304282D8,$00000000
+ DC.L $3FFC0000,$92FB70B8,$D29AE2F9,$00000000
+ DC.L $3FFC0000,$9ACF476F,$5CCD1CB4,$00000000
+ DC.L $3FFC0000,$A29E7630,$4954F23F,$00000000
+ DC.L $3FFC0000,$AA68C5D0,$8AB85230,$00000000
+ DC.L $3FFC0000,$B22DFFFD,$9D539F83,$00000000
+ DC.L $3FFC0000,$B9EDEF45,$3E900EA5,$00000000
+ DC.L $3FFC0000,$C1A85F1C,$C75E3EA5,$00000000
+ DC.L $3FFC0000,$C95D1BE8,$28138DE6,$00000000
+ DC.L $3FFC0000,$D10BF300,$840D2DE4,$00000000
+ DC.L $3FFC0000,$D8B4B2BA,$6BC05E7A,$00000000
+ DC.L $3FFC0000,$E0572A6B,$B42335F6,$00000000
+ DC.L $3FFC0000,$E7F32A70,$EA9CAA8F,$00000000
+ DC.L $3FFC0000,$EF888432,$64ECEFAA,$00000000
+ DC.L $3FFC0000,$F7170A28,$ECC06666,$00000000
+ DC.L $3FFD0000,$812FD288,$332DAD32,$00000000
+ DC.L $3FFD0000,$88A8D1B1,$218E4D64,$00000000
+ DC.L $3FFD0000,$9012AB3F,$23E4AEE8,$00000000
+ DC.L $3FFD0000,$976CC3D4,$11E7F1B9,$00000000
+ DC.L $3FFD0000,$9EB68949,$3889A227,$00000000
+ DC.L $3FFD0000,$A5EF72C3,$4487361B,$00000000
+ DC.L $3FFD0000,$AD1700BA,$F07A7227,$00000000
+ DC.L $3FFD0000,$B42CBCFA,$FD37EFB7,$00000000
+ DC.L $3FFD0000,$BB303A94,$0BA80F89,$00000000
+ DC.L $3FFD0000,$C22115C6,$FCAEBBAF,$00000000
+ DC.L $3FFD0000,$C8FEF3E6,$86331221,$00000000
+ DC.L $3FFD0000,$CFC98330,$B4000C70,$00000000
+ DC.L $3FFD0000,$D6807AA1,$102C5BF9,$00000000
+ DC.L $3FFD0000,$DD2399BC,$31252AA3,$00000000
+ DC.L $3FFD0000,$E3B2A855,$6B8FC517,$00000000
+ DC.L $3FFD0000,$EA2D764F,$64315989,$00000000
+ DC.L $3FFD0000,$F3BF5BF8,$BAD1A21D,$00000000
+ DC.L $3FFE0000,$801CE39E,$0D205C9A,$00000000
+ DC.L $3FFE0000,$8630A2DA,$DA1ED066,$00000000
+ DC.L $3FFE0000,$8C1AD445,$F3E09B8C,$00000000
+ DC.L $3FFE0000,$91DB8F16,$64F350E2,$00000000
+ DC.L $3FFE0000,$97731420,$365E538C,$00000000
+ DC.L $3FFE0000,$9CE1C8E6,$A0B8CDBA,$00000000
+ DC.L $3FFE0000,$A22832DB,$CADAAE09,$00000000
+ DC.L $3FFE0000,$A746F2DD,$B7602294,$00000000
+ DC.L $3FFE0000,$AC3EC0FB,$997DD6A2,$00000000
+ DC.L $3FFE0000,$B110688A,$EBDC6F6A,$00000000
+ DC.L $3FFE0000,$B5BCC490,$59ECC4B0,$00000000
+ DC.L $3FFE0000,$BA44BC7D,$D470782F,$00000000
+ DC.L $3FFE0000,$BEA94144,$FD049AAC,$00000000
+ DC.L $3FFE0000,$C2EB4ABB,$661628B6,$00000000
+ DC.L $3FFE0000,$C70BD54C,$E602EE14,$00000000
+ DC.L $3FFE0000,$CD000549,$ADEC7159,$00000000
+ DC.L $3FFE0000,$D48457D2,$D8EA4EA3,$00000000
+ DC.L $3FFE0000,$DB948DA7,$12DECE3B,$00000000
+ DC.L $3FFE0000,$E23855F9,$69E8096A,$00000000
+ DC.L $3FFE0000,$E8771129,$C4353259,$00000000
+ DC.L $3FFE0000,$EE57C16E,$0D379C0D,$00000000
+ DC.L $3FFE0000,$F3E10211,$A87C3779,$00000000
+ DC.L $3FFE0000,$F919039D,$758B8D41,$00000000
+ DC.L $3FFE0000,$FE058B8F,$64935FB3,$00000000
+ DC.L $3FFF0000,$8155FB49,$7B685D04,$00000000
+ DC.L $3FFF0000,$83889E35,$49D108E1,$00000000
+ DC.L $3FFF0000,$859CFA76,$511D724B,$00000000
+ DC.L $3FFF0000,$87952ECF,$FF8131E7,$00000000
+ DC.L $3FFF0000,$89732FD1,$9557641B,$00000000
+ DC.L $3FFF0000,$8B38CAD1,$01932A35,$00000000
+ DC.L $3FFF0000,$8CE7A8D8,$301EE6B5,$00000000
+ DC.L $3FFF0000,$8F46A39E,$2EAE5281,$00000000
+ DC.L $3FFF0000,$922DA7D7,$91888487,$00000000
+ DC.L $3FFF0000,$94D19FCB,$DEDF5241,$00000000
+ DC.L $3FFF0000,$973AB944,$19D2A08B,$00000000
+ DC.L $3FFF0000,$996FF00E,$08E10B96,$00000000
+ DC.L $3FFF0000,$9B773F95,$12321DA7,$00000000
+ DC.L $3FFF0000,$9D55CC32,$0F935624,$00000000
+ DC.L $3FFF0000,$9F100575,$006CC571,$00000000
+ DC.L $3FFF0000,$A0A9C290,$D97CC06C,$00000000
+ DC.L $3FFF0000,$A22659EB,$EBC0630A,$00000000
+ DC.L $3FFF0000,$A388B4AF,$F6EF0EC9,$00000000
+ DC.L $3FFF0000,$A4D35F10,$61D292C4,$00000000
+ DC.L $3FFF0000,$A60895DC,$FBE3187E,$00000000
+ DC.L $3FFF0000,$A72A51DC,$7367BEAC,$00000000
+ DC.L $3FFF0000,$A83A5153,$0956168F,$00000000
+ DC.L $3FFF0000,$A93A2007,$7539546E,$00000000
+ DC.L $3FFF0000,$AA9E7245,$023B2605,$00000000
+ DC.L $3FFF0000,$AC4C84BA,$6FE4D58F,$00000000
+ DC.L $3FFF0000,$ADCE4A4A,$606B9712,$00000000
+ DC.L $3FFF0000,$AF2A2DCD,$8D263C9C,$00000000
+ DC.L $3FFF0000,$B0656F81,$F22265C7,$00000000
+ DC.L $3FFF0000,$B1846515,$0F71496A,$00000000
+ DC.L $3FFF0000,$B28AAA15,$6F9ADA35,$00000000
+ DC.L $3FFF0000,$B37B44FF,$3766B895,$00000000
+ DC.L $3FFF0000,$B458C3DC,$E9630433,$00000000
+ DC.L $3FFF0000,$B525529D,$562246BD,$00000000
+ DC.L $3FFF0000,$B5E2CCA9,$5F9D88CC,$00000000
+ DC.L $3FFF0000,$B692CADA,$7ACA1ADA,$00000000
+ DC.L $3FFF0000,$B736AEA7,$A6925838,$00000000
+ DC.L $3FFF0000,$B7CFAB28,$7E9F7B36,$00000000
+ DC.L $3FFF0000,$B85ECC66,$CB219835,$00000000
+ DC.L $3FFF0000,$B8E4FD5A,$20A593DA,$00000000
+ DC.L $3FFF0000,$B99F41F6,$4AFF9BB5,$00000000
+ DC.L $3FFF0000,$BA7F1E17,$842BBE7B,$00000000
+ DC.L $3FFF0000,$BB471285,$7637E17D,$00000000
+ DC.L $3FFF0000,$BBFABE8A,$4788DF6F,$00000000
+ DC.L $3FFF0000,$BC9D0FAD,$2B689D79,$00000000
+ DC.L $3FFF0000,$BD306A39,$471ECD86,$00000000
+ DC.L $3FFF0000,$BDB6C731,$856AF18A,$00000000
+ DC.L $3FFF0000,$BE31CAC5,$02E80D70,$00000000
+ DC.L $3FFF0000,$BEA2D55C,$E33194E2,$00000000
+ DC.L $3FFF0000,$BF0B10B7,$C03128F0,$00000000
+ DC.L $3FFF0000,$BF6B7A18,$DACB778D,$00000000
+ DC.L $3FFF0000,$BFC4EA46,$63FA18F6,$00000000
+ DC.L $3FFF0000,$C0181BDE,$8B89A454,$00000000
+ DC.L $3FFF0000,$C065B066,$CFBF6439,$00000000
+ DC.L $3FFF0000,$C0AE345F,$56340AE6,$00000000
+ DC.L $3FFF0000,$C0F22291,$9CB9E6A7,$00000000
+
+X equ FP_SCR1
+XDCARE equ X+2
+XFRAC equ X+4
+XFRACLO equ X+8
+
+ATANF equ FP_SCR2
+ATANFHI equ ATANF+4
+ATANFLO equ ATANF+8
+
+
+ xref t_frcinx
+ xref t_extdnrm
+
+ xdef satand
+satand:
+*--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
+
+ bra t_extdnrm ;handled entirely by the external t_extdnrm routine
+
+ xdef satan
+satan:
+*--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT A NAN
+
+ FMOVE.X (A0),FP0 ...LOAD INPUT
+
+ MOVE.L (A0),D0 ;D0 UPPER WORD = SIGN/EXPONENT OF X
+ MOVE.W 4(A0),D0 ;D0 LOWER WORD = TOP 16 MANTISSA BITS
+ FMOVE.X FP0,X(a6) ;KEEP A WORKING COPY OF X IN THE X SCRATCH AREA
+ ANDI.L #$7FFFFFFF,D0 ;DROP THE SIGN BIT, LEAVING |X| IN COMPACT FORM
+
+ CMPI.L #$3FFB8000,D0 ...|X| >= 1/16?
+ BGE.B ATANOK1
+ BRA.W ATANSM ;|X| < 1/16
+
+ATANOK1:
+ CMPI.L #$4002FFFF,D0 ...|X| < 16 ?
+ BLE.B ATANMAIN
+ BRA.W ATANBIG ;|X| >= 16
+
+
+*--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
+*--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
+*--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
+*--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
+*--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
+*--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
+*--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
+*--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
+*--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
+*--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
+*--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
+*--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
+*--WILL INVOLVE A VERY LONG POLYNOMIAL.
+
+*--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
+*--WE CHOOSE F TO BE +-2^K * 1.BBBB1
+*--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
+*--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
+*--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
+*-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
+
+ATANMAIN:
+
+ CLR.W XDCARE(a6) ...CLEAN UP X JUST IN CASE
+ ANDI.L #$F8000000,XFRAC(a6) ...FIRST 5 BITS
+ ORI.L #$04000000,XFRAC(a6) ...SET 6-TH BIT TO 1
+ CLR.L XFRACLO(a6) ...LOCATION OF X IS NOW F
+
+ FMOVE.X FP0,FP1 ...FP1 IS X
+ FMUL.X X(a6),FP1 ...FP1 IS X*F, NOTE THAT X*F > 0
+ FSUB.X X(a6),FP0 ...FP0 IS X-F
+ FADD.S #:3F800000,FP1 ...FP1 IS 1 + X*F
+ FDIV.X FP1,FP0 ...FP0 IS U = (X-F)/(1+X*F)
+
+*--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|),
+*--CREATE ATAN(F) AND STORE IT IN ATANF.
+*--NOTE(review): FP2 IS USED BELOW BUT IS NOT SAVED HERE -- CONFIRM THE CALLER PRESERVES IT.
+
+ MOVE.L d2,-(a7) ...SAVE d2 TEMPORARILY
+ MOVE.L d0,d2 ...THE EXPO AND 16 BITS OF X
+ ANDI.L #$00007800,d0 ...4 VARYING BITS OF F'S FRACTION
+ ANDI.L #$7FFF0000,d2 ...EXPONENT OF F
+ SUBI.L #$3FFB0000,d2 ...K+4
+ ASR.L #1,d2 ;K+4 NOW SITS IN BITS 17-15, JUST ABOVE THE 4 FRACTION BITS
+ ADD.L d2,d0 ...THE 7 BITS IDENTIFYING F
+ ASR.L #7,d0 ...INDEX INTO TBL OF ATAN(|F|)
+ LEA ATANTBL,a1 ;d0 IS NOW 16 * ENTRY NUMBER; EACH ENTRY IS FOUR LONGWORDS
+ ADDA.L d0,a1 ...ADDRESS OF ATAN(|F|)
+ MOVE.L (a1)+,ATANF(a6) ;FIRST LONGWORD OF THE ENTRY
+ MOVE.L (a1)+,ATANFHI(a6) ;SECOND LONGWORD OF THE ENTRY
+ MOVE.L (a1)+,ATANFLO(a6) ...ATANF IS NOW ATAN(|F|)
+ MOVE.L X(a6),d0 ...LOAD SIGN AND EXPO. AGAIN
+ ANDI.L #$80000000,d0 ...SIGN(F)
+ OR.L d0,ATANF(a6) ...ATANF IS NOW SIGN(F)*ATAN(|F|)
+ MOVE.L (a7)+,d2 ...RESTORE d2
+
+*--THAT'S ALL I HAVE TO DO FOR NOW,
+*--BUT ALAS, THE DIVIDE IS STILL CRANKING!
+
+*--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
+*--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
+*--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
+*--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
+*--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
+*--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
+*--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
+
+
+ FMOVE.X FP0,FP1 ;FP1 = U
+ FMUL.X FP1,FP1 ;FP1 = V = U*U
+ FMOVE.D ATANA3,FP2 ;FP2 = A3
+ FADD.X FP1,FP2 ...A3+V
+ FMUL.X FP1,FP2 ...V*(A3+V)
+ FMUL.X FP0,FP1 ...U*V
+ FADD.D ATANA2,FP2 ...A2+V*(A3+V)
+ FMUL.D ATANA1,FP1 ...A1*U*V
+ FMUL.X FP2,FP1 ...A1*U*V*(A2+V*(A3+V))
+
+ FADD.X FP1,FP0 ...ATAN(U), FP1 RELEASED
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FADD.X ATANF(a6),FP0 ...ATAN(X)
+ bra t_frcinx
+
+ATANBORS:
+*--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
+*--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
+ CMPI.L #$3FFF8000,d0 ;1.0 IN COMPACT FORM. NOTE(review): NO BRANCH TO ATANBORS IS VISIBLE IN SATAN
+ BGT.W ATANBIG ...I.E. |X| >= 16
+
+ATANSM:
+*--|X| < 1/16
+*--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
+*--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
+*--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
+*--WHERE Y = X*X, AND Z = Y*Y.
+
+ CMPI.L #$3FD78000,d0 ;2^(-40) IN COMPACT FORM
+ BLT.W ATANTINY
+*--COMPUTE POLYNOMIAL
+ FMUL.X FP0,FP0 ...FP0 IS Y = X*X
+
+
+ CLR.W XDCARE(a6) ;ZERO THE WORD AT X+2 OF THE SAVED COPY OF X
+
+ FMOVE.X FP0,FP1 ;FP1 = Y
+ FMUL.X FP1,FP1 ...FP1 IS Z = Y*Y
+
+ FMOVE.D ATANB6,FP2 ;FP2 = B6
+ FMOVE.D ATANB5,FP3 ;FP3 = B5
+
+ FMUL.X FP1,FP2 ...Z*B6
+ FMUL.X FP1,FP3 ...Z*B5
+
+ FADD.D ATANB4,FP2 ...B4+Z*B6
+ FADD.D ATANB3,FP3 ...B3+Z*B5
+
+ FMUL.X FP1,FP2 ...Z*(B4+Z*B6)
+ FMUL.X FP3,FP1 ...Z*(B3+Z*B5)
+
+ FADD.D ATANB2,FP2 ...B2+Z*(B4+Z*B6)
+ FADD.D ATANB1,FP1 ...B1+Z*(B3+Z*B5)
+
+ FMUL.X FP0,FP2 ...Y*(B2+Z*(B4+Z*B6))
+ FMUL.X X(a6),FP0 ...X*Y
+
+ FADD.X FP2,FP1 ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
+
+
+ FMUL.X FP1,FP0 ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FADD.X X(a6),FP0 ;FINAL OP: ADD X TO THE CORRECTION TERM
+
+ bra t_frcinx
+
+ATANTINY:
+*--|X| < 2^(-40), ATAN(X) = X
+ CLR.W XDCARE(a6) ;ZERO THE WORD AT X+2 OF THE SAVED COPY OF X
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FMOVE.X X(a6),FP0 ;last inst - possible exception set
+
+ bra t_frcinx
+
+ATANBIG:
+*--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
+*--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
+ CMPI.L #$40638000,d0 ;2^100 IN COMPACT FORM
+ BGT.W ATANHUGE
+
+*--APPROXIMATE ATAN(-1/X) BY
+*--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
+*--THIS CAN BE RE-WRITTEN AS
+*--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
+
+ FMOVE.S #:BF800000,FP1 ...LOAD -1
+ FDIV.X FP0,FP1 ...FP1 IS -1/X
+
+
+*--DIVIDE IS STILL CRANKING
+
+ FMOVE.X FP1,FP0 ...FP0 IS X'
+ FMUL.X FP0,FP0 ...FP0 IS Y = X'*X'
+ FMOVE.X FP1,X(a6) ...X IS REALLY X'
+
+ FMOVE.X FP0,FP1 ;FP1 = Y
+ FMUL.X FP1,FP1 ...FP1 IS Z = Y*Y
+
+ FMOVE.D ATANC5,FP3 ;FP3 = C5
+ FMOVE.D ATANC4,FP2 ;FP2 = C4
+
+ FMUL.X FP1,FP3 ...Z*C5
+ FMUL.X FP1,FP2 ...Z*C4
+
+ FADD.D ATANC3,FP3 ...C3+Z*C5
+ FADD.D ATANC2,FP2 ...C2+Z*C4
+
+ FMUL.X FP3,FP1 ...Z*(C3+Z*C5), FP3 RELEASED
+ FMUL.X FP0,FP2 ...Y*(C2+Z*C4)
+
+ FADD.D ATANC1,FP1 ...C1+Z*(C3+Z*C5)
+ FMUL.X X(a6),FP0 ...X'*Y
+
+ FADD.X FP2,FP1 ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
+
+
+ FMUL.X FP1,FP0 ...X'*Y*([C1+Z*(C3+Z*C5)]
+* ... +[Y*(C2+Z*C4)])
+ FADD.X X(a6),FP0 ;FP0 = ATAN(X'); X(a6) HOLDS X' AT THIS POINT
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+
+ btst.b #7,(a0) ;TOP BIT OF THE FIRST INPUT BYTE = SIGN OF X
+ beq.b pos_big
+
+neg_big:
+ FADD.X NPIBY2,FP0 ;X < 0: ADD -PI/2
+ bra t_frcinx
+
+pos_big:
+ FADD.X PPIBY2,FP0 ;X > 0: ADD +PI/2
+ bra t_frcinx
+
+ATANHUGE:
+*--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
+ btst.b #7,(a0) ;TOP BIT OF THE FIRST INPUT BYTE = SIGN OF X
+ beq.b pos_huge
+
+neg_huge:
+ FMOVE.X NPIBY2,fp0 ;fp0 = -PI/2
+ fmove.l d1,fpcr ;switch to the FPCR value passed in d1 before the final op
+ fsub.x NTINY,fp0 ;fp0 = -PI/2 - (-TINY)
+ bra t_frcinx
+
+pos_huge:
+ FMOVE.X PPIBY2,fp0 ;fp0 = +PI/2
+ fmove.l d1,fpcr ;switch to the FPCR value passed in d1 before the final op
+ fsub.x PTINY,fp0 ;fp0 = PI/2 - TINY
+ bra t_frcinx
+
+ end
diff --git a/sys/arch/m68k/fpsp/satanh.sa b/sys/arch/m68k/fpsp/satanh.sa
new file mode 100644
index 00000000000..06362c78d8a
--- /dev/null
+++ b/sys/arch/m68k/fpsp/satanh.sa
@@ -0,0 +1,129 @@
+* $NetBSD: satanh.sa,v 1.2 1994/10/26 07:49:33 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* satanh.sa 3.3 12/19/90
+*
+* The entry point satanh computes the inverse
+* hyperbolic tangent of
+* an input argument; satanhd does the same except for denormalized
+* input.
+*
+* Input: Double-extended number X in location pointed to
+* by address register a0.
+*
+* Output: The value arctanh(X) returned in floating-point register Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 3 ulps in
+* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+* result is subsequently rounded to double precision. The
+* result is provably monotonic in double precision.
+*
+* Speed: The program satanh takes approximately 270 cycles.
+*
+* Algorithm:
+*
+* ATANH
+* 1. If |X| >= 1, go to 3.
+*
+* 2. (|X| < 1) Calculate atanh(X) by
+* sgn := sign(X)
+* y := |X|
+* z := 2y/(1-y)
+* atanh(X) := sgn * (1/2) * logp1(z)
+* Exit.
+*
+* 3. If |X| > 1, go to 5.
+*
+* 4. (|X| = 1) Generate infinity with an appropriate sign and
+* divide-by-zero by
+* sgn := sign(X)
+* atanh(X) := sgn / (+0).
+* Exit.
+*
+* 5. (|X| > 1) Generate an invalid operation by 0 * infinity.
+* Exit.
+*
+
+satanh IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ xref t_dz
+ xref t_operr
+ xref t_frcinx
+ xref t_extdnrm
+ xref slognp1
+
+ xdef satanhd
+satanhd:
+*--ATANH(X) = X FOR DENORMALIZED X
+
+ bra t_extdnrm ;handled entirely by the external t_extdnrm routine
+
+ xdef satanh
+satanh:
+ move.l (a0),d0 ;d0 upper word = sign/exponent of X
+ move.w 4(a0),d0 ;d0 lower word = top 16 mantissa bits
+ ANDI.L #$7FFFFFFF,D0 ;drop the sign bit, leaving |X| in compact form
+ CMPI.L #$3FFF8000,D0 ;$3FFF8000 is 1.0 in compact form
+ BGE.B ATANHBIG ;|X| >= 1 is handled separately
+
+*--THIS IS THE USUAL CASE, |X| < 1
+*--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
+
+ FABS.X (a0),FP0 ...Y = |X|
+ FMOVE.X FP0,FP1 ;fp1 = Y
+ FNEG.X FP1 ...-Y
+ FADD.X FP0,FP0 ...2Y
+ FADD.S #:3F800000,FP1 ...1-Y
+ FDIV.X FP1,FP0 ...2Y/(1-Y)
+ move.l (a0),d0 ;reload the sign/exponent longword of X
+ ANDI.L #$80000000,D0 ;keep only the sign bit
+ ORI.L #$3F000000,D0 ...SIGN(X)*HALF
+ move.l d0,-(sp) ;push +-0.5 (single format) for the final multiply
+
+ fmovem.x fp0,(a0) ...overwrite input
+ move.l d1,-(sp) ;save the FPCR value passed in d1
+ clr.l d1 ;call slognp1 with d1 = 0
+ bsr slognp1 ...LOG1P(Z)
+ fmove.l (sp)+,fpcr ;load the saved d1 value into FPCR before the final multiply
+ FMUL.S (sp)+,FP0 ;fp0 = SIGN(X) * 0.5 * LOG1P(Z); also pops the operand
+ bra t_frcinx
+
+ATANHBIG:
+ FABS.X (a0),FP0 ...|X|
+ FCMP.S #:3F800000,FP0 ;compare |X| with 1.0
+ fbgt t_operr ;|X| > 1: go to the operr handler
+ bra t_dz ;|X| = 1: go to the t_dz handler
+
+ end
diff --git a/sys/arch/m68k/fpsp/scale.sa b/sys/arch/m68k/fpsp/scale.sa
new file mode 100644
index 00000000000..e94fded546d
--- /dev/null
+++ b/sys/arch/m68k/fpsp/scale.sa
@@ -0,0 +1,397 @@
+* $NetBSD: scale.sa,v 1.3 1994/10/26 07:49:34 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* scale.sa 3.3 7/30/91
+*
+* The entry point sSCALE computes the destination operand
+* scaled by the source operand. If the absolute value of
+* the source operand is >= 2^14, an overflow or underflow
+* is returned.
+*
+* The entry point sscale is called from do_func to emulate
+* the fscale unimplemented instruction.
+*
+* Input: Double-extended destination operand in FPTEMP,
+* double-extended source operand in ETEMP.
+*
+* Output: The function returns scale(X,Y) to fp0.
+*
+* Modifies: fp0.
+*
+* Algorithm:
+*
+
+SCALE IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref t_ovfl2
+ xref t_unfl
+ xref round
+ xref t_resdnrm
+
+SRC_BNDS dc.w $3fff,$400c
+
+*
+* This entry point is used by the unimplemented instruction exception
+* handler.
+*
+*
+*
+* FSCALE
+*
+ xdef sscale
+*
+* sscale --- FSCALE emulation entry.
+* Reads the destination operand from FPTEMP(a6) and the source
+* operand from ETEMP(a6). After the prologue:
+* d1 = destination biased exponent with the sign bit stripped
+* L_SCR1 = $ff if the destination sign bit was set, else $00
+* d0.w = source biased exponent with the sign bit stripped
+* The source exponent is then classified against SRC_BNDS
+* ($3fff..$400c): within bounds -> src_in, above -> src_out,
+* below -> falls through to src_small.
+*
+sscale:
+ fmove.l #0,fpcr ;clr user enabled exc
+ clr.l d1
+ move.w FPTEMP(a6),d1 ;get dest exponent
+ smi L_SCR1(a6) ;use L_SCR1 to hold sign (N from move.w above)
+ andi.l #$7fff,d1 ;strip sign
+ move.w ETEMP(a6),d0 ;check src bounds
+ andi.w #$7fff,d0 ;clr sign bit
+ cmp2.w SRC_BNDS,d0 ;carry set when d0 is outside the bounds
+ bcc.b src_in ;carry clear: exponent within $3fff..$400c
+ cmpi.w #$400c,d0 ;test for too large
+ bge.w src_out
+*
+* The source input is below 1, so we check for denormalized numbers
+* and set unfl.
+*
+src_small:
+* Any of the top three DTAG bits set sends the destination through
+* t_resdnrm; otherwise the destination is returned unchanged.
+ move.b DTAG(a6),d0
+ andi.b #$e0,d0
+ tst.b d0
+ beq.b no_denorm
+ st STORE_FLG(a6) ;dest already contains result
+ or.l #unfl_mask,USER_FPSR(a6) ;set UNFL
+den_done:
+ lea.l FPTEMP(a6),a0 ;a0 -> operand handed to t_resdnrm
+ bra t_resdnrm
+no_denorm:
+ fmove.l USER_FPCR(a6),FPCR ;reload FPCR from the saved user copy
+ fmove.x FPTEMP(a6),fp0 ;simply return dest
+ rts
+
+
+*
+* Source is within 2^14 range. To perform the int operation,
+* move it to d0.
+*
+*
+* src_in --- convert the source to a 32-bit integer in d0 using
+* round-to-zero, clear the FPSR, then dispatch on the sign of the
+* source: negative -> src_neg, otherwise fall into src_pos.
+*
+src_in:
+ fmove.x ETEMP(a6),fp0 ;move in src for int
+ fmove.l #rz_mode,fpcr ;force rz for src conversion
+ fmove.l fp0,d0 ;int src to d0
+ fmove.l #0,FPSR ;clr status from above
+ tst.w ETEMP(a6) ;check src sign
+ blt.w src_neg
+*
+* Source is positive. Add the src to the dest exponent.
+* The result can be denormalized, if src = 0, or overflow,
+* if the result of the add sets a bit in the upper word.
+*
+src_pos:
+ tst.w d1 ;check for denorm
+ beq.w dst_dnrm ;dest exponent is zero
+ add.l d0,d1 ;add src to dest exp
+ beq.b denorm ;if zero, result is denorm
+ cmpi.l #$7fff,d1 ;test for overflow
+ bge.b ovfl
+ tst.b L_SCR1(a6) ;re-attach the saved dest sign
+ beq.b spos_pos
+ or.w #$8000,d1
+spos_pos:
+ move.w d1,FPTEMP(a6) ;result in FPTEMP
+ fmove.l USER_FPCR(a6),FPCR
+ fmove.x FPTEMP(a6),fp0 ;write result to fp0
+ rts
+*
+* ovfl --- new exponent is >= $7fff. The destination operand is
+* copied into ETEMP and control passes to t_ovfl2.
+* NOTE(review): d1 (with the sign merged in) is not stored on this
+* path; the exponent word written to ETEMP is the original one read
+* back from FPTEMP. Presumably t_ovfl2 only needs the sign -- confirm.
+*
+ovfl:
+ tst.b L_SCR1(a6)
+ beq.b sovl_pos
+ or.w #$8000,d1
+sovl_pos:
+ move.w FPTEMP(a6),ETEMP(a6) ;result in ETEMP
+ move.l FPTEMP_HI(a6),ETEMP_HI(a6)
+ move.l FPTEMP_LO(a6),ETEMP_LO(a6)
+ bra t_ovfl2
+
+*
+* denorm --- the summed exponent in d1 is exactly zero.
+* The saved destination sign (L_SCR1) is merged back into d1, then the
+* top mantissa bit (j bit) of FPTEMP_HI decides the exit:
+* j bit set -> nden_exit: store d1 as the exponent word and return
+* FPTEMP in fp0.
+* j bit clear -> copy the operand to ETEMP, set unfl in USER_FPSR and
+* branch to t_resdnrm with a0 -> ETEMP.
+*
+* The unfl flag is OR-ed in with unfl_mask, matching the other
+* "set unfl" sites in this routine (src_small, not_zero, dst_fin).
+* The previous operand, unfl_bit, is by its name a bit index rather
+* than a mask, so OR-ing it into the FPSR image would set unrelated
+* low-order bits -- NOTE(review): confirm against the definitions in
+* fpsp.h.
+*
+denorm:
+ tst.b L_SCR1(a6)
+ beq.b den_pos
+ or.w #$8000,d1
+den_pos:
+ tst.l FPTEMP_HI(a6) ;check j bit
+ blt.b nden_exit ;if set, not denorm
+ move.w d1,ETEMP(a6) ;input expected in ETEMP
+ move.l FPTEMP_HI(a6),ETEMP_HI(a6)
+ move.l FPTEMP_LO(a6),ETEMP_LO(a6)
+ or.l #unfl_mask,USER_FPSR(a6) ;set unfl
+ lea.l ETEMP(a6),a0
+ bra t_resdnrm
+nden_exit:
+ move.w d1,FPTEMP(a6) ;result in FPTEMP
+ fmove.l USER_FPCR(a6),FPCR
+ fmove.x FPTEMP(a6),fp0 ;write result to fp0
+ rts
+
+*
+* Source is negative. Add the src to the dest exponent.
+* (The result exponent will be reduced). The result can be
+* denormalized.
+*
+*
+* src_neg --- d0 holds the (negative) integer source, d1 the dest
+* exponent. Sum == 0 -> denorm, sum < 0 -> fix_dnrm, otherwise the
+* saved sign is merged in and the new exponent is stored directly.
+*
+src_neg:
+ add.l d0,d1 ;add src to dest
+ beq.b denorm ;if zero, result is denorm
+ blt.b fix_dnrm ;if negative, result is
+* ;needing denormalization
+ tst.b L_SCR1(a6) ;re-attach the saved dest sign
+ beq.b sneg_pos
+ or.w #$8000,d1
+sneg_pos:
+ move.w d1,FPTEMP(a6) ;result in FPTEMP
+ fmove.l USER_FPCR(a6),FPCR
+ fmove.x FPTEMP(a6),fp0 ;write result to fp0
+ rts
+
+
+*
+* The result exponent is below denorm value. Test for catastrophic
+* underflow and force zero if true. If not, try to shift the
+* mantissa right until a zero exponent exists.
+*
+*
+* fix_dnrm --- entered with a negative exponent sum in d1.
+* Below $ffc0 (-64 as a word) the result is treated as a total
+* underflow (fix_unfl). Otherwise the 64-bit mantissa is shifted right
+* one bit per loop pass while d0 counts up to zero.
+* d2 is saved on the stack here and restored in tst_con.
+* L_SCR2 is set to $ff if any 1 bit is shifted out (inexact).
+*
+fix_dnrm:
+ cmpi.w #$ffc0,d1 ;lower bound for normalization
+ blt.w fix_unfl ;if lower, catastrophic unfl
+ move.w d1,d0 ;use d0 for exp
+ move.l d2,-(a7) ;free d2 for norm
+ move.l FPTEMP_HI(a6),d1
+ move.l FPTEMP_LO(a6),d2
+ clr.l L_SCR2(a6)
+fix_loop:
+ add.w #1,d0 ;drive d0 to 0
+ lsr.l #1,d1 ;while shifting the
+ roxr.l #1,d2 ;mantissa to the right
+ bcc.b no_carry ;carry = bit shifted out of d2
+ st L_SCR2(a6) ;use L_SCR2 to capture inex
+no_carry:
+ tst.w d0 ;it is finished when
+ blt.b fix_loop ;d0 reaches zero
+ tst.b L_SCR2(a6)
+ beq.b tst_zero
+ or.l #unfl_inx_mask,USER_FPSR(a6)
+* ;set unfl, aunfl, ainex
+*
+* Test for zero. If zero, simply use fmove to return +/- zero
+* to the fpu.
+*
+tst_zero:
+ clr.w FPTEMP_EX(a6) ;exponent word = 0
+ tst.b L_SCR1(a6) ;test for sign
+ beq.b tst_con
+ or.w #$8000,FPTEMP_EX(a6) ;set sign bit
+tst_con:
+ move.l d1,FPTEMP_HI(a6)
+ move.l d2,FPTEMP_LO(a6)
+ move.l (a7)+,d2 ;restore d2 saved in fix_dnrm
+ tst.l d1
+ bne.b not_zero
+ tst.l FPTEMP_LO(a6)
+ bne.b not_zero
+*
+* Result is zero. Check for rounding mode to set lsb. If the
+* mode is rp, and the zero is positive, return smallest denorm.
+* If the mode is rm, and the zero is negative, return smallest
+* negative denorm.
+*
+ btst.b #5,FPCR_MODE(a6) ;test if rm or rp
+ beq.b no_dir
+ btst.b #4,FPCR_MODE(a6) ;check which one
+ beq.b zer_rm
+zer_rp:
+ tst.b L_SCR1(a6) ;check sign
+ bne.b no_dir ;if set, neg op, no inc
+ move.l #1,FPTEMP_LO(a6) ;set lsb
+ bra.b sm_dnrm
+zer_rm:
+ tst.b L_SCR1(a6) ;check sign
+ beq.b no_dir ;if clr, pos op, no inc
+ move.l #1,FPTEMP_LO(a6) ;set lsb
+ or.l #neg_mask,USER_FPSR(a6) ;set N
+ bra.b sm_dnrm
+no_dir:
+ fmove.l USER_FPCR(a6),FPCR
+ fmove.x FPTEMP(a6),fp0 ;use fmove to set cc's
+ rts
+
+*
+* The rounding mode changed the zero to a smallest denorm. Call
+* t_resdnrm with exceptional operand in ETEMP.
+*
+* sm_dnrm copies the three longwords of FPTEMP into ETEMP and
+* branches to t_resdnrm with a0 -> ETEMP. It is the common exit for
+* zer_rp/zer_rm, zer_rp2/zer_rm2 and not_zero.
+sm_dnrm:
+ move.l FPTEMP_EX(a6),ETEMP_EX(a6)
+ move.l FPTEMP_HI(a6),ETEMP_HI(a6)
+ move.l FPTEMP_LO(a6),ETEMP_LO(a6)
+ lea.l ETEMP(a6),a0
+ bra t_resdnrm
+
+*
+* Result is still denormalized.
+*
+* Sets unfl (and N when the saved sign in L_SCR1 is set) in
+* USER_FPSR, then leaves through sm_dnrm.
+not_zero:
+ or.l #unfl_mask,USER_FPSR(a6) ;set unfl
+ tst.b L_SCR1(a6) ;check for sign
+ beq.b fix_exit
+ or.l #neg_mask,USER_FPSR(a6) ;set N
+fix_exit:
+ bra.b sm_dnrm
+
+
+*
+* The result has underflowed to zero. Return zero and set
+* unfl, aunfl, and ainex.
+*
+*
+* fix_unfl --- total underflow. Sets unfl_inx_mask in USER_FPSR, then:
+* rp mode, positive operand -> smallest positive denorm via sm_dnrm
+* rm mode, negative operand -> smallest negative denorm via sm_dnrm
+* anything else -> signed zero in fp0, with FP_SCR1
+* cleared (12 bytes)
+*
+fix_unfl:
+ or.l #unfl_inx_mask,USER_FPSR(a6)
+ btst.b #5,FPCR_MODE(a6) ;test if rm or rp
+ beq.b no_dir2
+ btst.b #4,FPCR_MODE(a6) ;check which one
+ beq.b zer_rm2
+zer_rp2:
+ tst.b L_SCR1(a6) ;check sign
+ bne.b no_dir2 ;if set, neg op, no inc
+ clr.l FPTEMP_EX(a6) ;positive sign, zero exponent
+ clr.l FPTEMP_HI(a6)
+ move.l #1,FPTEMP_LO(a6) ;set lsb
+ bra.b sm_dnrm ;return smallest denorm
+zer_rm2:
+ tst.b L_SCR1(a6) ;check sign
+ beq.b no_dir2 ;if clr, pos op, no inc
+ move.w #$8000,FPTEMP_EX(a6) ;negative sign, zero exponent
+ clr.l FPTEMP_HI(a6)
+ move.l #1,FPTEMP_LO(a6) ;set lsb
+ or.l #neg_mask,USER_FPSR(a6) ;set N
+ bra.w sm_dnrm ;return smallest denorm
+
+no_dir2:
+ tst.b L_SCR1(a6) ;L_SCR1 is $ff (negative) or $00
+ bge.b pos_zero
+neg_zero:
+ clr.l FP_SCR1(a6) ;clear the exceptional operand
+ clr.l FP_SCR1+4(a6) ;for gen_except.
+ clr.l FP_SCR1+8(a6)
+ fmove.s #:80000000,fp0 ;single-precision -0.0
+ rts
+pos_zero:
+ clr.l FP_SCR1(a6) ;clear the exceptional operand
+ clr.l FP_SCR1+4(a6) ;for gen_except.
+ clr.l FP_SCR1+8(a6)
+ fmove.s #:00000000,fp0 ;single-precision +0.0
+ rts
+
+*
+* The destination is a denormalized number. It must be handled
+* by first shifting the bits in the mantissa until it is normalized,
+* then adding the remainder of the source to the exponent.
+*
+*
+* dst_dnrm --- destination exponent is zero and the source is a
+* positive integer in d0. d2/d3 are saved on the stack and restored
+* on both exits. The mantissa (d2:d3) is shifted left one bit per
+* pass, decrementing d0, until either its top bit is set (dst_norm)
+* or d0 reaches zero (dst_fin).
+* d1 is reloaded with the raw FPTEMP_EX word, so it still carries the
+* destination sign bit from here on.
+*
+dst_dnrm:
+ movem.l d2/d3,-(a7)
+ move.w FPTEMP_EX(a6),d1
+ move.l FPTEMP_HI(a6),d2
+ move.l FPTEMP_LO(a6),d3
+dst_loop:
+ tst.l d2 ;test for normalized result
+ blt.b dst_norm ;exit loop if so
+ tst.l d0 ;otherwise, test shift count
+ beq.b dst_fin ;if zero, shifting is done
+ subq.l #1,d0 ;dec src
+ add.l d3,d3 ;64-bit left shift of d2:d3 by one:
+ addx.l d2,d2 ;carry out of d3 enters d2 via X
+ bra.b dst_loop
+*
+* Destination became normalized. Simply add the remaining
+* portion of the src to the exponent.
+*
+dst_norm:
+ add.w d0,d1 ;dst is normalized; add src
+ tst.b L_SCR1(a6)
+ beq.b dnrm_pos
+ or.w #$8000,d1
+dnrm_pos:
+ movem.w d1,FPTEMP_EX(a6)
+ movem.l d2,FPTEMP_HI(a6)
+ movem.l d3,FPTEMP_LO(a6)
+ fmove.l USER_FPCR(a6),FPCR
+ fmove.x FPTEMP(a6),fp0
+ movem.l (a7)+,d2/d3
+ rts
+
+*
+* Destination remained denormalized. Call t_resdnrm with
+* exceptional operand in ETEMP.
+*
+dst_fin:
+ tst.b L_SCR1(a6) ;check for sign
+ beq.b dst_exit
+ or.l #neg_mask,USER_FPSR(a6) ;set N
+ or.w #$8000,d1
+dst_exit:
+ movem.w d1,ETEMP_EX(a6)
+ movem.l d2,ETEMP_HI(a6)
+ movem.l d3,ETEMP_LO(a6)
+ or.l #unfl_mask,USER_FPSR(a6) ;set unfl
+ movem.l (a7)+,d2/d3
+ lea.l ETEMP(a6),a0
+ bra t_resdnrm
+
+*
+* Source is outside of 2^14 range. Test the sign and branch
+* to the appropriate exception handler.
+*
+*
+* src_out --- source exponent is above $400c. The destination
+* mantissa is copied into ETEMP; the source sign is tested BEFORE the
+* ETEMP exponent word is overwritten with d1 (dest exponent plus the
+* saved dest sign). Positive source -> t_ovfl2, negative source ->
+* t_unfl with a0 -> ETEMP.
+*
+src_out:
+ tst.b L_SCR1(a6)
+ beq.b scro_pos
+ or.w #$8000,d1
+scro_pos:
+ move.l FPTEMP_HI(a6),ETEMP_HI(a6)
+ move.l FPTEMP_LO(a6),ETEMP_LO(a6)
+ tst.w ETEMP(a6) ;sign of the source operand
+ blt.b res_neg
+res_pos:
+ move.w d1,ETEMP(a6) ;result in ETEMP
+ bra t_ovfl2
+res_neg:
+ move.w d1,ETEMP(a6) ;result in ETEMP
+ lea.l ETEMP(a6),a0
+ bra t_unfl
+ end
diff --git a/sys/arch/m68k/fpsp/scosh.sa b/sys/arch/m68k/fpsp/scosh.sa
new file mode 100644
index 00000000000..93fffc268aa
--- /dev/null
+++ b/sys/arch/m68k/fpsp/scosh.sa
@@ -0,0 +1,156 @@
+* $NetBSD: scosh.sa,v 1.2 1994/10/26 07:49:39 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* scosh.sa 3.1 12/10/90
+*
+* The entry point sCosh computes the hyperbolic cosine of
+* an input argument; sCoshd does the same except for denormalized
+* input.
+*
+* Input: Double-extended number X in location pointed to
+* by address register a0.
+*
+* Output: The value cosh(X) returned in floating-point register Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 3 ulps in
+* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+* result is subsequently rounded to double precision. The
+* result is provably monotonic in double precision.
+*
+* Speed: The program sCOSH takes approximately 250 cycles.
+*
+* Algorithm:
+*
+* COSH
+* 1. If |X| > 16380 log2, go to 3.
+*
+* 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae
+* y = |X|, z = exp(Y), and
+* cosh(X) = (1/2)*( z + 1/z ).
+* Exit.
+*
+* 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.
+*
+* 4. (16380 log2 < |X| <= 16480 log2)
+* cosh(X) = exp(|X|)/2.
+* However, invoking exp(|X|) may cause premature overflow.
+* Thus, we calculate cosh(X) as follows:
+* Y := |X|
+* Fact := 2**(16380)
+* Y' := Y - 16381 log2
+* cosh(X) := Fact * exp(Y').
+* Exit.
+*
+* 5. (|X| > 16480 log2) cosh(X) must overflow. Return
+* Huge*Huge to generate overflow and a positive infinity
+* (the result is always positive, whatever the sign of X).
+* Huge is the largest finite number in extended format. Exit.
+*
+
+SCOSH IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ xref t_ovfl
+ xref t_frcinx
+ xref setox
+
+T1 DC.L $40C62D38,$D3D64634 ... 16381 LOG2 LEAD
+T2 DC.L $3D6F90AE,$B1E75CC7 ... 16381 LOG2 TRAIL
+
+TWO16380 DC.L $7FFB0000,$80000000,$00000000,$00000000
+
+ xdef scoshd
+*
+* scoshd --- cosh of a denormalized X.
+* Loads 1.0 into fp0, loads d1 into FPCR, then adds the
+* single-precision constant $00800000 before leaving through
+* t_frcinx.
+*
+scoshd:
+*--COSH(X) = 1 FOR DENORMALIZED X
+
+ FMOVE.S #:3F800000,FP0
+
+ FMOVE.L d1,FPCR
+ FADD.S #:00800000,FP0
+ bra t_frcinx
+
+ xdef scosh
+*
+* scosh --- cosh(X) for the extended-precision operand at (a0).
+* Result is left in fp0; exits are tail branches to t_frcinx or
+* t_ovfl. The operand at (a0) is overwritten on every path (with the
+* argument passed to setox, or with |X| before t_ovfl).
+* d1 is saved around the setox calls and later loaded into FPCR, so
+* it is presumably the caller's FPCR image on entry -- confirm
+* against the dispatcher.
+*
+scosh:
+ FMOVE.X (a0),FP0 ...LOAD INPUT
+
+*--Build the compact magnitude in d0: sign/exponent word in the
+*--upper half, top 16 mantissa bits in the lower half.
+ move.l (a0),d0
+ move.w 4(a0),d0
+ ANDI.L #$7FFFFFFF,d0
+*--$400CB167 is the threshold for step 1 of the header algorithm
+*--(16380 log2 in compact form, per that description).
+ CMPI.L #$400CB167,d0
+ BGT.B COSHBIG
+
+*--THIS IS THE USUAL CASE, |X| < 16380 LOG2
+*--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
+
+ FABS.X FP0 ...|X|
+
+*--Save d1 and call setox with d1 = 0 and |X| stored at (a0).
+ move.l d1,-(sp)
+ clr.l d1
+ fmovem.x fp0,(a0) ;pass parameter to setox
+ bsr setox ...FP0 IS EXP(|X|)
+ FMUL.S #:3F000000,FP0 ...(1/2)EXP(|X|)
+ move.l (sp)+,d1
+
+ FMOVE.S #:3E800000,FP1 ...(1/4)
+ FDIV.X FP0,FP1 ...1/(2 EXP(|X|))
+
+*--FPCR is loaded from d1 just before the final add.
+ FMOVE.L d1,FPCR
+ FADD.X fp1,FP0
+
+ bra t_frcinx
+
+*--Middle range: subtract 16381 log2 (as lead + trail parts T1, T2),
+*--call setox, then multiply by TWO16380.
+COSHBIG:
+ CMPI.L #$400CB2B3,d0
+ BGT.B COSHHUGE
+
+ FABS.X FP0
+ FSUB.D T1(pc),FP0 ...(|X|-16381LOG2_LEAD)
+ FSUB.D T2(pc),FP0 ...|X| - 16381 LOG2, ACCURATE
+
+ move.l d1,-(sp)
+ clr.l d1
+ fmovem.x fp0,(a0)
+ bsr setox
+ fmove.l (sp)+,fpcr
+
+ FMUL.X TWO16380(pc),FP0
+ bra t_frcinx
+
+*--Beyond the second threshold: clear FPSR, force the operand at (a0)
+*--positive by clearing its sign bit, load it and go to t_ovfl.
+COSHHUGE:
+ fmove.l #0,fpsr ;clr N bit if set by source
+ bclr.b #7,(a0) ;always return positive value
+ fmovem.x (a0),fp0
+ bra t_ovfl
+
+ end
diff --git a/sys/arch/m68k/fpsp/setox.sa b/sys/arch/m68k/fpsp/setox.sa
new file mode 100644
index 00000000000..7627b746bdd
--- /dev/null
+++ b/sys/arch/m68k/fpsp/setox.sa
@@ -0,0 +1,889 @@
+* $NetBSD: setox.sa,v 1.3 1994/10/26 07:49:42 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* setox.sa 3.1 12/10/90
+*
+* The entry point setox computes the exponential of a value.
+* setoxd does the same except the input value is a denormalized
+* number. setoxm1 computes exp(X)-1, and setoxm1d computes
+* exp(X)-1 for denormalized X.
+*
+* INPUT
+* -----
+* Double-extended value in memory location pointed to by address
+* register a0.
+*
+* OUTPUT
+* ------
+* exp(X) or exp(X)-1 returned in floating-point register fp0.
+*
+* ACCURACY and MONOTONICITY
+* -------------------------
+* The returned result is within 0.85 ulps in 64 significant bits, i.e.
+* within 0.5001 ulp to 53 bits if the result is subsequently rounded
+* to double precision. The result is provably monotonic in double
+* precision.
+*
+* SPEED
+* -----
+* Two timings are measured, both in the copy-back mode. The
+* first one is measured when the function is invoked the first time
+* (so the instructions and data are not in cache), and the
+* second one is measured when the function is reinvoked at the same
+* input argument.
+*
+* The program setox takes approximately 210/190 cycles for input
+* argument X whose magnitude is less than 16380 log2, which
+* is the usual situation. For the less common arguments,
+* depending on their values, the program may run faster or slower --
+* but no worse than 10% slower even in the extreme cases.
+*
+* The program setoxm1 takes approximately ???/??? cycles for input
+* argument X, 0.25 <= |X| < 70log2. For |X| < 0.25, it takes
+* approximately ???/??? cycles. For the less common arguments,
+* depending on their values, the program may run faster or slower --
+* but no worse than 10% slower even in the extreme cases.
+*
+* ALGORITHM and IMPLEMENTATION NOTES
+* ----------------------------------
+*
+* setoxd
+* ------
+* Step 1. Set ans := 1.0
+*
+* Step 2. Return ans := ans + sign(X)*2^(-126). Exit.
+* Notes: This will always generate one exception -- inexact.
+*
+*
+* setox
+* -----
+*
+* Step 1. Filter out extreme cases of input argument.
+* 1.1 If |X| >= 2^(-65), go to Step 1.3.
+* 1.2 Go to Step 7.
+* 1.3 If |X| < 16380 log(2), go to Step 2.
+* 1.4 Go to Step 8.
+* Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.
+* To avoid the use of floating-point comparisons, a
+* compact representation of |X| is used. This format is a
+* 32-bit integer, the upper (more significant) 16 bits are
+* the sign and biased exponent field of |X|; the lower 16
+* bits are the 16 most significant fraction (including the
+* explicit bit) bits of |X|. Consequently, the comparisons
+* in Steps 1.1 and 1.3 can be performed by integer comparison.
+* Note also that the constant 16380 log(2) used in Step 1.3
+* is also in the compact form. Thus taking the branch
+* to Step 2 guarantees |X| < 16380 log(2). There is no harm
+* to have a small number of cases where |X| is less than,
+* but close to, 16380 log(2) and the branch to Step 8 is
+* taken.
+*
+* Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ).
+* 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 was taken)
+* 2.2 N := round-to-nearest-integer( X * 64/log2 ).
+* 2.3 Calculate J = N mod 64; so J = 0,1,2,..., or 63.
+* 2.4 Calculate M = (N - J)/64; so N = 64M + J.
+* 2.5 Calculate the address of the stored value of 2^(J/64).
+* 2.6 Create the value Scale = 2^M.
+* Notes: The calculation in 2.2 is really performed by
+*
+* Z := X * constant
+* N := round-to-nearest-integer(Z)
+*
+* where
+*
+* constant := single-precision( 64/log 2 ).
+*
+* Using a single-precision constant avoids memory access.
+* Another effect of using a single-precision "constant" is
+* that the calculated value Z is
+*
+* Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).
+*
+* This error has to be considered later in Steps 3 and 4.
+*
+* Step 3. Calculate X - N*log2/64.
+* 3.1 R := X + N*L1, where L1 := single-precision(-log2/64).
+* 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
+* Notes: a) The way L1 and L2 are chosen ensures L1+L2 approximate
+* the value -log2/64 to 88 bits of accuracy.
+* b) N*L1 is exact because N is no longer than 22 bits and
+* L1 is no longer than 24 bits.
+* c) The calculation X+N*L1 is also exact due to cancellation.
+* Thus, R is practically X+N(L1+L2) to full 64 bits.
+* d) It is important to estimate how large can |R| be after
+* Step 3.2.
+*
+* N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)
+* X*64/log2 (1+eps) = N + f, |f| <= 0.5
+* X*64/log2 - N = f - eps*X 64/log2
+* X - N*log2/64 = f*log2/64 - eps*X
+*
+*
+* Now |X| <= 16446 log2, thus
+*
+* |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64
+* <= 0.57 log2/64.
+* This bound will be used in Step 4.
+*
+* Step 4. Approximate exp(R)-1 by a polynomial
+* p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
+* Notes: a) In order to reduce memory access, the coefficients are
+* made as "short" as possible: A1 (which is 1/2), A4 and A5
+* are single precision; A2 and A3 are double precision.
+* b) Even with the restrictions above,
+* |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.
+* Note that 0.0062 is slightly bigger than 0.57 log2/64.
+* c) To fully utilize the pipeline, p is separated into
+* two independent pieces of roughly equal complexities
+* p = [ R + R*S*(A2 + S*A4) ] +
+* [ S*(A1 + S*(A3 + S*A5)) ]
+* where S = R*R.
+*
+* Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by
+* ans := T + ( T*p + t)
+* where T and t are the stored values for 2^(J/64).
+* Notes: 2^(J/64) is stored as T and t where T+t approximates
+* 2^(J/64) to roughly 85 bits; T is in extended precision
+* and t is in single precision. Note also that T is rounded
+* to 62 bits so that the last two bits of T are zero. The
+* reason for such a special form is that T-1, T-2, and T-8
+* will all be exact --- a property that will give much
+* more accurate computation of the function EXPM1.
+*
+* Step 6. Reconstruction of exp(X)
+* exp(X) = 2^M * 2^(J/64) * exp(R).
+* 6.1 If AdjFlag = 0, go to 6.3
+* 6.2 ans := ans * AdjScale
+* 6.3 Restore the user FPCR
+* 6.4 Return ans := ans * Scale. Exit.
+* Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,
+* |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will
+* neither overflow nor underflow. If AdjFlag = 1, that
+* means that
+* X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.
+* Hence, exp(X) may overflow or underflow or neither.
+* When that is the case, AdjScale = 2^(M1) where M1 is
+* approximately M. Thus 6.2 will never cause over/underflow.
+* Possible exception in 6.4 is overflow or underflow.
+* The inexact exception is not generated in 6.4. Although
+* one can argue that the inexact flag should always be
+* raised, simulating that exception costs more than the
+* flag is worth in practical use.
+*
+* Step 7. Return 1 + X.
+* 7.1 ans := X
+* 7.2 Restore user FPCR.
+* 7.3 Return ans := 1 + ans. Exit
+* Notes: For non-zero X, the inexact exception will always be
+* raised by 7.3. That is the only exception raised by 7.3.
+* Note also that we use the FMOVEM instruction to move X
+* in Step 7.1 to avoid unnecessary trapping. (Although
+* the FMOVEM may not seem relevant since X is normalized,
+* the precaution will be useful in the library version of
+* this code where the separate entry for denormalized inputs
+* will be done away with.)
+*
+* Step 8. Handle exp(X) where |X| >= 16380log2.
+* 8.1 If |X| > 16480 log2, go to Step 9.
+* (mimic 2.2 - 2.6)
+* 8.2 N := round-to-integer( X * 64/log2 )
+* 8.3 Calculate J = N mod 64, J = 0,1,...,63
+* 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, AdjFlag := 1.
+* 8.5 Calculate the address of the stored value 2^(J/64).
+* 8.6 Create the values Scale = 2^M, AdjScale = 2^M1.
+* 8.7 Go to Step 3.
+* Notes: Refer to notes for 2.2 - 2.6.
+*
+* Step 9. Handle exp(X), |X| > 16480 log2.
+* 9.1 If X < 0, go to 9.3
+* 9.2 ans := Huge, go to 9.4
+* 9.3 ans := Tiny.
+* 9.4 Restore user FPCR.
+* 9.5 Return ans := ans * ans. Exit.
+* Notes: Exp(X) will surely overflow or underflow, depending on
+* X's sign. "Huge" and "Tiny" are respectively large/tiny
+* extended-precision numbers whose square over/underflow
+* with an inexact result. Thus, 9.5 always raises the
+* inexact together with either overflow or underflow.
+*
+*
+* setoxm1d
+* --------
+*
+* Step 1. Set ans := 0
+*
+* Step 2. Return ans := X + ans. Exit.
+* Notes: This will return X with the appropriate rounding
+* precision prescribed by the user FPCR.
+*
+* setoxm1
+* -------
+*
+* Step 1. Check |X|
+* 1.1 If |X| >= 1/4, go to Step 1.3.
+* 1.2 Go to Step 7.
+* 1.3 If |X| < 70 log(2), go to Step 2.
+* 1.4 Go to Step 10.
+* Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.
+* However, it is conceivable |X| can be small very often
+* because EXPM1 is intended to evaluate exp(X)-1 accurately
+* when |X| is small. For further details on the comparisons,
+* see the notes on Step 1 of setox.
+*
+* Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ).
+* 2.1 N := round-to-nearest-integer( X * 64/log2 ).
+* 2.2 Calculate J = N mod 64; so J = 0,1,2,..., or 63.
+* 2.3 Calculate M = (N - J)/64; so N = 64M + J.
+* 2.4 Calculate the address of the stored value of 2^(J/64).
+* 2.5 Create the values Sc = 2^M and OnebySc := -2^(-M).
+* Notes: See the notes on Step 2 of setox.
+*
+* Step 3. Calculate X - N*log2/64.
+* 3.1 R := X + N*L1, where L1 := single-precision(-log2/64).
+* 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
+* Notes: Applying the analysis of Step 3 of setox in this case
+* shows that |R| <= 0.0055 (note that |X| <= 70 log2 in
+* this case).
+*
+* Step 4. Approximate exp(R)-1 by a polynomial
+* p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6)))))
+* Notes: a) In order to reduce memory access, the coefficients are
+* made as "short" as possible: A1 (which is 1/2), A5 and A6
+* are single precision; A2, A3 and A4 are double precision.
+* b) Even with the restriction above,
+* |p - (exp(R)-1)| < |R| * 2^(-72.7)
+* for all |R| <= 0.0055.
+* c) To fully utilize the pipeline, p is separated into
+* two independent pieces of roughly equal complexity
+* p = [ R*S*(A2 + S*(A4 + S*A6)) ] +
+* [ R + S*(A1 + S*(A3 + S*A5)) ]
+* where S = R*R.
+*
+* Step 5. Compute 2^(J/64)*p by
+* p := T*p
+* where T and t are the stored values for 2^(J/64).
+* Notes: 2^(J/64) is stored as T and t where T+t approximates
+* 2^(J/64) to roughly 85 bits; T is in extended precision
+* and t is in single precision. Note also that T is rounded
+* to 62 bits so that the last two bits of T are zero. The
+* reason for such a special form is that T-1, T-2, and T-8
+* will all be exact --- a property that will be exploited
+* in Step 6 below. The total relative error in p is no
+* bigger than 2^(-67.7) compared to the final result.
+*
+* Step 6. Reconstruction of exp(X)-1
+* exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).
+* 6.1 If M <= 63, go to Step 6.3.
+* 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6
+* 6.3 If M >= -3, go to 6.5.
+* 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6
+* 6.5 ans := (T + OnebySc) + (p + t).
+* 6.6 Restore user FPCR.
+* 6.7 Return ans := Sc * ans. Exit.
+* Notes: The various arrangements of the expressions give accurate
+* evaluations.
+*
+* Step 7. exp(X)-1 for |X| < 1/4.
+* 7.1 If |X| >= 2^(-65), go to Step 9.
+* 7.2 Go to Step 8.
+*
+* Step 8. Calculate exp(X)-1, |X| < 2^(-65).
+* 8.1 If |X| < 2^(-16312), goto 8.3
+* 8.2 Restore FPCR; return ans := X - 2^(-16382). Exit.
+* 8.3 X := X * 2^(140).
+* 8.4 Restore FPCR; ans := ans - 2^(-16382).
+* Return ans := ans*2^(140). Exit
+* Notes: The idea is to return "X - tiny" under the user
+* precision and rounding modes. To avoid unnecessary
+* inefficiency, we stay away from denormalized numbers the
+* best we can. For |X| >= 2^(-16312), the straightforward
+* 8.2 generates the inexact exception as the case warrants.
+*
+* Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial
+* p = X + X*X*(B1 + X*(B2 + ... + X*B12))
+* Notes: a) In order to reduce memory access, the coefficients are
+* made as "short" as possible: B1 (which is 1/2), B9 to B12
+* are single precision; B3 to B8 are double precision; and
+* B2 is double extended.
+* b) Even with the restriction above,
+* |p - (exp(X)-1)| < |X| 2^(-70.6)
+* for all |X| <= 0.251.
+* Note that 0.251 is slightly bigger than 1/4.
+* c) To fully preserve accuracy, the polynomial is computed
+* as X + ( S*B1 + Q ) where S = X*X and
+* Q = X*S*(B2 + X*(B3 + ... + X*B12))
+* d) To fully utilize the pipeline, Q is separated into
+* two independent pieces of roughly equal complexity
+* Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +
+* [ S*S*(B3 + S*(B5 + ... + S*B11)) ]
+*
+* Step 10. Calculate exp(X)-1 for |X| >= 70 log 2.
+* 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all practical
+* purposes. Therefore, go to Step 1 of setox.
+* 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical purposes.
+* ans := -1
+* Restore user FPCR
+* Return ans := ans + 2^(-126). Exit.
+* Notes: 10.2 will always create an inexact and return -1 + tiny
+* in the user rounding precision and mode.
+*
+
+setox IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+L2 DC.L $3FDC0000,$82E30865,$4361C4C6,$00000000
+
+EXPA3 DC.L $3FA55555,$55554431
+EXPA2 DC.L $3FC55555,$55554018
+
+HUGE DC.L $7FFE0000,$FFFFFFFF,$FFFFFFFF,$00000000
+TINY DC.L $00010000,$FFFFFFFF,$FFFFFFFF,$00000000
+
+EM1A4 DC.L $3F811111,$11174385
+EM1A3 DC.L $3FA55555,$55554F5A
+
+EM1A2 DC.L $3FC55555,$55555555,$00000000,$00000000
+
+EM1B8 DC.L $3EC71DE3,$A5774682
+EM1B7 DC.L $3EFA01A0,$19D7CB68
+
+EM1B6 DC.L $3F2A01A0,$1A019DF3
+EM1B5 DC.L $3F56C16C,$16C170E2
+
+EM1B4 DC.L $3F811111,$11111111
+EM1B3 DC.L $3FA55555,$55555555
+
+EM1B2 DC.L $3FFC0000,$AAAAAAAA,$AAAAAAAB
+ DC.L $00000000
+
+TWO140 DC.L $48B00000,$00000000
+TWON140 DC.L $37300000,$00000000
+
+EXPTBL
+ DC.L $3FFF0000,$80000000,$00000000,$00000000
+ DC.L $3FFF0000,$8164D1F3,$BC030774,$9F841A9B
+ DC.L $3FFF0000,$82CD8698,$AC2BA1D8,$9FC1D5B9
+ DC.L $3FFF0000,$843A28C3,$ACDE4048,$A0728369
+ DC.L $3FFF0000,$85AAC367,$CC487B14,$1FC5C95C
+ DC.L $3FFF0000,$871F6196,$9E8D1010,$1EE85C9F
+ DC.L $3FFF0000,$88980E80,$92DA8528,$9FA20729
+ DC.L $3FFF0000,$8A14D575,$496EFD9C,$A07BF9AF
+ DC.L $3FFF0000,$8B95C1E3,$EA8BD6E8,$A0020DCF
+ DC.L $3FFF0000,$8D1ADF5B,$7E5BA9E4,$205A63DA
+ DC.L $3FFF0000,$8EA4398B,$45CD53C0,$1EB70051
+ DC.L $3FFF0000,$9031DC43,$1466B1DC,$1F6EB029
+ DC.L $3FFF0000,$91C3D373,$AB11C338,$A0781494
+ DC.L $3FFF0000,$935A2B2F,$13E6E92C,$9EB319B0
+ DC.L $3FFF0000,$94F4EFA8,$FEF70960,$2017457D
+ DC.L $3FFF0000,$96942D37,$20185A00,$1F11D537
+ DC.L $3FFF0000,$9837F051,$8DB8A970,$9FB952DD
+ DC.L $3FFF0000,$99E04593,$20B7FA64,$1FE43087
+ DC.L $3FFF0000,$9B8D39B9,$D54E5538,$1FA2A818
+ DC.L $3FFF0000,$9D3ED9A7,$2CFFB750,$1FDE494D
+ DC.L $3FFF0000,$9EF53260,$91A111AC,$20504890
+ DC.L $3FFF0000,$A0B0510F,$B9714FC4,$A073691C
+ DC.L $3FFF0000,$A2704303,$0C496818,$1F9B7A05
+ DC.L $3FFF0000,$A43515AE,$09E680A0,$A0797126
+ DC.L $3FFF0000,$A5FED6A9,$B15138EC,$A071A140
+ DC.L $3FFF0000,$A7CD93B4,$E9653568,$204F62DA
+ DC.L $3FFF0000,$A9A15AB4,$EA7C0EF8,$1F283C4A
+ DC.L $3FFF0000,$AB7A39B5,$A93ED338,$9F9A7FDC
+ DC.L $3FFF0000,$AD583EEA,$42A14AC8,$A05B3FAC
+ DC.L $3FFF0000,$AF3B78AD,$690A4374,$1FDF2610
+ DC.L $3FFF0000,$B123F581,$D2AC2590,$9F705F90
+ DC.L $3FFF0000,$B311C412,$A9112488,$201F678A
+ DC.L $3FFF0000,$B504F333,$F9DE6484,$1F32FB13
+ DC.L $3FFF0000,$B6FD91E3,$28D17790,$20038B30
+ DC.L $3FFF0000,$B8FBAF47,$62FB9EE8,$200DC3CC
+ DC.L $3FFF0000,$BAFF5AB2,$133E45FC,$9F8B2AE6
+ DC.L $3FFF0000,$BD08A39F,$580C36C0,$A02BBF70
+ DC.L $3FFF0000,$BF1799B6,$7A731084,$A00BF518
+ DC.L $3FFF0000,$C12C4CCA,$66709458,$A041DD41
+ DC.L $3FFF0000,$C346CCDA,$24976408,$9FDF137B
+ DC.L $3FFF0000,$C5672A11,$5506DADC,$201F1568
+ DC.L $3FFF0000,$C78D74C8,$ABB9B15C,$1FC13A2E
+ DC.L $3FFF0000,$C9B9BD86,$6E2F27A4,$A03F8F03
+ DC.L $3FFF0000,$CBEC14FE,$F2727C5C,$1FF4907D
+ DC.L $3FFF0000,$CE248C15,$1F8480E4,$9E6E53E4
+ DC.L $3FFF0000,$D06333DA,$EF2B2594,$1FD6D45C
+ DC.L $3FFF0000,$D2A81D91,$F12AE45C,$A076EDB9
+ DC.L $3FFF0000,$D4F35AAB,$CFEDFA20,$9FA6DE21
+ DC.L $3FFF0000,$D744FCCA,$D69D6AF4,$1EE69A2F
+ DC.L $3FFF0000,$D99D15C2,$78AFD7B4,$207F439F
+ DC.L $3FFF0000,$DBFBB797,$DAF23754,$201EC207
+ DC.L $3FFF0000,$DE60F482,$5E0E9124,$9E8BE175
+ DC.L $3FFF0000,$E0CCDEEC,$2A94E110,$20032C4B
+ DC.L $3FFF0000,$E33F8972,$BE8A5A50,$2004DFF5
+ DC.L $3FFF0000,$E5B906E7,$7C8348A8,$1E72F47A
+ DC.L $3FFF0000,$E8396A50,$3C4BDC68,$1F722F22
+ DC.L $3FFF0000,$EAC0C6E7,$DD243930,$A017E945
+ DC.L $3FFF0000,$ED4F301E,$D9942B84,$1F401A5B
+ DC.L $3FFF0000,$EFE4B99B,$DCDAF5CC,$9FB9A9E3
+ DC.L $3FFF0000,$F281773C,$59FFB138,$20744C05
+ DC.L $3FFF0000,$F5257D15,$2486CC2C,$1F773A19
+ DC.L $3FFF0000,$F7D0DF73,$0AD13BB8,$1FFE90D5
+ DC.L $3FFF0000,$FA83B2DB,$722A033C,$A041ED22
+ DC.L $3FFF0000,$FD3E0C0C,$F486C174,$1F853F3A
+
+ADJFLAG equ L_SCR2
+SCALE equ FP_SCR1
+ADJSCALE equ FP_SCR2
+SC equ FP_SCR3
+ONEBYSC equ FP_SCR4
+
+ xref t_frcinx
+ xref t_extdnrm
+ xref t_unfl
+ xref t_ovfl
+
+ xdef setoxd
+setoxd:
+*--entry point for EXP(X), X is denormalized: returns 1 + sign(X)*2^(-126) via t_frcinx
+ MOVE.L (a0),d0 ...first longword of X (sign and exponent)
+ ANDI.L #$80000000,d0 ...keep only the sign bit of X
+ ORI.L #$00800000,d0 ...sign(X)*2^(-126)
+ MOVE.L d0,-(sp) ...push it as a single-precision operand
+ FMOVE.S #:3F800000,fp0 ...fp0 is 1.0
+ fmove.l d1,fpcr ...load FPCR from d1 before the final add
+ FADD.S (sp)+,fp0 ...1 + sign(X)*2^(-126); pops the operand
+ bra t_frcinx
+
+ xdef setox
+setox:
+*--entry point for EXP(X), here X is finite, non-zero, and not NaN's
+
+*--Step 1. Dispatch on the size of |X|: EXPSM, EXPMAIN or EXPBIG.
+ MOVE.L (a0),d0 ...load part of input X
+ ANDI.L #$7FFF0000,d0 ...biased expo. of X
+ CMPI.L #$3FBE0000,d0 ...2^(-65)
+ BGE.B EXPC1 ...normal case
+ BRA.W EXPSM ...|X| < 2^(-65), Step 7
+
+EXPC1:
+*--The case |X| >= 2^(-65). EM1BIG (setoxm1) also branches here for positive X.
+ MOVE.W 4(a0),d0 ...expo. and partial sig. of |X|
+ CMPI.L #$400CB167,d0 ...16380 log2 trunc. 16 bits
+ BLT.B EXPMAIN ...normal case
+ BRA.W EXPBIG ...otherwise Step 8
+
+EXPMAIN:
+*--Step 2.
+*--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
+ FMOVE.X (a0),fp0 ...load input from (a0)
+
+ FMOVE.X fp0,fp1 ...fp1 keeps a copy of X
+ FMUL.S #:42B8AA3B,fp0 ...64/log2 * X
+ fmovem.x fp2/fp3,-(a7) ...save fp2/fp3
+ CLR.L ADJFLAG(a6) ...no extra scaling in Step 6
+ FMOVE.L fp0,d0 ...N = int( X * 64/log2 )
+ LEA EXPTBL,a1 ...a1 is base of the 2^(J/64) table
+ FMOVE.L d0,fp0 ...convert to floating-format
+
+ MOVE.L d0,L_SCR1(a6) ...save N temporarily
+ ANDI.L #$3F,d0 ...D0 is J = N mod 64
+ LSL.L #4,d0 ...16 bytes per table entry
+ ADDA.L d0,a1 ...address of 2^(J/64)
+ MOVE.L L_SCR1(a6),d0 ...reload N
+ ASR.L #6,d0 ...D0 is M
+ ADDI.W #$3FFF,d0 ...biased expo. of 2^(M)
+ MOVE.W L2,L_SCR1(a6) ...prefetch L2, no need in CB
+
+EXPCONT1:
+*--Step 3.
+*--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
+*--a1 points to 2^(J/64), D0 is biased expo. of 2^(M)
+ FMOVE.X fp0,fp2
+ FMUL.S #:BC317218,fp0 ...N * L1, L1 = lead(-log2/64)
+ FMUL.X L2,fp2 ...N * L2, L1+L2 = -log2/64
+ FADD.X fp1,fp0 ...X + N*L1
+ FADD.X fp2,fp0 ...fp0 is R, reduced arg.
+* MOVE.W #$3FA5,EXPA3 ...load EXPA3 in cache
+
+*--Step 4.
+*--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
+*-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
+*--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
+*--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
+
+ FMOVE.X fp0,fp1
+ FMUL.X fp1,fp1 ...fp1 IS S = R*R
+
+ FMOVE.S #:3AB60B70,fp2 ...fp2 IS A5
+* CLR.W 2(a1) ...load 2^(J/64) in cache
+
+ FMUL.X fp1,fp2 ...fp2 IS S*A5
+ FMOVE.X fp1,fp3
+ FMUL.S #:3C088895,fp3 ...fp3 IS S*A4
+
+ FADD.D EXPA3,fp2 ...fp2 IS A3+S*A5
+ FADD.D EXPA2,fp3 ...fp3 IS A2+S*A4
+
+ FMUL.X fp1,fp2 ...fp2 IS S*(A3+S*A5)
+ MOVE.W d0,SCALE(a6) ...SCALE is 2^(M) in extended
+ clr.w SCALE+2(a6) ...zero the word after the exponent
+ move.l #$80000000,SCALE+4(a6) ...upper significand: only the top bit set
+ clr.l SCALE+8(a6) ...lower significand is zero
+
+ FMUL.X fp1,fp3 ...fp3 IS S*(A2+S*A4)
+
+ FADD.S #:3F000000,fp2 ...fp2 IS A1+S*(A3+S*A5)
+ FMUL.X fp0,fp3 ...fp3 IS R*S*(A2+S*A4)
+
+ FMUL.X fp1,fp2 ...fp2 IS S*(A1+S*(A3+S*A5))
+ FADD.X fp3,fp0 ...fp0 IS R+R*S*(A2+S*A4),
+* ...fp3 released
+
+ FMOVE.X (a1)+,fp1 ...fp1 is lead. pt. of 2^(J/64)
+ FADD.X fp2,fp0 ...fp0 is EXP(R) - 1
+* ...fp2 released
+
+*--Step 5
+*--final reconstruction process
+*--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
+
+ FMUL.X fp1,fp0 ...2^(J/64)*(Exp(R)-1)
+ fmovem.x (a7)+,fp2/fp3 ...fp2/fp3 restored
+ FADD.S (a1),fp0 ...accurate 2^(J/64)
+
+ FADD.X fp1,fp0 ...2^(J/64) + 2^(J/64)*...
+ MOVE.L ADJFLAG(a6),d0 ...0 from EXPMAIN, 1 from EXPBIG
+
+*--Step 6
+ TST.L D0 ...extra scaling requested?
+ BEQ.B NORMAL
+ADJUST:
+ FMUL.X ADJSCALE(a6),fp0 ...multiply 2^(M1), set up in EXPBIG
+NORMAL:
+ FMOVE.L d1,FPCR ...restore user FPCR
+ FMUL.X SCALE(a6),fp0 ...multiply 2^(M)
+ bra t_frcinx
+
+EXPSM:
+*--Step 7 |X| < 2^(-65): the result is 1+X rounded in the user's mode
+ FMOVEM.X (a0),fp0 ...in case X is denormalized
+ FMOVE.L d1,FPCR ...load FPCR from d1 before the final add
+ FADD.S #:3F800000,fp0 ...1+X in user mode
+ bra t_frcinx
+
+EXPBIG:
+*--Step 8 Large |X|: scale in two stages, 2^(M1) then 2^(M), with M1+M = K
+ CMPI.L #$400CB27C,d0 ...16480 log2
+ BGT.B EXP2BIG
+*--Steps 8.2 -- 8.6
+ FMOVE.X (a0),fp0 ...load input from (a0)
+
+ FMOVE.X fp0,fp1 ...fp1 keeps a copy of X
+ FMUL.S #:42B8AA3B,fp0 ...64/log2 * X
+ fmovem.x fp2/fp3,-(a7) ...save fp2/fp3
+ MOVE.L #1,ADJFLAG(a6) ...request the ADJSCALE multiply in Step 6
+ FMOVE.L fp0,d0 ...N = int( X * 64/log2 )
+ LEA EXPTBL,a1 ...a1 is base of the 2^(J/64) table
+ FMOVE.L d0,fp0 ...convert to floating-format
+ MOVE.L d0,L_SCR1(a6) ...save N temporarily
+ ANDI.L #$3F,d0 ...D0 is J = N mod 64
+ LSL.L #4,d0 ...16 bytes per table entry
+ ADDA.L d0,a1 ...address of 2^(J/64)
+ MOVE.L L_SCR1(a6),d0 ...reload N
+ ASR.L #6,d0 ...D0 is K
+ MOVE.L d0,L_SCR1(a6) ...save K temporarily
+ ASR.L #1,d0 ...D0 is M1
+ SUB.L d0,L_SCR1(a6) ...L_SCR1 is M = K - M1
+ ADDI.W #$3FFF,d0 ...biased expo. of 2^(M1)
+ MOVE.W d0,ADJSCALE(a6) ...ADJSCALE := 2^(M1)
+ clr.w ADJSCALE+2(a6) ...zero the word after the exponent
+ move.l #$80000000,ADJSCALE+4(a6) ...upper significand: only the top bit set
+ clr.l ADJSCALE+8(a6) ...lower significand is zero
+ MOVE.L L_SCR1(a6),d0 ...D0 is M
+ ADDI.W #$3FFF,d0 ...biased expo. of 2^(M)
+ BRA.W EXPCONT1 ...go back to Step 3
+
+EXP2BIG:
+*--Step 9 |X| beyond the EXPBIG threshold: leave through t_unfl or t_ovfl
+ FMOVE.L d1,FPCR ...load FPCR from d1
+ MOVE.L (a0),d0 ...capture sign of X before it is cleared
+ bclr.b #sign_bit,(a0) ...setox always returns positive
+ TST.L d0 ...test the original sign of X
+ BLT t_unfl ...X negative
+ BRA t_ovfl ...X positive
+
+ xdef setoxm1d
+setoxm1d:
+*--entry point for EXPM1(X), here X is denormalized
+*--Step 0. All work is delegated to the external routine t_extdnrm.
+ bra t_extdnrm
+
+
+ xdef setoxm1
+setoxm1:
+*--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
+
+*--Step 1. Dispatch on the size of |X|: EM1SM, EM1MAIN or EM1BIG.
+*--Step 1.1
+ MOVE.L (a0),d0 ...load part of input X
+ ANDI.L #$7FFF0000,d0 ...biased expo. of X
+ CMPI.L #$3FFD0000,d0 ...1/4
+ BGE.B EM1CON1 ...|X| >= 1/4
+ BRA.W EM1SM ...|X| < 1/4, Step 7
+
+EM1CON1:
+*--Step 1.3
+*--The case |X| >= 1/4
+ MOVE.W 4(a0),d0 ...expo. and partial sig. of |X|
+ CMPI.L #$4004C215,d0 ...70log2 rounded up to 16 bits
+ BLE.B EM1MAIN ...1/4 <= |X| <= 70log2
+ BRA.W EM1BIG ...otherwise Step 10
+
+EM1MAIN:
+*--Step 2.
+*--This is the case: 1/4 <= |X| <= 70 log2.
+ FMOVE.X (a0),fp0 ...load input from (a0)
+
+ FMOVE.X fp0,fp1 ...fp1 keeps a copy of X
+ FMUL.S #:42B8AA3B,fp0 ...64/log2 * X
+ fmovem.x fp2/fp3,-(a7) ...save fp2/fp3
+* MOVE.W #$3F81,EM1A4 ...prefetch in CB mode
+ FMOVE.L fp0,d0 ...N = int( X * 64/log2 )
+ LEA EXPTBL,a1 ...a1 is base of the 2^(J/64) table
+ FMOVE.L d0,fp0 ...convert to floating-format
+
+ MOVE.L d0,L_SCR1(a6) ...save N temporarily
+ ANDI.L #$3F,d0 ...D0 is J = N mod 64
+ LSL.L #4,d0 ...16 bytes per table entry
+ ADDA.L d0,a1 ...address of 2^(J/64)
+ MOVE.L L_SCR1(a6),d0 ...reload N
+ ASR.L #6,d0 ...D0 is M
+ MOVE.L d0,L_SCR1(a6) ...save a copy of M
+* MOVE.W #$3FDC,L2 ...prefetch L2 in CB mode
+
+*--Step 3.
+*--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
+*--a1 points to 2^(J/64), D0 and L_SCR1 both contain M
+ FMOVE.X fp0,fp2
+ FMUL.S #:BC317218,fp0 ...N * L1, L1 = lead(-log2/64)
+ FMUL.X L2,fp2 ...N * L2, L1+L2 = -log2/64
+ FADD.X fp1,fp0 ...X + N*L1
+ FADD.X fp2,fp0 ...fp0 is R, reduced arg.
+* MOVE.W #$3FC5,EM1A2 ...load EM1A2 in cache
+ ADDI.W #$3FFF,d0 ...D0 is biased expo. of 2^M
+
+*--Step 4.
+*--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
+*-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
+*--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
+*--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
+
+ FMOVE.X fp0,fp1
+ FMUL.X fp1,fp1 ...fp1 IS S = R*R
+
+ FMOVE.S #:3950097B,fp2 ...fp2 IS A6
+* CLR.W 2(a1) ...load 2^(J/64) in cache
+
+ FMUL.X fp1,fp2 ...fp2 IS S*A6
+ FMOVE.X fp1,fp3
+ FMUL.S #:3AB60B6A,fp3 ...fp3 IS S*A5
+
+ FADD.D EM1A4,fp2 ...fp2 IS A4+S*A6
+ FADD.D EM1A3,fp3 ...fp3 IS A3+S*A5
+ MOVE.W d0,SC(a6) ...SC is 2^(M) in extended
+ clr.w SC+2(a6) ...zero the word after the exponent
+ move.l #$80000000,SC+4(a6) ...upper significand: only the top bit set
+ clr.l SC+8(a6) ...lower significand is zero
+
+ FMUL.X fp1,fp2 ...fp2 IS S*(A4+S*A6)
+ MOVE.L L_SCR1(a6),d0 ...D0 is M
+ NEG.W D0 ...D0 is -M
+ FMUL.X fp1,fp3 ...fp3 IS S*(A3+S*A5)
+ ADDI.W #$3FFF,d0 ...biased expo. of 2^(-M)
+ FADD.D EM1A2,fp2 ...fp2 IS A2+S*(A4+S*A6)
+ FADD.S #:3F000000,fp3 ...fp3 IS A1+S*(A3+S*A5)
+
+ FMUL.X fp1,fp2 ...fp2 IS S*(A2+S*(A4+S*A6))
+ ORI.W #$8000,d0 ...signed/expo. of -2^(-M)
+ MOVE.W d0,ONEBYSC(a6) ...OnebySc is -2^(-M)
+ clr.w ONEBYSC+2(a6) ...zero the word after the exponent
+ move.l #$80000000,ONEBYSC+4(a6) ...upper significand: only the top bit set
+ clr.l ONEBYSC+8(a6) ...lower significand is zero
+ FMUL.X fp3,fp1 ...fp1 IS S*(A1+S*(A3+S*A5))
+* ...fp3 released
+
+ FMUL.X fp0,fp2 ...fp2 IS R*S*(A2+S*(A4+S*A6))
+ FADD.X fp1,fp0 ...fp0 IS R+S*(A1+S*(A3+S*A5))
+* ...fp1 released
+
+ FADD.X fp2,fp0 ...fp0 IS EXP(R)-1
+* ...fp2 released
+ fmovem.x (a7)+,fp2/fp3 ...fp2/fp3 restored
+
+*--Step 5
+*--Compute 2^(J/64)*p
+
+ FMUL.X (a1),fp0 ...2^(J/64)*(Exp(R)-1)
+
+*--Step 6 Add T, t and OnebySc to p; the order of the adds depends on M
+*--Step 6.1
+ MOVE.L L_SCR1(a6),d0 ...retrieve M
+ CMPI.L #63,d0
+ BLE.B MLE63
+*--Step 6.2 M >= 64
+ FMOVE.S 12(a1),fp1 ...fp1 is t
+ FADD.X ONEBYSC(a6),fp1 ...fp1 is t+OnebySc
+ FADD.X fp1,fp0 ...p+(t+OnebySc), fp1 released
+ FADD.X (a1),fp0 ...T+(p+(t+OnebySc))
+ BRA.B EM1SCALE
+MLE63:
+*--Step 6.3 M <= 63
+ CMPI.L #-3,d0
+ BGE.B MGEN3
+MLTN3:
+*--Step 6.4 M <= -4
+ FADD.S 12(a1),fp0 ...p+t
+ FADD.X (a1),fp0 ...T+(p+t)
+ FADD.X ONEBYSC(a6),fp0 ...OnebySc + (T+(p+t))
+ BRA.B EM1SCALE
+MGEN3:
+*--Step 6.5 -3 <= M <= 63
+ FMOVE.X (a1)+,fp1 ...fp1 is T
+ FADD.S (a1),fp0 ...fp0 is p+t
+ FADD.X ONEBYSC(a6),fp1 ...fp1 is T+OnebySc
+ FADD.X fp1,fp0 ...(T+OnebySc)+(p+t)
+
+EM1SCALE:
+*--Step 6.6
+ FMOVE.L d1,FPCR ...load FPCR from d1 before the final multiply
+ FMUL.X SC(a6),fp0 ...multiply by SC = 2^(M)
+
+ bra t_frcinx
+
+EM1SM:
+*--Step 7 |X| < 1/4. d0 still holds the biased exponent from Step 1.1
+ CMPI.L #$3FBE0000,d0 ...2^(-65)
+ BGE.B EM1POLY
+
+EM1TINY:
+*--Step 8 |X| < 2^(-65)
+ CMPI.L #$00330000,d0 ...2^(-16312); NOTE(review): $0033-$3FFF is -16332, confirm
+ BLT.B EM12TINY
+*--Step 8.2
+ MOVE.L #$80010000,SC(a6) ...SC is -2^(-16382)
+ move.l #$80000000,SC+4(a6) ...upper significand: only the top bit set
+ clr.l SC+8(a6) ...lower significand is zero
+ FMOVE.X (a0),fp0 ...fp0 is X
+ FMOVE.L d1,FPCR ...load FPCR from d1 before the final add
+ FADD.X SC(a6),fp0 ...X - 2^(-16382)
+
+ bra t_frcinx
+
+EM12TINY:
+*--Step 8.3 Scale X up by 2^140, add SC, then scale back down
+ FMOVE.X (a0),fp0 ...fp0 is X
+ FMUL.D TWO140,fp0 ...X * 2^(140)
+ MOVE.L #$80010000,SC(a6) ...SC is -2^(-16382)
+ move.l #$80000000,SC+4(a6) ...upper significand: only the top bit set
+ clr.l SC+8(a6) ...lower significand is zero
+ FADD.X SC(a6),fp0 ...add SC to the scaled X
+ FMOVE.L d1,FPCR ...load FPCR from d1 before the final multiply
+ FMUL.D TWON140,fp0 ...multiply by 2^(-140)
+
+ bra t_frcinx
+
+EM1POLY:
+*--Step 9 exp(X)-1 by a simple polynomial: X + S*B1 + Q, with S = X*X
+ FMOVE.X (a0),fp0 ...fp0 is X
+ FMUL.X fp0,fp0 ...fp0 is S := X*X
+ fmovem.x fp2/fp3,-(a7) ...save fp2/fp3
+ FMOVE.S #:2F30CAA8,fp1 ...fp1 is B12
+ FMUL.X fp0,fp1 ...fp1 is S*B12
+ FMOVE.S #:310F8290,fp2 ...fp2 is B11
+ FADD.S #:32D73220,fp1 ...fp1 is B10+S*B12
+
+ FMUL.X fp0,fp2 ...fp2 is S*B11
+ FMUL.X fp0,fp1 ...fp1 is S*(B10 + ...
+
+ FADD.S #:3493F281,fp2 ...fp2 is B9+S*...
+ FADD.D EM1B8,fp1 ...fp1 is B8+S*...
+
+ FMUL.X fp0,fp2 ...fp2 is S*(B9+...
+ FMUL.X fp0,fp1 ...fp1 is S*(B8+...
+
+ FADD.D EM1B7,fp2 ...fp2 is B7+S*...
+ FADD.D EM1B6,fp1 ...fp1 is B6+S*...
+
+ FMUL.X fp0,fp2 ...fp2 is S*(B7+...
+ FMUL.X fp0,fp1 ...fp1 is S*(B6+...
+
+ FADD.D EM1B5,fp2 ...fp2 is B5+S*...
+ FADD.D EM1B4,fp1 ...fp1 is B4+S*...
+
+ FMUL.X fp0,fp2 ...fp2 is S*(B5+...
+ FMUL.X fp0,fp1 ...fp1 is S*(B4+...
+
+ FADD.D EM1B3,fp2 ...fp2 is B3+S*...
+ FADD.X EM1B2,fp1 ...fp1 is B2+S*...
+
+ FMUL.X fp0,fp2 ...fp2 is S*(B3+...
+ FMUL.X fp0,fp1 ...fp1 is S*(B2+...
+
+ FMUL.X fp0,fp2 ...fp2 is S*S*(B3+...)
+ FMUL.X (a0),fp1 ...fp1 is X*S*(B2...
+
+ FMUL.S #:3F000000,fp0 ...fp0 is S*B1
+ FADD.X fp2,fp1 ...fp1 is Q
+* ...fp2 released
+
+ fmovem.x (a7)+,fp2/fp3 ...fp2/fp3 restored
+
+ FADD.X fp1,fp0 ...fp0 is S*B1+Q
+* ...fp1 released
+
+ FMOVE.L d1,FPCR ...load FPCR from d1 before the final add
+ FADD.X (a0),fp0 ...X + (S*B1+Q)
+
+ bra t_frcinx
+
+EM1BIG:
+*--Step 10 |X| > 70 log2
+ MOVE.L (a0),d0 ...sign/exponent longword of X
+ TST.L d0 ...test the sign of X
+ BGT.W EXPC1 ...X positive: continue in setox at EXPC1
+*--Step 10.2 X is negative: return -1 + 2^(-126) rounded in the user's mode
+ FMOVE.S #:BF800000,fp0 ...fp0 is -1
+ FMOVE.L d1,FPCR ...load FPCR from d1 before the final add
+ FADD.S #:00800000,fp0 ...-1 + 2^(-126)
+
+ bra t_frcinx
+
+ end
diff --git a/sys/arch/m68k/fpsp/sgetem.sa b/sys/arch/m68k/fpsp/sgetem.sa
new file mode 100644
index 00000000000..2a4f28f612d
--- /dev/null
+++ b/sys/arch/m68k/fpsp/sgetem.sa
@@ -0,0 +1,166 @@
+* $NetBSD: sgetem.sa,v 1.2 1994/10/26 07:49:45 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* sgetem.sa 3.1 12/10/90
+*
+* The entry point sGETEXP returns the exponent portion
+* of the input argument. The exponent bias is removed
+* and the exponent value is returned as an extended
+* precision number in fp0. sGETEXPD handles denormalized
+* numbers.
+*
+* The entry point sGETMAN extracts the mantissa of the
+* input argument. The mantissa is converted to an
+* extended precision number and returned in fp0. The
+* range of the result is [1.0 - 2.0).
+*
+*
+* Input: Double-extended number X in the ETEMP space in
+* the floating-point save stack.
+*
+* Output: The functions return exp(X) or man(X) in fp0.
+*
+* Modified: fp0.
+*
+
+SGETEM IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref nrm_set
+
+*
+* This entry point is used by the unimplemented instruction exception
+* handler. It points a0 to the input operand.
+*
+*
+*
+* SGETEXP
+*
+
+ xdef sgetexp
+sgetexp:
+ move.w LOCAL_EX(a0),d0 ;get the sign/exponent word of the operand
+ bclr.l #15,d0 ;clear the sign bit
+ sub.w #$3fff,d0 ;subtract off the bias
+ fmove.w d0,fp0 ;move the unbiased exp to fp0 (word integer source)
+ rts
+
+ xdef sgetexpd
+sgetexpd:
+ bclr.b #sign_bit,LOCAL_EX(a0) ;clear the sign bit in the operand itself
+ bsr nrm_set ;normalize (exp will go negative)
+ move.w LOCAL_EX(a0),d0 ;load resulting exponent into d0
+ sub.w #$3fff,d0 ;subtract off the bias
+ fmove.w d0,fp0 ;move the unbiased exp to fp0 (word integer source)
+ rts
+*
+*
+* This entry point is used by the unimplemented instruction exception
+* handler. It points a0 to the input operand.
+*
+*
+*
+* SGETMAN
+*
+*
+* For normalized numbers, leave the mantissa alone, simply load
+* with an exponent of +/- $3fff.
+*
+ xdef sgetman
+sgetman:
+ move.l USER_FPCR(a6),d0
+ andi.l #$ffffff00,d0 ;clear rounding precision and mode
+ fmove.l d0,fpcr ;this fpcr setting is used by the 882
+ move.w LOCAL_EX(a0),d0 ;get the exp (really just want sign bit)
+ or.w #$7fff,d0 ;set every exponent bit, keep the sign bit
+ bclr.l #14,d0 ;clear bit 14: the new exp is +-3fff
+ move.w d0,LOCAL_EX(a0) ;move the sign & exp back to fsave stack
+ fmove.x (a0),fp0 ;put new value back in fp0
+ rts
+
+*
+* For denormalized numbers, shift the mantissa until the j-bit = 1,
+* then load the exponent with +/- $3fff.
+*
+ xdef sgetmand
+sgetmand:
+ move.l LOCAL_HI(a0),d0 ;load ms mant in d0
+ move.l LOCAL_LO(a0),d1 ;load ls mant in d1
+ bsr shft ;shift mantissa bits till msbit is set
+ move.l d0,LOCAL_HI(a0) ;put ms mant back on stack
+ move.l d1,LOCAL_LO(a0) ;put ls mant back on stack
+ bra.b sgetman ;finish as for a normalized operand
+
+*
+* SHFT
+*
+* Shifts the mantissa bits until msbit is set.
+* input:
+* ms mantissa part in d0
+* ls mantissa part in d1
+* output:
+* shifted bits in d0 and d1 (left unchanged if both are zero)
+shft:
+ tst.l d0 ;if any bits set in ms mant
+ bne.b upper ;then branch
+* ;else no bits set in ms mant
+ tst.l d1 ;test if any bits set in ls mant
+ bne.b cont ;if set then continue
+ bra.b shft_end ;else return
+cont:
+ move.l d3,-(a7) ;save d3
+ exg d0,d1 ;shift ls mant to ms mant; d1 gets the zero ms mant
+ bfffo d0{0:32},d3 ;d3 = offset of first 1 in d0 (the old ls mant)
+ lsl.l d3,d0 ;shift first 1 to integer bit in ms mant
+ move.l (a7)+,d3 ;restore d3
+ bra.b shft_end
+upper:
+
+ movem.l d3/d5/d6,-(a7) ;save registers
+ bfffo d0{0:32},d3 ;d3 = offset of first 1 in ms mant (d0)
+ lsl.l d3,d0 ;shift ms mant until j-bit is set
+ move.l d1,d6 ;save ls mant in d6
+ lsl.l d3,d1 ;shift ls mant by count
+ move.l #32,d5
+ sub.l d3,d5 ;d5 = 32 - shift count
+ lsr.l d5,d6 ;shift off all bits but those that will
+* ;be shifted into ms mant
+ or.l d6,d0 ;shift the ls mant bits into the ms mant
+ movem.l (a7)+,d3/d5/d6 ;restore registers
+shft_end:
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/sint.sa b/sys/arch/m68k/fpsp/sint.sa
new file mode 100644
index 00000000000..d300a65dcf0
--- /dev/null
+++ b/sys/arch/m68k/fpsp/sint.sa
@@ -0,0 +1,272 @@
+* $NetBSD: sint.sa,v 1.2 1994/10/26 07:49:48 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* sint.sa 3.1 12/10/90
+*
+* The entry point sINT computes the rounded integer
+* equivalent of the input argument, sINTRZ computes
+* the integer rounded to zero of the input argument.
+*
+* Entry points sint and sintrz are called from do_func
+* to emulate the fint and fintrz unimplemented instructions,
+* respectively. Entry point sintdo is used by bindec.
+*
+* Input: (Entry points sint and sintrz) Double-extended
+* number X in the ETEMP space in the floating-point
+* save stack.
+* (Entry point sintdo) Double-extended number X in
+* location pointed to by the address register a0.
+* (Entry point sintd) Double-extended denormalized
+* number X in the ETEMP space in the floating-point
+* save stack.
+*
+* Output: The function returns int(X) or intrz(X) in fp0.
+*
+* Modifies: fp0.
+*
+* Algorithm: (sint and sintrz)
+*
+* 1. If exp(X) >= 63, return X.
+* If exp(X) < 0, return +/- 0 or +/- 1, according to
+* the rounding mode.
+*
+* 2. (X is in range) set rsc = 63 - exp(X). Unnormalize the
+* result to the exponent $403e.
+*
+* 3. Round the result in the mode given in USER_FPCR. For
+* sintrz, force round-to-zero mode.
+*
+* 4. Normalize the rounded result; store in fp0.
+*
+* For the denormalized cases, force the correct result
+* for the given sign and rounding mode.
+*
+* Sign(X)
+* RMODE + -
+* ----- --------
+* RN +0 -0
+* RZ +0 -0
+* RM +0 -1
+* RP +1 -0
+*
+
+SINT IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref dnrm_lp
+ xref nrm_set
+ xref round
+ xref t_inx2
+ xref ld_pone
+ xref ld_mone
+ xref ld_pzero
+ xref ld_mzero
+ xref snzrinx
+
+*
+* FINT
+*
+ xdef sint
+sint:
+ bfextu FPCR_MODE(a6){2:2},d1 ;use user's mode for rounding
+* ;implicitly has extended precision
+* ;in upper word.
+ move.l d1,L_SCR1(a6) ;save mode bits
+ bra.b sintexc ;real work is done in sintexc
+
+*
+* FINT with extended denorm inputs.
+*
+ xdef sintd
+sintd:
+ btst.b #5,FPCR_MODE(a6) ;bit 5 clear means mode is rn or rz
+ beq snzrinx ;if round nearest or round zero, +/- 0
+ btst.b #4,FPCR_MODE(a6) ;bit 4 clear means round minus inf
+ beq.b rnd_mns
+rnd_pls:
+ btst.b #sign_bit,LOCAL_EX(a0) ;round plus inf: test the operand sign
+ bne.b sintmz ;if round plus inf and neg, answer is -0
+ bsr ld_pone ;if round plus inf and pos, answer is +1
+ bra t_inx2
+rnd_mns:
+ btst.b #sign_bit,LOCAL_EX(a0) ;round mns inf: test the operand sign
+ beq.b sintpz ;if round mns inf and pos, answer is +0
+ bsr ld_mone ;if round mns inf and neg, answer is -1
+ bra t_inx2
+sintpz:
+ bsr ld_pzero ;answer is +0
+ bra t_inx2
+sintmz:
+ bsr ld_mzero ;answer is -0
+ bra t_inx2
+
+*
+* FINTRZ
+*
+ xdef sintrz
+sintrz:
+ move.l #1,L_SCR1(a6) ;use rz mode for rounding
+* ;implicitly has extended precision
+* ;in upper word.
+ bra.b sintexc ;real work is done in sintexc
+*
+* SINTDO
+*
+* Input: a0 points to an IEEE extended format operand
+* Output: fp0 has the result
+*
+* Exceptions:
+*
+* If the subroutine results in an inexact operation, the inx2 and
+* ainx bits in the USER_FPSR are set.
+*
+*
+ xdef sintdo
+sintdo:
+ bfextu FPCR_MODE(a6){2:2},d1 ;use user's mode for rounding
+* ;implicitly has ext precision
+* ;in upper word.
+ move.l d1,L_SCR1(a6) ;save mode bits, then fall into sintexc
+*
+* Real work of sint is in sintexc
+*
+sintexc:
+ bclr.b #sign_bit,LOCAL_EX(a0) ;convert to internal extended
+* ;format
+ sne LOCAL_SGN(a0) ;LOCAL_SGN = $ff if the sign bit was set, else 0
+ cmp.w #$403e,LOCAL_EX(a0) ;check if (unbiased) exp > 63
+ bgt.b out_rnge ;branch if exp > 63
+ cmp.w #$3ffd,LOCAL_EX(a0) ;check if biased exp > $3ffd
+ bgt.w in_rnge ;if $403e >= exp > $3ffd, do calc
+*
+* Biased exponent is $3ffd or less, so the magnitude is below 1/2.
+* Check for directed rounding modes. L_SCR1 has the rmode in the lower byte.
+*
+un_rnge:
+ btst.b #1,L_SCR1+3(a6) ;check for rn and rz
+ beq.b un_rnrz
+ tst.b LOCAL_SGN(a0) ;check for sign
+ bne.b un_rmrp_neg
+*
+* Sign is +. If rp, load +1.0, if rm, load +0.0
+*
+ cmpi.b #3,L_SCR1+3(a6) ;check for rp
+ beq.b un_ldpone ;if rp, load +1.0
+ bsr ld_pzero ;if rm, load +0.0
+ bra t_inx2
+un_ldpone:
+ bsr ld_pone
+ bra t_inx2
+*
+* Sign is -. If rm, load -1.0, if rp, load -0.0
+*
+un_rmrp_neg:
+ cmpi.b #2,L_SCR1+3(a6) ;check for rm
+ beq.b un_ldmone ;if rm, load -1.0
+ bsr ld_mzero ;if rp, load -0.0
+ bra t_inx2
+un_ldmone:
+ bsr ld_mone
+ bra t_inx2
+*
+* Rmode is rn or rz; return signed zero
+*
+un_rnrz:
+ tst.b LOCAL_SGN(a0) ;check for sign
+ bne.b un_rnrz_neg
+ bsr ld_pzero
+ bra t_inx2
+un_rnrz_neg:
+ bsr ld_mzero
+ bra t_inx2
+
+*
+* Unbiased exponent is greater than 63. All bits are significant. Return
+* the input.
+*
+out_rnge:
+ bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format
+ beq.b intps ;sign byte was zero: operand is positive
+ bset.b #sign_bit,LOCAL_EX(a0) ;put the sign bit back in the exponent word
+intps:
+ fmove.l fpcr,-(sp) ;save the current fpcr
+ fmove.l #0,fpcr ;use an all-zero fpcr for the load
+ fmove.x LOCAL_EX(a0),fp0 ;if exp > 63
+* ;then return X to the user
+* ;there are no fraction bits
+ fmove.l (sp)+,fpcr ;restore the saved fpcr
+ rts
+
+in_rnge:
+* ;shift off fraction bits
+ clr.l d0 ;clear d0 - initial g,r,s for
+* ;dnrm_lp
+ move.l #$403e,d1 ;set threshold for dnrm_lp
+* ;assumes a0 points to operand
+ bsr dnrm_lp
+* ;returns unnormalized number
+* ;pointed by a0
+* ;output d0 supplies g,r,s
+* ;used by round
+ move.l L_SCR1(a6),d1 ;use selected rounding mode
+*
+*
+ bsr round ;round the unnorm based on the selected mode
+* ;input a0 ptr to ext X
+* ; d0 g,r,s bits
+* ; d1 PREC/MODE info
+* ;output a0 ptr to rounded result
+* ;inexact flag set in USER_FPSR
+* ;if initial grs set
+*
+* normalize the rounded result and store value in fp0
+*
+ bsr nrm_set ;normalize the unnorm
+* ;Input: a0 points to operand to
+* ;be normalized
+* ;Output: a0 points to normalized
+* ;result
+ bfclr LOCAL_SGN(a0){0:8} ;test and clear the saved sign byte
+ beq.b nrmrndp ;sign byte was zero: result is positive
+ bset.b #sign_bit,LOCAL_EX(a0) ;return to IEEE extended format
+nrmrndp:
+ fmove.l fpcr,-(sp) ;save the current fpcr
+ fmove.l #0,fpcr ;use an all-zero fpcr for the load
+ fmove.x LOCAL_EX(a0),fp0 ;move result to fp0
+ fmove.l (sp)+,fpcr ;restore the saved fpcr
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/skeleton.sa b/sys/arch/m68k/fpsp/skeleton.sa
new file mode 100644
index 00000000000..4ed506d77c7
--- /dev/null
+++ b/sys/arch/m68k/fpsp/skeleton.sa
@@ -0,0 +1,482 @@
+* $NetBSD: skeleton.sa,v 1.3 1994/10/26 07:49:50 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* skeleton.sa 3.2 4/26/91
+*
+* This file contains code that is system dependent and will
+* need to be modified to install the FPSP.
+*
+* Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'.
+* Put any target system specific handling that must be done immediately
+* before the jump instruction. If there is no handling necessary, then
+* the 'fpsp_xxxx' handler entry point should be placed in the exception
+* table so that the 'jmp' can be eliminated. If the FPSP determines that the
+* exception is one that must be reported then there will be a
+* return from the package by a 'jmp real_xxxx'. At that point
+* the machine state will be identical to the state before
+* the FPSP was entered. In particular, whatever condition
+* that caused the exception will still be pending when the FPSP
+* package returns. Thus, there will be system specific code
+* to handle the exception.
+*
+* If the exception was completely handled by the package, then
+* the return will be via a 'jmp fpsp_done'. Unless there is
+* OS specific work to be done (such as handling a context switch or
+* interrupt) the user program can be resumed via 'rte'.
+*
+* In the following skeleton code, some typical 'real_xxxx' handling
+* code is shown. This code may need to be moved to an appropriate
+* place in the target system, or rewritten.
+*
+
+SKELETON IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 15
+*
+* The following counters are used for standalone testing
+*
+sigunimp dc.l 0
+sigbsun dc.l 0
+siginex dc.l 0
+sigdz dc.l 0
+sigunfl dc.l 0
+sigovfl dc.l 0
+sigoperr dc.l 0
+sigsnan dc.l 0
+sigunsupp dc.l 0
+
+ section 8
+
+ include fpsp.h
+
+ xref b1238_fix
+
+*
+* Divide by Zero exception
+*
+* All dz exceptions are 'real', hence no fpsp_dz entry point.
+*
+ xdef dz
+ xdef real_dz
+dz:
+real_dz:
+ link a6,#-LOCAL_SIZE ;allocate the local work area below a6
+ fsave -(sp) ;push the FPU state frame
+ bclr.b #E1,E_BYTE(a6) ;clear the E1 exception flag
+ frestore (sp)+ ;reload the FPU state from the stack
+ unlk a6 ;release the local work area
+
+ add.l #1,sigdz ;for standalone testing
+
+ rte
+*
+* Inexact exception
+*
+* All inexact exceptions are real, but the 'real' handler
+* will probably want to clear the pending exception.
+* The provided code will clear the E3 exception (if pending),
+* otherwise clear the E1 exception. The frestore is not really
+* necessary for E1 exceptions.
+*
+* Code following the 'inex' label is to handle bug #1232. In this
+* bug, if an E1 snan, ovfl, or unfl occurred, and the process was
+* swapped out before taking the exception, the exception taken on
+* return was inex, rather than the correct exception. The snan, ovfl,
+* and unfl exception to be taken must not have been enabled. The
+* fix is to check for E1, and the existence of one of snan, ovfl,
+* or unfl bits set in the fpsr. If any of these are set, branch
+* to the appropriate handler for the exception in the fpsr. Note
+* that this fix is only for d43b parts, and is skipped if the
+* version number is not $40.
+*
+*
+ xdef real_inex
+ xdef inex
+inex:
+ link a6,#-LOCAL_SIZE ;allocate the local work area below a6
+ fsave -(sp) ;push the FPU state frame
+ cmpi.b #VER_40,(sp) ;test version number
+ bne.b not_fmt40 ;not $40: skip the bug 1232 check
+ fmove.l fpsr,-(sp) ;push a copy of the fpsr for the tests below
+ btst.b #E1,E_BYTE(a6) ;test for E1 set
+ beq.b not_b1232
+ btst.b #snan_bit,2(sp) ;test for snan
+ beq inex_ckofl
+ add.l #4,sp ;drop the fpsr copy
+ frestore (sp)+ ;reload the FPU state from the stack
+ unlk a6
+ bra snan ;take the snan exception instead
+inex_ckofl:
+ btst.b #ovfl_bit,2(sp) ;test for ovfl
+ beq inex_ckufl
+ add.l #4,sp ;drop the fpsr copy
+ frestore (sp)+ ;reload the FPU state from the stack
+ unlk a6
+ bra ovfl ;take the ovfl exception instead
+inex_ckufl:
+ btst.b #unfl_bit,2(sp) ;test for unfl
+ beq not_b1232
+ add.l #4,sp ;drop the fpsr copy
+ frestore (sp)+ ;reload the FPU state from the stack
+ unlk a6
+ bra unfl ;take the unfl exception instead
+
+*
+* We do not have the bug 1232 case. Clean up the stack and fall
+* into real_inex.
+*
+not_b1232:
+ add.l #4,sp ;drop the fpsr copy
+ frestore (sp)+ ;reload the FPU state from the stack
+ unlk a6
+
+real_inex:
+
+ add.l #1,siginex ;for standalone testing
+
+ link a6,#-LOCAL_SIZE ;allocate the local work area below a6
+ fsave -(sp) ;push the FPU state frame
+not_fmt40:
+ bclr.b #E3,E_BYTE(a6) ;clear and test E3 flag (this path skips siginex)
+ beq.b inex_cke1
+*
+* Clear dirty bit on dest register in the frame before branching
+* to b1238_fix.
+*
+ movem.l d0/d1,USER_DA(a6) ;save d0/d1 in the frame
+ bfextu CMDREG1B(a6){6:3},d0 ;get dest reg no
+ bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit
+ bsr.l b1238_fix ;test for bug1238 case
+ movem.l USER_DA(a6),d0/d1 ;restore d0/d1
+ bra.b inex_done
+inex_cke1:
+ bclr.b #E1,E_BYTE(a6) ;E3 was not set, so clear E1
+inex_done:
+ frestore (sp)+ ;reload the FPU state from the stack
+ unlk a6
+ rte
+
+*
+* Overflow exception
+*
+ xref fpsp_ovfl
+ xdef real_ovfl
+ xdef ovfl
+ovfl:
+ jmp fpsp_ovfl ;hand the exception to the FPSP handler
+real_ovfl:
+
+ add.l #1,sigovfl ;for standalone testing
+
+ link a6,#-LOCAL_SIZE ;allocate the local work area below a6
+ fsave -(sp) ;push the FPU state frame
+ bclr.b #E3,E_BYTE(a6) ;clear and test E3 flag
+ bne.b ovfl_done ;E3 was set and is now clear
+ bclr.b #E1,E_BYTE(a6) ;E3 was not set, so clear E1
+ovfl_done:
+ frestore (sp)+ ;reload the FPU state from the stack
+ unlk a6
+ rte
+
+*
+* Underflow exception
+*
+ xref fpsp_unfl
+ xdef real_unfl
+ xdef unfl
+unfl:
+ jmp fpsp_unfl ;hand the exception to the FPSP handler
+real_unfl:
+
+ add.l #1,sigunfl ;for standalone testing
+
+ link a6,#-LOCAL_SIZE ;allocate the local work area below a6
+ fsave -(sp) ;push the FPU state frame
+ bclr.b #E3,E_BYTE(a6) ;clear and test E3 flag
+ bne.b unfl_done ;E3 was set and is now clear
+ bclr.b #E1,E_BYTE(a6) ;E3 was not set, so clear E1
+unfl_done:
+ frestore (sp)+ ;reload the FPU state from the stack
+ unlk a6
+ rte
+
+*
+* Signalling NAN exception
+*
+ xref fpsp_snan
+ xdef real_snan
+ xdef snan
+snan:
+ jmp fpsp_snan ;hand the exception to the FPSP handler
+real_snan:
+ link a6,#-LOCAL_SIZE ;allocate the local work area below a6
+ fsave -(sp) ;push the FPU state frame
+ bclr.b #E1,E_BYTE(a6) ;snan is always an E1 exception
+ frestore (sp)+ ;reload the FPU state from the stack
+ unlk a6
+
+ add.l #1,sigsnan ;for standalone testing
+ rte
+
+*
+* Operand Error exception
+*
+ xref fpsp_operr
+ xdef real_operr
+ xdef operr
+operr:
+ jmp fpsp_operr ;hand the exception to the FPSP handler
+real_operr:
+ link a6,#-LOCAL_SIZE ;allocate the local work area below a6
+ fsave -(sp) ;push the FPU state frame
+ bclr.b #E1,E_BYTE(a6) ;operr is always an E1 exception
+ frestore (sp)+ ;reload the FPU state from the stack
+ unlk a6
+
+ add.l #1,sigoperr ;for standalone testing
+
+ rte
+
+*
+* BSUN exception
+*
+* This sample handler simply clears the nan bit in the FPSR.
+*
+ xref fpsp_bsun
+ xdef real_bsun
+ xdef bsun
+bsun:
+ jmp fpsp_bsun ;hand the exception to the FPSP handler
+real_bsun:
+ link a6,#-LOCAL_SIZE ;allocate the local work area below a6
+ fsave -(sp) ;push the FPU state frame
+ bclr.b #E1,E_BYTE(a6) ;bsun is always an E1 exception
+ fmove.l FPSR,-(sp) ;push the FPSR so it can be edited in memory
+ bclr.b #nan_bit,(sp) ;clear the nan bit in the first byte of the copy
+ fmove.l (sp)+,FPSR ;write the edited value back to the FPSR
+ frestore (sp)+ ;reload the FPU state from the stack
+ unlk a6
+
+ add.l #1,sigbsun ;for standalone testing
+
+ rte
+
+*
+* F-line exception
+*
+* A 'real' F-line exception is one that the FPSP isn't supposed to
+* handle. E.g. an instruction with a co-processor ID that is not 1.
+*
+*
+ xref fpsp_fline
+ xdef real_fline
+ xdef fline
+*
+* fline --- entry point for the F-line exception; jumps straight to
+* fpsp_fline.
+*
+fline:
+ jmp fpsp_fline
+*
+* real_fline --- increments the sigunimp counter and returns from
+* the exception; no FPU state is touched.
+*
+real_fline:
+ addq.l #1,sigunimp ;count the exception (standalone testing)
+ rte
+
+*
+* Unsupported data type exception
+*
+ xref fpsp_unsupp
+ xdef real_unsupp
+ xdef unsupp
+*
+* unsupp --- entry point for the unsupported data type exception;
+* jumps straight to fpsp_unsupp.
+*
+unsupp:
+ jmp fpsp_unsupp
+*
+* real_unsupp --- saves the FPU state, clears E1 in E_BYTE(a6),
+* restores the FPU state, increments the sigunsupp counter and
+* returns from the exception.
+*
+real_unsupp:
+ link a6,#-LOCAL_SIZE
+ fsave -(sp)
+ bclr.b #E1,E_BYTE(a6) ;an unsupp is always reported through E1
+ frestore (sp)+
+ unlk a6
+ addq.l #1,sigunsupp ;count the exception (standalone testing)
+ rte
+
+*
+* Trace exception
+*
+ xdef real_trace
+* real_trace --- does nothing except return from the exception.
+real_trace:
+ rte
+
+*
+* fpsp_fmt_error --- exit point for frame format error
+*
+* The fpu stack frame does not match the frames existing
+* or planned at the time of this writing. The fpsp is
+* unable to handle frame sizes not in the following
+* version:size pairs:
+*
+* {4060, 4160} - busy frame
+* {4028, 4130} - unimp frame
+* {4000, 4100} - idle frame
+*
+* This entry point simply holds an f-line illegal value.
+* Replace this with a call to your kernel panic code or
+* code to handle future revisions of the fpu.
+*
+ xdef fpsp_fmt_error
+* There is no instruction sequence here: the label is followed
+* directly by a data longword, so control reaching fpsp_fmt_error
+* executes that longword as an opcode.
+fpsp_fmt_error:
+
+ dc.l $f27f0000 ;f-line illegal
+
+*
+* fpsp_done --- FPSP exit point
+*
+* The exception has been handled by the package and we are ready
+* to return to user mode, but there may be OS specific code
+* to execute before we do. If there is, do it now.
+*
+*
+ xdef fpsp_done
+* As shipped, no OS specific work is done: the exit point is a
+* bare return from exception.
+fpsp_done:
+ rte
+
+*
+* mem_write --- write to user or supervisor address space
+*
+* Writes to memory while in supervisor mode. copyout accomplishes
+* this via a 'moves' instruction. copyout is a UNIX SVR3 (and later) function.
+* If you don't have copyout, use the local copy of the function below.
+*
+* a0 - supervisor source address
+* a1 - user destination address
+* d0 - number of bytes to write (maximum count is 12)
+*
+* The supervisor source address is guaranteed to point into the supervisor
+* stack. The result is that a UNIX
+* process is allowed to sleep as a consequence of a page fault during
+* copyout. The probability of a page fault is exceedingly small because
+* the 68040 always reads the destination address and thus the page
+* faults should have already been handled.
+*
+* If the EXC_SR shows that the exception was from supervisor space,
+* then just do a dumb (and slow) memory move. In a UNIX environment
+* there shouldn't be any supervisor mode floating point exceptions.
+*
+ xdef mem_write
+*
+* In: a0 = source, a1 = destination, d0 = byte count (non-zero).
+* Bit 5 of EXC_SR(a6) selects the path: clear -> copyout,
+* set -> plain byte copy.
+*
+mem_write:
+ btst.b #5,EXC_SR(a6) ;supervisor bit of the saved SR
+ bne.b super_write ;set: destination is supervisor space
+user_write:
+ move.l d1,-(sp) ;keep d1 across the call
+ move.l d0,-(sp) ;argument 3: byte count
+ pea (a1) ;argument 2: destination
+ pea (a0) ;argument 1: source
+ jsr copyout
+ lea 12(sp),sp ;drop the three arguments
+ move.l (sp)+,d1
+ rts
+super_write:
+ move.b (a0)+,(a1)+ ;byte-by-byte copy
+ subq.l #1,d0
+ bne.b super_write ;until the count reaches zero
+ rts
+*
+* mem_read --- read from user or supervisor address space
+*
+* Reads from memory while in supervisor mode. copyin accomplishes
+* this via a 'moves' instruction. copyin is a UNIX SVR3 (and later) function.
+* If you don't have copyin, use the local copy of the function below.
+*
+* The FPSP calls mem_read to read the original F-line instruction in order
+* to extract the data register number when the 'Dn' addressing mode is
+* used.
+*
+*Input:
+* a0 - user source address
+* a1 - supervisor destination address
+* d0 - number of bytes to read (maximum count is 12)
+*
+* Like mem_write, mem_read always reads with a supervisor
+* destination address on the supervisor stack. Also like mem_write,
+* the EXC_SR is checked and a simple memory copy is done if reading
+* from supervisor space is indicated.
+*
+ xdef mem_read
+*
+* In: a0 = source, a1 = destination, d0 = byte count (non-zero).
+* Bit 5 of EXC_SR(a6) selects the path: clear -> copyin,
+* set -> plain byte copy.
+*
+mem_read:
+ btst.b #5,EXC_SR(a6) ;supervisor bit of the saved SR
+ bne.b super_read ;set: source is supervisor space
+user_read:
+ move.l d1,-(sp) ;keep d1 across the call
+ move.l d0,-(sp) ;argument 3: byte count
+ pea (a1) ;argument 2: destination
+ pea (a0) ;argument 1: source
+ jsr copyin
+ lea 12(sp),sp ;drop the three arguments
+ move.l (sp)+,d1
+ rts
+super_read:
+ move.b (a0)+,(a1)+ ;byte-by-byte copy
+ subq.l #1,d0
+ bne.b super_read ;until the count reaches zero
+ rts
+
+*
+* Use these routines if your kernel doesn't have copyout/copyin equivalents.
+* Assumes that D0/D1/A0/A1 are scratch registers. copyout overwrites DFC,
+* and copyin overwrites SFC.
+*
+*
+* copyout --- stack arguments: 4(sp) = source, 8(sp) = destination,
+* 12(sp) = byte count. Copies count bytes, writing each one with
+* moves through DFC. Uses d0/d1/a0/a1 and leaves DFC set to 1.
+*
+copyout:
+ movem.l 4(sp),a0/a1 ; a0 = source, a1 = destination
+ move.l 12(sp),d0 ; byte count
+ subq.l #1,d0 ; dbf runs the loop (d0.w + 1) times
+ moveq #1,d1
+ movec d1,DFC ; set dfc for user data space
+moreout:
+ move.b (a0)+,d1 ; fetch supervisor byte
+ moves.b d1,(a1)+ ; write user byte
+ dbf.w d0,moreout
+ rts
+
+*
+* copyin --- stack arguments: 4(sp) = source, 8(sp) = destination,
+* 12(sp) = byte count. Copies count bytes, reading each one with
+* moves through SFC. Uses d0/d1/a0/a1 and leaves SFC set to 1.
+*
+copyin:
+ movem.l 4(sp),a0/a1 ; a0 = source, a1 = destination
+ move.l 12(sp),d0 ; byte count
+ subq.l #1,d0 ; dbf runs the loop (d0.w + 1) times
+ moveq #1,d1
+ movec d1,SFC ; set sfc for user space
+morein:
+ moves.b (a0)+,d1 ; fetch user byte
+ move.b d1,(a1)+ ; write supervisor byte
+ dbf.w d0,morein
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/slog2.sa b/sys/arch/m68k/fpsp/slog2.sa
new file mode 100644
index 00000000000..197beb498fb
--- /dev/null
+++ b/sys/arch/m68k/fpsp/slog2.sa
@@ -0,0 +1,213 @@
+* $NetBSD: slog2.sa,v 1.2 1994/10/26 07:49:52 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* slog2.sa 3.1 12/10/90
+*
+* The entry point slog10 computes the base-10
+* logarithm of an input argument X.
+* slog10d does the same except the input value is a
+* denormalized number.
+* sLog2 and sLog2d are the base-2 analogues.
+*
+* INPUT: Double-extended value in memory location pointed to
+* by address register a0.
+*
+* OUTPUT: log_10(X) or log_2(X) returned in floating-point
+* register fp0.
+*
+* ACCURACY and MONOTONICITY: The returned result is within 1.7
+* ulps in 64 significant bit, i.e. within 0.5003 ulp
+* to 53 bits if the result is subsequently rounded
+* to double precision. The result is provably monotonic
+* in double precision.
+*
+* SPEED: Two timings are measured, both in the copy-back mode.
+* The first one is measured when the function is invoked
+* the first time (so the instructions and data are not
+* in cache), and the second one is measured when the
+* function is reinvoked at the same input argument.
+*
+* ALGORITHM and IMPLEMENTATION NOTES:
+*
+* slog10d:
+*
+* Step 0. If X < 0, create a NaN and raise the invalid operation
+* flag. Otherwise, save FPCR in D1; set FpCR to default.
+* Notes: Default means round-to-nearest mode, no floating-point
+* traps, and precision control = double extended.
+*
+* Step 1. Call slognd to obtain Y = log(X), the natural log of X.
+* Notes: Even if X is denormalized, log(X) is always normalized.
+*
+* Step 2. Compute log_10(X) = log(X) * (1/log(10)).
+* 2.1 Restore the user FPCR
+* 2.2 Return ans := Y * INV_L10.
+*
+*
+* slog10:
+*
+* Step 0. If X < 0, create a NaN and raise the invalid operation
+* flag. Otherwise, save FPCR in D1; set FpCR to default.
+* Notes: Default means round-to-nearest mode, no floating-point
+* traps, and precision control = double extended.
+*
+* Step 1. Call sLogN to obtain Y = log(X), the natural log of X.
+*
+* Step 2. Compute log_10(X) = log(X) * (1/log(10)).
+* 2.1 Restore the user FPCR
+* 2.2 Return ans := Y * INV_L10.
+*
+*
+* sLog2d:
+*
+* Step 0. If X < 0, create a NaN and raise the invalid operation
+* flag. Otherwise, save FPCR in D1; set FpCR to default.
+* Notes: Default means round-to-nearest mode, no floating-point
+* traps, and precision control = double extended.
+*
+* Step 1. Call slognd to obtain Y = log(X), the natural log of X.
+* Notes: Even if X is denormalized, log(X) is always normalized.
+*
+* Step 2. Compute log_2(X) = log(X) * (1/log(2)).
+* 2.1 Restore the user FPCR
+* 2.2 Return ans := Y * INV_L2.
+*
+*
+* sLog2:
+*
+* Step 0. If X < 0, create a NaN and raise the invalid operation
+* flag. Otherwise, save FPCR in D1; set FpCR to default.
+* Notes: Default means round-to-nearest mode, no floating-point
+* traps, and precision control = double extended.
+*
+* Step 1. If X is not an integer power of two, i.e., X != 2^k,
+* go to Step 3.
+*
+* Step 2. Return k.
+* 2.1 Get integer k, X = 2^k.
+* 2.2 Restore the user FPCR.
+* 2.3 Return ans := convert-to-double-extended(k).
+*
+* Step 3. Call sLogN to obtain Y = log(X), the natural log of X.
+*
+* Step 4. Compute log_2(X) = log(X) * (1/log(2)).
+* 4.1 Restore the user FPCR
+* 4.2 Return ans := Y * INV_L2.
+*
+
+SLOG2 IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ xref t_frcinx
+ xref t_operr
+ xref slogn
+ xref slognd
+
+*--INV_L10 is 1/log(10); it is the fmul.x operand in slog10/slog10d.
+*--Each constant is stored as four longwords.
+INV_L10 DC.L $3FFD0000,$DE5BD8A9,$37287195,$00000000
+
+*--INV_L2 is 1/log(2); it is the fmul.x operand in slog2/slog2d.
+INV_L2 DC.L $3FFF0000,$B8AA3B29,$5C17F0BC,$00000000
+
+ xdef slog10d
+slog10d:
+*--Log10(X) for denormalized X: natural log from slognd, then
+*--scaled by INV_L10.
+ move.l (a0),d0 ...first longword of X (sign in bit 31)
+ bmi.w invalid ...negative X is rejected
+ move.l d1,-(sp) ...park d1; it is reloaded into FPCR below
+ moveq #0,d1 ...slognd runs with d1 = 0
+ bsr slognd ...fp0 := log(X)
+ fmove.l (sp)+,fpcr ...FPCR := the parked d1 value
+ fmul.x INV_L10,fp0 ...fp0 := log(X) * INV_L10
+ bra t_frcinx
+
+ xdef slog10
+slog10:
+*--Log10(X) for normalized X: natural log from slogn, then
+*--scaled by INV_L10.
+
+ move.l (a0),d0 ...first longword of X (sign in bit 31)
+ bmi.w invalid ...negative X is rejected
+ move.l d1,-(sp) ...park d1; it is reloaded into FPCR below
+ moveq #0,d1 ...slogn runs with d1 = 0
+ bsr slogn ...fp0 := log(X)
+ fmove.l (sp)+,fpcr ...FPCR := the parked d1 value
+ fmul.x INV_L10,fp0 ...fp0 := log(X) * INV_L10
+ bra t_frcinx
+
+
+ xdef slog2d
+slog2d:
+*--Log2(X) for denormalized X: natural log from slognd, then
+*--scaled by INV_L2.
+
+ move.l (a0),d0 ...first longword of X (sign in bit 31)
+ bmi.w invalid ...negative X is rejected
+ move.l d1,-(sp) ...park d1; it is reloaded into FPCR below
+ moveq #0,d1 ...slognd runs with d1 = 0
+ bsr slognd ...fp0 := log(X)
+ fmove.l (sp)+,fpcr ...FPCR := the parked d1 value
+ fmul.x INV_L2,fp0 ...fp0 := log(X) * INV_L2
+ bra t_frcinx
+
+ xdef slog2
+slog2:
+*--Log2(X) for normalized X. When the mantissa is exactly 1.0 the
+*--unbiased exponent k is returned directly; otherwise the natural
+*--log from slogn is scaled by INV_L2.
+ move.l (a0),d0 ...first longword of X (sign in bit 31)
+ bmi.w invalid ...negative X is rejected
+
+ move.l 8(a0),d0 ...low mantissa longword
+ bne.b continue ...non-zero: X is not 2^k
+
+ move.l 4(a0),d0 ...high mantissa longword
+ and.l #$7FFFFFFF,d0 ...drop the top bit; Z tells if the rest is 0
+ bne.b continue ...bits left over: X is not 2^k
+
+*--X = 2^k.
+ move.w (a0),d0 ...sign/exponent word (upper half of d0 is 0)
+ and.l #$00007FFF,d0 ...biased exponent
+ sub.l #$3FFF,d0 ...remove the bias: d0 = k
+ fmove.l d1,fpcr ...FPCR := d1
+ fmove.l d0,fp0 ...fp0 := k
+ bra t_frcinx
+
+continue:
+ move.l d1,-(sp) ...park d1; it is reloaded into FPCR below
+ moveq #0,d1 ...slogn runs with d1 = 0
+ bsr slogn ...fp0 := log(X)
+ fmove.l (sp)+,fpcr ...FPCR := the parked d1 value
+ fmul.x INV_L2,fp0 ...fp0 := log(X) * INV_L2
+ bra t_frcinx
+
+*--Common exit for all four entry points when the first longword of X
+*--is negative: transfers to t_operr.
+invalid:
+ bra t_operr
+
+ end
diff --git a/sys/arch/m68k/fpsp/slogn.sa b/sys/arch/m68k/fpsp/slogn.sa
new file mode 100644
index 00000000000..26afe941940
--- /dev/null
+++ b/sys/arch/m68k/fpsp/slogn.sa
@@ -0,0 +1,617 @@
+* $NetBSD: slogn.sa,v 1.3 1994/10/26 07:49:54 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* slogn.sa 3.1 12/10/90
+*
+* slogn computes the natural logarithm of an
+* input value. slognd does the same except the input value is a
+* denormalized number. slognp1 computes log(1+X), and slognp1d
+* computes log(1+X) for denormalized X.
+*
+* Input: Double-extended value in memory location pointed to by address
+* register a0.
+*
+* Output: log(X) or log(1+X) returned in floating-point register Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 2 ulps in
+* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+* result is subsequently rounded to double precision. The
+* result is provably monotonic in double precision.
+*
+* Speed: The program slogn takes approximately 190 cycles for input
+* argument X such that |X-1| >= 1/16, which is the usual
+* situation. For those arguments, slognp1 takes approximately
+* 210 cycles. For the less common arguments, the program will
+* run no worse than 10% slower.
+*
+* Algorithm:
+* LOGN:
+* Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
+* u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
+*
+* Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
+* significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
+* 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
+*
+* Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
+* log(1+u) = poly.
+*
+* Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
+* by k*log(2) + (log(F) + poly). The values of log(F) are calculated
+* beforehand and stored in the program.
+*
+* lognp1:
+* Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
+* u where u = 2X/(2+X). Otherwise, move on to Step 2.
+*
+* Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
+* of the algorithm for LOGN and compute log(1+X) as
+* k*log(2) + log(F) + poly where poly approximates log(1+u),
+* u = (Y-F)/F.
+*
+* Implementation Notes:
+* Note 1. There are 64 different possible values for F, thus 64 log(F)'s
+* need to be tabulated. Moreover, the values of 1/F are also
+* tabulated so that the division in (Y-F)/F can be performed by a
+* multiplication.
+*
+* Note 2. In Step 2 of lognp1, in order to preserve accuracy, the value
+* Y-F has to be calculated carefully when 1/2 <= X < 3/2.
+*
+* Note 3. To fully exploit the pipeline, polynomials are usually separated
+* into two parts evaluated independently before being added up.
+*
+
+slogn IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+*--CMP2.L bound pairs (lower,upper). They are compared against X in
+*--"compact" form: sign/exponent word in the upper half, top 16
+*--mantissa bits in the lower half.
+*--BOUNDS1 is roughly [15/16,17/16]; BOUNDS2 is [1/2,3/2].
+BOUNDS1 DC.L $3FFEF07D,$3FFF8841
+BOUNDS2 DC.L $3FFE8000,$3FFFC000
+
+*--log(2); multiplied by K with FMUL.X to form K*LOG2.
+LOGOF2 DC.L $3FFE0000,$B17217F7,$D1CF79AC,$00000000
+
+*--Small constants; the code reads one/zero/negone with .S operations.
+one DC.L $3F800000
+zero DC.L $00000000
+infty DC.L $7F800000
+negone DC.L $BF800000
+
+*--A1..A6: coefficients of the LOG(1+U) polynomial evaluated at
+*--LP1CONT1; each is read with a .D operation.
+LOGA6 DC.L $3FC2499A,$B5E4040B
+LOGA5 DC.L $BFC555B5,$848CB7DB
+
+LOGA4 DC.L $3FC99999,$987D8730
+LOGA3 DC.L $BFCFFFFF,$FF6F7E97
+
+LOGA2 DC.L $3FD55555,$555555A4
+LOGA1 DC.L $BFE00000,$00000008
+
+*--B1..B5: coefficients of the odd polynomial evaluated at LP1CONT2;
+*--each is read with a .D operation.
+LOGB5 DC.L $3F175496,$ADD7DAD6
+LOGB4 DC.L $3F3C71C2,$FE80C7E0
+
+LOGB3 DC.L $3F624924,$928BCCFF
+LOGB2 DC.L $3F899999,$999995EC
+
+LOGB1 DC.L $3FB55555,$55555555
+*--TWO is read with FMOVE.S, so only its first longword is used.
+TWO DC.L $40000000,$00000000
+
+*--slognp1 compares |X| against LTHOLD with fcmp.x; inputs that are not
+*--greater are returned unchanged.
+LTHOLD DC.L $3f990000,$80000000,$00000000,$00000000
+
+*--LOGTBL holds one 32-byte record per value of F: 16 bytes of 1/F
+*--followed by 16 bytes of LOG(F). The code indexes it with the six
+*--bits of F selected by #$7E000000, shifted right by 20 (index * 32).
+LOGTBL:
+ DC.L $3FFE0000,$FE03F80F,$E03F80FE,$00000000
+ DC.L $3FF70000,$FF015358,$833C47E2,$00000000
+ DC.L $3FFE0000,$FA232CF2,$52138AC0,$00000000
+ DC.L $3FF90000,$BDC8D83E,$AD88D549,$00000000
+ DC.L $3FFE0000,$F6603D98,$0F6603DA,$00000000
+ DC.L $3FFA0000,$9CF43DCF,$F5EAFD48,$00000000
+ DC.L $3FFE0000,$F2B9D648,$0F2B9D65,$00000000
+ DC.L $3FFA0000,$DA16EB88,$CB8DF614,$00000000
+ DC.L $3FFE0000,$EF2EB71F,$C4345238,$00000000
+ DC.L $3FFB0000,$8B29B775,$1BD70743,$00000000
+ DC.L $3FFE0000,$EBBDB2A5,$C1619C8C,$00000000
+ DC.L $3FFB0000,$A8D839F8,$30C1FB49,$00000000
+ DC.L $3FFE0000,$E865AC7B,$7603A197,$00000000
+ DC.L $3FFB0000,$C61A2EB1,$8CD907AD,$00000000
+ DC.L $3FFE0000,$E525982A,$F70C880E,$00000000
+ DC.L $3FFB0000,$E2F2A47A,$DE3A18AF,$00000000
+ DC.L $3FFE0000,$E1FC780E,$1FC780E2,$00000000
+ DC.L $3FFB0000,$FF64898E,$DF55D551,$00000000
+ DC.L $3FFE0000,$DEE95C4C,$A037BA57,$00000000
+ DC.L $3FFC0000,$8DB956A9,$7B3D0148,$00000000
+ DC.L $3FFE0000,$DBEB61EE,$D19C5958,$00000000
+ DC.L $3FFC0000,$9B8FE100,$F47BA1DE,$00000000
+ DC.L $3FFE0000,$D901B203,$6406C80E,$00000000
+ DC.L $3FFC0000,$A9372F1D,$0DA1BD17,$00000000
+ DC.L $3FFE0000,$D62B80D6,$2B80D62C,$00000000
+ DC.L $3FFC0000,$B6B07F38,$CE90E46B,$00000000
+ DC.L $3FFE0000,$D3680D36,$80D3680D,$00000000
+ DC.L $3FFC0000,$C3FD0329,$06488481,$00000000
+ DC.L $3FFE0000,$D0B69FCB,$D2580D0B,$00000000
+ DC.L $3FFC0000,$D11DE0FF,$15AB18CA,$00000000
+ DC.L $3FFE0000,$CE168A77,$25080CE1,$00000000
+ DC.L $3FFC0000,$DE1433A1,$6C66B150,$00000000
+ DC.L $3FFE0000,$CB8727C0,$65C393E0,$00000000
+ DC.L $3FFC0000,$EAE10B5A,$7DDC8ADD,$00000000
+ DC.L $3FFE0000,$C907DA4E,$871146AD,$00000000
+ DC.L $3FFC0000,$F7856E5E,$E2C9B291,$00000000
+ DC.L $3FFE0000,$C6980C69,$80C6980C,$00000000
+ DC.L $3FFD0000,$82012CA5,$A68206D7,$00000000
+ DC.L $3FFE0000,$C4372F85,$5D824CA6,$00000000
+ DC.L $3FFD0000,$882C5FCD,$7256A8C5,$00000000
+ DC.L $3FFE0000,$C1E4BBD5,$95F6E947,$00000000
+ DC.L $3FFD0000,$8E44C60B,$4CCFD7DE,$00000000
+ DC.L $3FFE0000,$BFA02FE8,$0BFA02FF,$00000000
+ DC.L $3FFD0000,$944AD09E,$F4351AF6,$00000000
+ DC.L $3FFE0000,$BD691047,$07661AA3,$00000000
+ DC.L $3FFD0000,$9A3EECD4,$C3EAA6B2,$00000000
+ DC.L $3FFE0000,$BB3EE721,$A54D880C,$00000000
+ DC.L $3FFD0000,$A0218434,$353F1DE8,$00000000
+ DC.L $3FFE0000,$B92143FA,$36F5E02E,$00000000
+ DC.L $3FFD0000,$A5F2FCAB,$BBC506DA,$00000000
+ DC.L $3FFE0000,$B70FBB5A,$19BE3659,$00000000
+ DC.L $3FFD0000,$ABB3B8BA,$2AD362A5,$00000000
+ DC.L $3FFE0000,$B509E68A,$9B94821F,$00000000
+ DC.L $3FFD0000,$B1641795,$CE3CA97B,$00000000
+ DC.L $3FFE0000,$B30F6352,$8917C80B,$00000000
+ DC.L $3FFD0000,$B7047551,$5D0F1C61,$00000000
+ DC.L $3FFE0000,$B11FD3B8,$0B11FD3C,$00000000
+ DC.L $3FFD0000,$BC952AFE,$EA3D13E1,$00000000
+ DC.L $3FFE0000,$AF3ADDC6,$80AF3ADE,$00000000
+ DC.L $3FFD0000,$C2168ED0,$F458BA4A,$00000000
+ DC.L $3FFE0000,$AD602B58,$0AD602B6,$00000000
+ DC.L $3FFD0000,$C788F439,$B3163BF1,$00000000
+ DC.L $3FFE0000,$AB8F69E2,$8359CD11,$00000000
+ DC.L $3FFD0000,$CCECAC08,$BF04565D,$00000000
+ DC.L $3FFE0000,$A9C84A47,$A07F5638,$00000000
+ DC.L $3FFD0000,$D2420487,$2DD85160,$00000000
+ DC.L $3FFE0000,$A80A80A8,$0A80A80B,$00000000
+ DC.L $3FFD0000,$D7894992,$3BC3588A,$00000000
+ DC.L $3FFE0000,$A655C439,$2D7B73A8,$00000000
+ DC.L $3FFD0000,$DCC2C4B4,$9887DACC,$00000000
+ DC.L $3FFE0000,$A4A9CF1D,$96833751,$00000000
+ DC.L $3FFD0000,$E1EEBD3E,$6D6A6B9E,$00000000
+ DC.L $3FFE0000,$A3065E3F,$AE7CD0E0,$00000000
+ DC.L $3FFD0000,$E70D785C,$2F9F5BDC,$00000000
+ DC.L $3FFE0000,$A16B312E,$A8FC377D,$00000000
+ DC.L $3FFD0000,$EC1F392C,$5179F283,$00000000
+ DC.L $3FFE0000,$9FD809FD,$809FD80A,$00000000
+ DC.L $3FFD0000,$F12440D3,$E36130E6,$00000000
+ DC.L $3FFE0000,$9E4CAD23,$DD5F3A20,$00000000
+ DC.L $3FFD0000,$F61CCE92,$346600BB,$00000000
+ DC.L $3FFE0000,$9CC8E160,$C3FB19B9,$00000000
+ DC.L $3FFD0000,$FB091FD3,$8145630A,$00000000
+ DC.L $3FFE0000,$9B4C6F9E,$F03A3CAA,$00000000
+ DC.L $3FFD0000,$FFE97042,$BFA4C2AD,$00000000
+ DC.L $3FFE0000,$99D722DA,$BDE58F06,$00000000
+ DC.L $3FFE0000,$825EFCED,$49369330,$00000000
+ DC.L $3FFE0000,$9868C809,$868C8098,$00000000
+ DC.L $3FFE0000,$84C37A7A,$B9A905C9,$00000000
+ DC.L $3FFE0000,$97012E02,$5C04B809,$00000000
+ DC.L $3FFE0000,$87224C2E,$8E645FB7,$00000000
+ DC.L $3FFE0000,$95A02568,$095A0257,$00000000
+ DC.L $3FFE0000,$897B8CAC,$9F7DE298,$00000000
+ DC.L $3FFE0000,$94458094,$45809446,$00000000
+ DC.L $3FFE0000,$8BCF55DE,$C4CD05FE,$00000000
+ DC.L $3FFE0000,$92F11384,$0497889C,$00000000
+ DC.L $3FFE0000,$8E1DC0FB,$89E125E5,$00000000
+ DC.L $3FFE0000,$91A2B3C4,$D5E6F809,$00000000
+ DC.L $3FFE0000,$9066E68C,$955B6C9B,$00000000
+ DC.L $3FFE0000,$905A3863,$3E06C43B,$00000000
+ DC.L $3FFE0000,$92AADE74,$C7BE59E0,$00000000
+ DC.L $3FFE0000,$8F1779D9,$FDC3A219,$00000000
+ DC.L $3FFE0000,$94E9BFF6,$15845643,$00000000
+ DC.L $3FFE0000,$8DDA5202,$37694809,$00000000
+ DC.L $3FFE0000,$9723A1B7,$20134203,$00000000
+ DC.L $3FFE0000,$8CA29C04,$6514E023,$00000000
+ DC.L $3FFE0000,$995899C8,$90EB8990,$00000000
+ DC.L $3FFE0000,$8B70344A,$139BC75A,$00000000
+ DC.L $3FFE0000,$9B88BDAA,$3A3DAE2F,$00000000
+ DC.L $3FFE0000,$8A42F870,$5669DB46,$00000000
+ DC.L $3FFE0000,$9DB4224F,$FFE1157C,$00000000
+ DC.L $3FFE0000,$891AC73A,$E9819B50,$00000000
+ DC.L $3FFE0000,$9FDADC26,$8B7A12DA,$00000000
+ DC.L $3FFE0000,$87F78087,$F78087F8,$00000000
+ DC.L $3FFE0000,$A1FCFF17,$CE733BD4,$00000000
+ DC.L $3FFE0000,$86D90544,$7A34ACC6,$00000000
+ DC.L $3FFE0000,$A41A9E8F,$5446FB9F,$00000000
+ DC.L $3FFE0000,$85BF3761,$2CEE3C9B,$00000000
+ DC.L $3FFE0000,$A633CD7E,$6771CD8B,$00000000
+ DC.L $3FFE0000,$84A9F9C8,$084A9F9D,$00000000
+ DC.L $3FFE0000,$A8489E60,$0B435A5E,$00000000
+ DC.L $3FFE0000,$83993052,$3FBE3368,$00000000
+ DC.L $3FFE0000,$AA59233C,$CCA4BD49,$00000000
+ DC.L $3FFE0000,$828CBFBE,$B9A020A3,$00000000
+ DC.L $3FFE0000,$AC656DAE,$6BCC4985,$00000000
+ DC.L $3FFE0000,$81848DA8,$FAF0D277,$00000000
+ DC.L $3FFE0000,$AE6D8EE3,$60BB2468,$00000000
+ DC.L $3FFE0000,$80808080,$80808081,$00000000
+ DC.L $3FFE0000,$B07197A2,$3C46C654,$00000000
+
+*--Scratch locations, all addressed off a6. L_SCR1 and FP_SCR1..FP_SCR4
+*--come from outside this chunk (presumably fpsp.h - confirm).
+
+*--ADJK: exponent adjustment; slognd stores -k here, slogn stores 0.
+ADJK equ L_SCR1
+
+*--X: working copy of the argument. XFRAC is its high mantissa longword;
+*--XDCARE is the word between the exponent word and the mantissa.
+X equ FP_SCR1
+XDCARE equ X+2
+XFRAC equ X+4
+
+*--F: the value F built from Y; FFRAC is its high mantissa longword.
+F equ FP_SCR2
+FFRAC equ F+4
+
+*--KLOG2: holds K*LOG2 while the polynomial is evaluated.
+KLOG2 equ FP_SCR3
+
+*--SAVEU: holds U while the LP1CONT2 polynomial is evaluated.
+SAVEU equ FP_SCR4
+
+ xref t_frcinx
+ xref t_extdnrm
+ xref t_operr
+ xref t_dz
+
+ xdef slognd
+slognd:
+*--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
+*--Shifts the 64-bit mantissa at 4(A0)/8(A0) left by k bits so that its
+*--top bit is set, stores the result with a zero first longword in X(a6),
+*--records -k in ADJK(a6), loads X(a6) into FP0, points A0 at X(a6) and
+*--joins LOGBGN. D2-D7 are saved and restored around the work.
+
+ MOVE.L #-100,ADJK(a6) ...INPUT = 2^(ADJK) * FP0
+*--NOTE(review): the -100 stored above is overwritten with -k on both
+*--paths below before ADJK is read.
+
+*----normalize the input value by left shifting k bits (k to be determined
+*----below), adjusting exponent and storing -k to ADJK
+*----the value TWOTO100 is no longer needed.
+*----Note that this code assumes the denormalized input is NON-ZERO.
+
+ MoveM.L D2-D7,-(A7) ...save some registers
+ Clr.L D3 ...D3 is exponent of smallest norm. #
+ Move.L 4(A0),D4
+ Move.L 8(A0),D5 ...(D4,D5) is (Hi_X,Lo_X)
+ Clr.L D2 ...D2 used for holding K
+
+ Tst.L D4
+ BNE.B HiX_not0
+
+HiX_0:
+*--Hi_X is zero: move Lo_X up one longword (32 bits of shift) and then
+*--shift out its leading zeros.
+ Move.L D5,D4
+ Clr.L D5
+ Move.L #32,D2
+ Clr.L D6
+ BFFFO D4{0:32},D6 ...D6 = offset of the first 1 bit in D4
+ LSL.L D6,D4
+ Add.L D6,D2 ...(D3,D4,D5) is normalized
+
+ Move.L D3,X(a6)
+ Move.L D4,XFRAC(a6)
+ Move.L D5,XFRAC+4(a6)
+ Neg.L D2 ...D2 = -k
+ Move.L D2,ADJK(a6)
+ FMove.X X(a6),FP0
+ MoveM.L (A7)+,D2-D7 ...restore registers
+ LEA X(a6),A0
+ Bra.B LOGBGN ...begin regular log(X)
+
+
+HiX_not0:
+*--Hi_X is non-zero: shift the 64-bit pair (D4,D5) left by k = D6 bits,
+*--carrying the top k bits of D5 into the bottom of D4.
+ Clr.L D6
+ BFFFO D4{0:32},D6 ...find first 1
+ Move.L D6,D2 ...get k
+ LSL.L D6,D4
+ Move.L D5,D7 ...a copy of D5
+ LSL.L D6,D5
+ Neg.L D6
+ AddI.L #32,D6 ...D6 = 32 - k
+ LSR.L D6,D7 ...D7 = the k bits shifted out of D5
+ Or.L D7,D4 ...(D3,D4,D5) normalized
+
+ Move.L D3,X(a6)
+ Move.L D4,XFRAC(a6)
+ Move.L D5,XFRAC+4(a6)
+ Neg.L D2 ...D2 = -k
+ Move.L D2,ADJK(a6)
+ FMove.X X(a6),FP0
+ MoveM.L (A7)+,D2-D7 ...restore registers
+ LEA X(a6),A0
+ Bra.B LOGBGN ...begin regular log(X)
+
+
+ xdef slogn
+slogn:
+*--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
+*--In: A0 points at X; D1 is loaded into FPCR just before the final add.
+*--Out: FP0, then control passes to t_frcinx.
+
+ FMOVE.X (A0),FP0 ...LOAD INPUT
+ CLR.L ADJK(a6) ...NO EXPONENT ADJUSTMENT FOR NORMALIZED INPUT
+
+LOGBGN:
+*--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
+*--A FINITE, NON-ZERO, NORMALIZED NUMBER.
+
+*--BUILD THE COMPACT FORM OF X IN D0: SIGN/EXPONENT WORD IN THE UPPER
+*--HALF, TOP 16 MANTISSA BITS IN THE LOWER HALF.
+ move.l (a0),d0
+ move.w 4(a0),d0
+
+*--COPY THE THREE LONGWORDS OF THE INPUT INTO X(a6).
+ move.l (a0),X(a6)
+ move.l 4(a0),X+4(a6)
+ move.l 8(a0),X+8(a6)
+
+ TST.L D0 ...CHECK IF X IS NEGATIVE
+ BLT.W LOGNEG ...LOG OF NEGATIVE ARGUMENT IS INVALID
+ CMP2.L BOUNDS1,D0 ...X IS POSITIVE, CHECK IF X IS NEAR 1
+ BCC.W LOGNEAR1 ...BOUNDS IS ROUGHLY [15/16, 17/16]
+
+LOGMAIN:
+*--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
+*--(ALSO ENTERED FROM LP1REAL WITH D0 AND X(a6) ALREADY SET UP)
+
+*--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
+*--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
+*--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
+*-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
+*--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
+*--LOG(1+U) CAN BE VERY EFFICIENT.
+*--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
+*--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
+
+*--GET K, Y, F, AND ADDRESS OF 1/F.
+ ASR.L #8,D0
+ ASR.L #8,D0 ...SHIFTED 16 BITS, BIASED EXPO. OF X
+ SUBI.L #$3FFF,D0 ...THIS IS K
+ ADD.L ADJK(a6),D0 ...ADJUST K, ORIGINAL INPUT MAY BE DENORM.
+ LEA LOGTBL,A0 ...BASE ADDRESS OF 1/F AND LOG(F)
+ FMOVE.L D0,FP1 ...CONVERT K TO FLOATING-POINT FORMAT
+
+*--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
+ MOVE.L #$3FFF0000,X(a6) ...X IS NOW Y, I.E. 2^(-K)*X
+ MOVE.L XFRAC(a6),FFRAC(a6)
+ ANDI.L #$FE000000,FFRAC(a6) ...FIRST 7 BITS OF Y
+ ORI.L #$01000000,FFRAC(a6) ...GET F: ATTACH A 1 AT THE EIGHTH BIT
+ MOVE.L FFRAC(a6),D0 ...READY TO GET ADDRESS OF 1/F
+ ANDI.L #$7E000000,D0 ...KEEP THE SIX BITS THAT DISTINGUISH F
+ ASR.L #8,D0
+ ASR.L #8,D0
+ ASR.L #4,D0 ...SHIFTED 20, D0 IS THE DISPLACEMENT
+ ADDA.L D0,A0 ...A0 IS THE ADDRESS FOR 1/F
+
+ FMOVE.X X(a6),FP0 ...FP0 IS Y
+ move.l #$3fff0000,F(a6) ...FIRST LONGWORD OF F MATCHES THAT OF Y
+ clr.l F+8(a6) ...LOW MANTISSA LONGWORD OF F IS ZERO
+ FSUB.X F(a6),FP0 ...Y-F
+ FMOVEm.X FP2/fp3,-(sp) ...SAVE FP2/FP3 WHILE FP0 IS NOT READY
+*--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
+*--FP2/FP3 ARE NOW ON THE STACK AND ARE RESTORED BEFORE THE EXIT.
+
+LP1CONT1:
+*--A RE-ENTRY POINT FOR LOGNP1
+ FMUL.X (A0),FP0 ...FP0 IS U = (Y-F)/F
+ FMUL.X LOGOF2,FP1 ...GET K*LOG2 WHILE FP0 IS NOT READY
+ FMOVE.X FP0,FP2
+ FMUL.X FP2,FP2 ...FP2 IS V=U*U
+ FMOVE.X FP1,KLOG2(a6) ...PUT K*LOG2 IN MEMORY, FREE FP1
+
+*--LOG(1+U) IS APPROXIMATED BY
+*--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
+*--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))]
+
+ FMOVE.X FP2,FP3
+ FMOVE.X FP2,FP1
+
+ FMUL.D LOGA6,FP1 ...V*A6
+ FMUL.D LOGA5,FP2 ...V*A5
+
+ FADD.D LOGA4,FP1 ...A4+V*A6
+ FADD.D LOGA3,FP2 ...A3+V*A5
+
+ FMUL.X FP3,FP1 ...V*(A4+V*A6)
+ FMUL.X FP3,FP2 ...V*(A3+V*A5)
+
+ FADD.D LOGA2,FP1 ...A2+V*(A4+V*A6)
+ FADD.D LOGA1,FP2 ...A1+V*(A3+V*A5)
+
+ FMUL.X FP3,FP1 ...V*(A2+V*(A4+V*A6))
+ ADDA.L #16,A0 ...ADDRESS OF LOG(F)
+ FMUL.X FP3,FP2 ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
+
+ FMUL.X FP0,FP1 ...U*V*(A2+V*(A4+V*A6))
+ FADD.X FP2,FP0 ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
+
+ FADD.X (A0),FP1 ...LOG(F)+U*V*(A2+V*(A4+V*A6))
+ FMOVEm.X (sp)+,FP2/fp3 ...RESTORE FP2/FP3
+ FADD.X FP1,FP0 ...FP0 IS LOG(F) + LOG(1+U)
+
+ fmove.l d1,fpcr ...FPCR := D1 SO THE LAST ADD USES IT
+ FADD.X KLOG2(a6),FP0 ...FINAL ADD
+ bra t_frcinx
+
+
+LOGNEAR1:
+*--FP0 CONTAINS THE INPUT X, WHICH LIES WITHIN BOUNDS1.
+*--D1 IS LOADED INTO FPCR JUST BEFORE THE FINAL ADD.
+ FMOVE.X FP0,FP1
+ FSUB.S one,FP1 ...FP1 IS X-1
+ FADD.S one,FP0 ...FP0 IS X+1
+ FADD.X FP1,FP1 ...FP1 IS 2(X-1)
+*--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
+*--IN U, U = 2(X-1)/(X+1) = FP1/FP0
+
+LP1CONT2:
+*--THIS IS A RE-ENTRY POINT FOR LOGNP1
+*--ON ENTRY FP1 IS THE NUMERATOR AND FP0 THE DENOMINATOR OF U.
+ FDIV.X FP0,FP1 ...FP1 IS U
+ FMOVEm.X FP2/fp3,-(sp) ...SAVE FP2/FP3
+*--FP2/FP3 ARE NOW ON THE STACK AND ARE RESTORED BEFORE THE EXIT.
+*--LET V=U*U, W=V*V, CALCULATE
+*--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
+*--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] )
+ FMOVE.X FP1,FP0
+ FMUL.X FP0,FP0 ...FP0 IS V
+ FMOVE.X FP1,SAVEU(a6) ...STORE U IN MEMORY, FREE FP1
+ FMOVE.X FP0,FP1
+ FMUL.X FP1,FP1 ...FP1 IS W
+
+ FMOVE.D LOGB5,FP3
+ FMOVE.D LOGB4,FP2
+
+ FMUL.X FP1,FP3 ...W*B5
+ FMUL.X FP1,FP2 ...W*B4
+
+ FADD.D LOGB3,FP3 ...B3+W*B5
+ FADD.D LOGB2,FP2 ...B2+W*B4
+
+ FMUL.X FP3,FP1 ...W*(B3+W*B5), FP3 RELEASED
+
+ FMUL.X FP0,FP2 ...V*(B2+W*B4)
+
+ FADD.D LOGB1,FP1 ...B1+W*(B3+W*B5)
+ FMUL.X SAVEU(a6),FP0 ...FP0 IS U*V
+
+ FADD.X FP2,FP1 ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
+ FMOVEm.X (sp)+,FP2/fp3 ...FP2/FP3 RESTORED
+
+ FMUL.X FP1,FP0 ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
+
+ fmove.l d1,fpcr ...FPCR := D1 SO THE LAST ADD USES IT
+ FADD.X SAVEU(a6),FP0 ...ADD U: FP0 IS THE RESULT
+*--EXIT THROUGH t_frcinx. THE BRANCH IS UNCONDITIONAL, SO NOTHING MAY
+*--FOLLOW IT HERE.
+ bra t_frcinx
+
+LOGNEG:
+*--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
+*--REACHED FROM LOGBGN WHEN THE COMPACT FORM OF X IS NEGATIVE.
+ bra t_operr
+
+ xdef slognp1d
+slognp1d:
+*--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
+* Simply return the denorm
+* No computation is done here; control passes straight to t_extdnrm.
+
+ bra t_extdnrm
+
+ xdef slognp1
+slognp1:
+*--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
+*--In: A0 points at the input; D1 is loaded into FPCR before each exit.
+*--Out: FP0, then control passes to t_frcinx, t_dz or t_operr.
+
+ FMOVE.X (A0),FP0 ...LOAD INPUT
+ fabs.x fp0 ;test magnitude
+ fcmp.x LTHOLD,fp0 ;compare with min threshold
+ fbgt.w LP1REAL ;if greater, continue
+*--|input| is not greater than LTHOLD: the input itself is returned.
+ fmove.l #0,fpsr ;clr N flag from compare
+ fmove.l d1,fpcr
+ fmove.x (a0),fp0 ;return signed argument
+ bra t_frcinx
+
+LP1REAL:
+ FMOVE.X (A0),FP0 ...LOAD INPUT
+ CLR.L ADJK(a6)
+ FMOVE.X FP0,FP1 ...FP1 IS INPUT Z
+ FADD.S one,FP0 ...X := ROUND(1+Z)
+ FMOVE.X FP0,X(a6)
+ MOVE.W XFRAC(a6),XDCARE(a6) ...TOP 16 MANTISSA BITS NEXT TO THE EXPONENT
+ MOVE.L X(a6),D0 ...D0 IS X IN COMPACT FORM
+ TST.L D0
+ BLE.W LP1NEG0 ...LOG OF ZERO OR -VE
+ CMP2.L BOUNDS2,D0
+ BCS.W LOGMAIN ...BOUNDS2 IS [1/2,3/2]
+*--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
+*--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
+*--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
+
+LP1NEAR1:
+*--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
+ CMP2.L BOUNDS1,D0
+ BCS.B LP1CARE ...OUTSIDE BOUNDS1: USE THE TABLE APPROACH
+
+LP1ONE16:
+*--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
+*--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
+ FADD.X FP1,FP1 ...FP1 IS 2Z
+ FADD.S one,FP0 ...FP0 IS 1+X
+*--U = FP1/FP0
+ BRA.W LP1CONT2
+
+LP1CARE:
+*--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
+*--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
+*--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
+*--THERE ARE ONLY TWO CASES.
+*--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
+*--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
+*--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
+*--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
+
+ MOVE.L XFRAC(a6),FFRAC(a6)
+ ANDI.L #$FE000000,FFRAC(a6)
+ ORI.L #$01000000,FFRAC(a6) ...F OBTAINED
+ CMPI.L #$3FFF8000,D0 ...SEE IF 1+Z > 1
+ BGE.B KISZERO
+
+KISNEG1:
+ FMOVE.S TWO,FP0 ...FP0 IS 2 (ONLY THE FIRST LONGWORD OF TWO IS READ)
+ move.l #$3fff0000,F(a6) ...FIRST LONGWORD OF F
+ clr.l F+8(a6) ...LOW MANTISSA LONGWORD OF F IS ZERO
+ FSUB.X F(a6),FP0 ...2-F
+ MOVE.L FFRAC(a6),D0
+ ANDI.L #$7E000000,D0 ...KEEP THE SIX BITS THAT DISTINGUISH F
+ ASR.L #8,D0
+ ASR.L #8,D0
+ ASR.L #4,D0 ...D0 CONTAINS DISPLACEMENT FOR 1/F
+ FADD.X FP1,FP1 ...GET 2Z
+ FMOVEm.X FP2/fp3,-(sp) ...SAVE FP2/FP3
+ FADD.X FP1,FP0 ...FP0 IS Y-F = (2-F)+2Z
+ LEA LOGTBL,A0 ...A0 IS ADDRESS OF 1/F
+ ADDA.L D0,A0
+ FMOVE.S negone,FP1 ...FP1 IS K = -1
+ BRA.W LP1CONT1
+
+KISZERO:
+ FMOVE.S one,FP0 ...FP0 IS 1
+ move.l #$3fff0000,F(a6) ...FIRST LONGWORD OF F
+ clr.l F+8(a6) ...LOW MANTISSA LONGWORD OF F IS ZERO
+ FSUB.X F(a6),FP0 ...1-F
+ MOVE.L FFRAC(a6),D0
+ ANDI.L #$7E000000,D0 ...KEEP THE SIX BITS THAT DISTINGUISH F
+ ASR.L #8,D0
+ ASR.L #8,D0
+ ASR.L #4,D0 ...D0 CONTAINS DISPLACEMENT FOR 1/F
+ FADD.X FP1,FP0 ...FP0 IS Y-F
+ FMOVEm.X FP2/fp3,-(sp) ...FP2/FP3 SAVED
+ LEA LOGTBL,A0
+ ADDA.L D0,A0 ...A0 IS ADDRESS OF 1/F
+ FMOVE.S zero,FP1 ...FP1 IS K = 0
+ BRA.W LP1CONT1
+
+LP1NEG0:
+*--FPCR SAVED. D0 IS X IN COMPACT FORM.
+*--D0 < 0 GOES TO LP1NEG; OTHERWISE D0 IS ZERO AND LP1ZERO FOLLOWS.
+ TST.L D0
+ BLT.B LP1NEG
+LP1ZERO:
+ FMOVE.S negone,FP0 ...FP0 IS -1 ON THE WAY TO t_dz
+
+ fmove.l d1,fpcr
+ bra t_dz
+
+LP1NEG:
+ FMOVE.S zero,FP0 ...FP0 IS 0 ON THE WAY TO t_operr
+
+ fmove.l d1,fpcr
+ bra t_operr
+
+ end
diff --git a/sys/arch/m68k/fpsp/smovecr.sa b/sys/arch/m68k/fpsp/smovecr.sa
new file mode 100644
index 00000000000..9e13b64b1c4
--- /dev/null
+++ b/sys/arch/m68k/fpsp/smovecr.sa
@@ -0,0 +1,187 @@
+* $NetBSD: smovecr.sa,v 1.2 1994/10/26 07:49:57 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* smovecr.sa 3.1 12/10/90
+*
+* The entry point sMOVECR returns the constant at the
+* offset given in the instruction field.
+*
+* Input: An offset in the instruction word.
+*
+* Output: The constant rounded to the user's rounding
+* mode unchecked for overflow.
+*
+* Modified: fp0.
+*
+
+SMOVECR IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref nrm_set
+ xref round
+ xref PIRN
+ xref PIRZRM
+ xref PIRP
+ xref SMALRN
+ xref SMALRZRM
+ xref SMALRP
+ xref BIGRN
+ xref BIGRZRM
+ xref BIGRP
+
+FZERO dc.l 00000000
+*
+* FMOVECR
+*
+ xdef smovcr
+*
+* smovcr --- fetch the constant selected by the instruction's offset
+* field into fp0.  d0 receives the 7-bit offset extracted from
+* CMDREG1B(a6) and d1 the 2-bit rounding mode extracted from
+* USER_FPCR(a6).  Offset $00 is dispatched to PI_TBL, $0b - $0e to
+* SM_TBL and $30 - $3f to BG_TBL; every other offset returns zero
+* through Z_VAL.
+*
+smovcr:
+ bfextu CMDREG1B(a6){9:7},d0 ;get offset
+ bfextu USER_FPCR(a6){26:2},d1 ;get rmode
+*
+* check range of offset
+*
+ tst.b d0 ;if zero, offset is to pi
+ beq.b PI_TBL ;it is pi
+ cmpi.b #$0a,d0 ;check range $01 - $0a
+ ble.b Z_VAL ;if in this range, return zero
+ cmpi.b #$0e,d0 ;check range $0b - $0e
+ ble.b SM_TBL ;valid constants in this range
+ cmpi.b #$2f,d0 ;check range $0f - $2f
+ ble.b Z_VAL ;if in this range, return zero
+ cmpi.b #$3f,d0 ;check range $30 - $3f
+ ble BG_TBL ;valid constants in this range
+*
+* offsets $40 - $7f fall through and also return zero
+*
+Z_VAL:
+ fmove.s FZERO,fp0
+ rts
+PI_TBL:
+*
+* Offset $00 is pi.  Select the table that matches the rounding
+* mode in d1 (0 = rn, 3 = rp, anything else = rz or rm) and continue
+* at set_finx, i.e. pi is always flagged inexact.
+*
+ lea.l PIRN,a0 ;start with the rn table
+ tst.b d1 ;is rmode rn?
+ beq set_finx ;yes, PIRN is in a0
+ lea.l PIRP,a0 ;try the rp table
+ cmpi.b #$3,d1 ;is rmode rp?
+ beq set_finx ;yes, PIRP is in a0
+ lea.l PIRZRM,a0 ;rmode is rz or rm, PIRZRM in a0
+ bra set_finx
+SM_TBL:
+*
+* Offsets $0b - $0e.  Rebase the offset to 0 - 3, select the table
+* that matches the rounding mode in d1 (0 = rn, 3 = rp, anything
+* else = rz or rm), then decide whether inex2/ainex has to be set:
+* entries 0 - 2 are inexact, entry 3 is treated as exact.
+*
+ subi.l #$b,d0 ;make offset in 0 - 3 range
+ lea.l SMALRN,a0 ;start with the rn table
+ tst.b d1 ;is rmode rn?
+ beq.b SM_INX ;yes, SMALRN is in a0
+ lea.l SMALRP,a0 ;try the rp table
+ cmpi.b #$3,d1 ;is rmode rp?
+ beq.b SM_INX ;yes, SMALRP is in a0
+ lea.l SMALRZRM,a0 ;rmode is rz or rm, SMALRZRM in a0
+SM_INX:
+ cmpi.b #$2,d0 ;check if result is inex
+ bgt no_finx ;if 3, it is exact
+ bra set_finx ;if 0 - 2, it is inexact
+BG_TBL:
+*
+* Offsets $30 - $3f.  Rebase the offset to 0 - f, select the table
+* that matches the rounding mode in d1 (0 = rn, 3 = rp, anything
+* else = rz or rm), then decide whether inex2/ainex has to be set:
+* entries 0 - 1 and 8 - f are inexact, entries 2 - 7 are exact.
+*
+ subi.l #$30,d0 ;make offset in 0 - f range
+ lea.l BIGRN,a0 ;start with the rn table
+ tst.b d1 ;is rmode rn?
+ beq.b BG_INX ;yes, BIGRN is in a0
+ lea.l BIGRP,a0 ;try the rp table
+ cmpi.b #$3,d1 ;is rmode rp?
+ beq.b BG_INX ;yes, BIGRP is in a0
+ lea.l BIGRZRM,a0 ;rmode is rz or rm, BIGRZRM in a0
+BG_INX:
+ cmpi.b #$1,d0 ;check if result is inex
+ ble set_finx ;if 0 - 1, it is inexact
+ cmpi.b #$7,d0 ;second check
+ ble no_finx ;if 2 - 7, it is exact
+* ;if 8 - f, it is inexact: fall through to set_finx
+*
+* Common tail.  On entry a0 points to the selected constant table,
+* d0 is the entry index and d1 the rounding mode.  set_finx first
+* ORs inx2a_mask into USER_FPSR(a6); no_finx skips that.  Table
+* entries are 12 bytes long.
+*
+set_finx:
+ or.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex
+no_finx:
+ mulu.l #12,d0 ;use offset to point into tables
+ move.l d1,L_SCR1(a6) ;load mode for round call
+ bfextu USER_FPCR(a6){24:2},d1 ;get precision
+ tst.l d1 ;check if extended precision
+*
+* Precision is extended
+*
+ bne.b not_ext ;if not extended, go call round
+ fmovem.x (a0,d0),fp0 ;return result in fp0
+ rts
+*
+* Precision is single or double
+*
+not_ext:
+ swap d1 ;rnd prec in upper word of d1
+ add.l L_SCR1(a6),d1 ;merge rmode in low word of d1
+ move.l (a0,d0),FP_SCR1(a6) ;load first word to temp storage
+ move.l 4(a0,d0),FP_SCR1+4(a6) ;load second word
+ move.l 8(a0,d0),FP_SCR1+8(a6) ;load third word
+ clr.l d0 ;clear g,r,s
+ lea FP_SCR1(a6),a0 ;a0 now points to the working copy
+ btst.b #sign_bit,LOCAL_EX(a0) ;copy the sign bit ...
+ sne LOCAL_SGN(a0) ;convert to internal ext. format
+
+ bsr round ;go round the mantissa
+
+ bfclr LOCAL_SGN(a0){0:8} ;convert back to IEEE ext format
+ beq.b fin_fcr ;sign byte was clear, nothing to set
+ bset.b #sign_bit,LOCAL_EX(a0) ;sign byte was set, restore sign bit
+fin_fcr:
+ fmovem.x (a0),fp0 ;return rounded result in fp0
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/srem_mod.sa b/sys/arch/m68k/fpsp/srem_mod.sa
new file mode 100644
index 00000000000..822097985ee
--- /dev/null
+++ b/sys/arch/m68k/fpsp/srem_mod.sa
@@ -0,0 +1,446 @@
+* $NetBSD: srem_mod.sa,v 1.3 1994/10/26 07:49:58 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* srem_mod.sa 3.1 12/10/90
+*
+* The entry point sMOD computes the floating point MOD of the
+* input values X and Y. The entry point sREM computes the floating
+* point (IEEE) REM of the input values X and Y.
+*
+* INPUT
+* -----
+* Double-extended value Y is pointed to by address in register
+* A0. Double-extended value X is located in -12(A0). The values
+* of X and Y are both nonzero and finite; although either or both
+* of them can be denormalized. The special cases of zeros, NaNs,
+* and infinities are handled elsewhere.
+*
+* OUTPUT
+* ------
+* FREM(X,Y) or FMOD(X,Y), depending on entry point.
+*
+* ALGORITHM
+* ---------
+*
+* Step 1. Save and strip signs of X and Y: signX := sign(X),
+* signY := sign(Y), X := |X|, Y := |Y|,
+* signQ := signX EOR signY. Record whether MOD or REM
+* is requested.
+*
+* Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0.
+* If (L < 0) then
+* R := X, go to Step 4.
+* else
+* R := 2^(-L)X, j := L.
+* endif
+*
+* Step 3. Perform MOD(X,Y)
+* 3.1 If R = Y, go to Step 9.
+* 3.2 If R > Y, then { R := R - Y, Q := Q + 1}
+* 3.3 If j = 0, go to Step 4.
+* 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to
+* Step 3.1.
+*
+* Step 4. At this point, R = X - QY = MOD(X,Y). Set
+* Last_Subtract := false (used in Step 7 below). If
+* MOD is requested, go to Step 6.
+*
+* Step 5. R = MOD(X,Y), but REM(X,Y) is requested.
+* 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to
+* Step 6.
+* 5.2 If R > Y/2, then { set Last_Subtract := true,
+* Q := Q + 1, Y := signX*Y }. Go to Step 6.
+* 5.3 This is the tricky case of R = Y/2. If Q is odd,
+* then { Q := Q + 1, signX := -signX }.
+*
+* Step 6. R := signX*R.
+*
+* Step 7. If Last_Subtract = true, R := R - Y.
+*
+* Step 8. Return signQ, last 7 bits of Q, and R as required.
+*
+* Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus,
+* X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),
+* R := 0. Return signQ, last 7 bits of Q, and R.
+*
+
+SREM_MOD IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+Mod_Flag equ L_SCR3
+SignY equ FP_SCR3+4
+SignX equ FP_SCR3+8
+SignQ equ FP_SCR3+12
+Sc_Flag equ FP_SCR4
+
+Y equ FP_SCR1
+Y_Hi equ Y+4
+Y_Lo equ Y+8
+
+R equ FP_SCR2
+R_Hi equ R+4
+R_Lo equ R+8
+
+
+Scale DC.L $00010000,$80000000,$00000000,$00000000
+
+ xref t_avoid_unsupp
+
+ xdef smod
+*..smod: MOD entry point. Clears Mod_Flag and joins the common code.
+smod:
+
+ Clr.L Mod_Flag(a6)
+ BRA.B Mod_Rem
+
+ xdef srem
+*..srem: REM entry point. Sets Mod_Flag to 1 and falls into the common code.
+srem:
+
+ Move.L #1,Mod_Flag(a6)
+
+*..Common code. A0 points to Y. D2-D7 are pushed here and popped again
+*..at Restore. This part loads |Y| into (D3,D4,D5) and normalizes it
+*..when the exponent field of Y is zero.
+Mod_Rem:
+*..Save sign of X and Y
+ MoveM.L D2-D7,-(A7) ...save data registers
+ Move.W (A0),D3 ...sign and exponent word of Y
+ Move.W D3,SignY(a6)
+ AndI.L #$00007FFF,D3 ...Y := |Y|
+
+*
+ Move.L 4(A0),D4
+ Move.L 8(A0),D5 ...(D3,D4,D5) is |Y|
+
+ Tst.L D3
+ BNE.B Y_Normal ...exponent field is nonzero
+
+ Move.L #$00003FFE,D3 ...$3FFD + 1
+ Tst.L D4
+ BNE.B HiY_not0
+
+*..Upper mantissa word is zero: move the lower word up (32 positions),
+*..then shift out the remaining leading zeros counted by BFFFO.
+HiY_0:
+ Move.L D5,D4
+ CLR.L D5
+ SubI.L #32,D3
+ CLR.L D6
+ BFFFO D4{0:32},D6
+ LSL.L D6,D4
+ Sub.L D6,D3 ...(D3,D4,D5) is normalized
+* ...with bias $7FFD
+ BRA.B Chk_X
+
+*..Upper mantissa word is nonzero: shift (D4,D5) left by the number of
+*..leading zeros in D4, carrying the top bits of D5 into D4.
+HiY_not0:
+ CLR.L D6
+ BFFFO D4{0:32},D6
+ Sub.L D6,D3
+ LSL.L D6,D4
+ Move.L D5,D7 ...a copy of D5
+ LSL.L D6,D5
+ Neg.L D6
+ AddI.L #32,D6 ...D6 := 32 - shift count
+ LSR.L D6,D7 ...bits of D5 that move into D4
+ Or.L D7,D4 ...(D3,D4,D5) normalized
+* ...with bias $7FFD
+ BRA.B Chk_X
+
+Y_Normal:
+ AddI.L #$00003FFE,D3 ...(D3,D4,D5) normalized
+* ...with bias $7FFD
+
+*..Load X from -12(A0), record SignX and SignQ (sign(X) EOR sign(Y)),
+*..and bring |X| into (D0,D1,D2) normalized exactly as was done for Y.
+Chk_X:
+ Move.W -12(A0),D0 ...sign and exponent word of X
+ Move.W D0,SignX(a6)
+ Move.W SignY(a6),D1
+ EOr.L D0,D1
+ AndI.L #$00008000,D1 ...keep only the sign bit
+ Move.W D1,SignQ(a6) ...sign(Q) obtained
+ AndI.L #$00007FFF,D0
+ Move.L -8(A0),D1
+ Move.L -4(A0),D2 ...(D0,D1,D2) is |X|
+ Tst.L D0
+ BNE.B X_Normal ...exponent field is nonzero
+ Move.L #$00003FFE,D0
+ Tst.L D1
+ BNE.B HiX_not0
+
+*..Upper mantissa word is zero: move the lower word up (32 positions),
+*..then shift out the remaining leading zeros counted by BFFFO.
+HiX_0:
+ Move.L D2,D1
+ CLR.L D2
+ SubI.L #32,D0
+ CLR.L D6
+ BFFFO D1{0:32},D6
+ LSL.L D6,D1
+ Sub.L D6,D0 ...(D0,D1,D2) is normalized
+* ...with bias $7FFD
+ BRA.B Init
+
+*..Upper mantissa word is nonzero: shift (D1,D2) left by the number of
+*..leading zeros in D1, carrying the top bits of D2 into D1.
+HiX_not0:
+ CLR.L D6
+ BFFFO D1{0:32},D6
+ Sub.L D6,D0
+ LSL.L D6,D1
+ Move.L D2,D7 ...a copy of D2
+ LSL.L D6,D2
+ Neg.L D6
+ AddI.L #32,D6 ...D6 := 32 - shift count
+ LSR.L D6,D7 ...bits of D2 that move into D1
+ Or.L D7,D1 ...(D0,D1,D2) normalized
+* ...with bias $7FFD
+ BRA.B Init
+
+X_Normal:
+ AddI.L #$00003FFE,D0 ...(D0,D1,D2) normalized
+* ...with bias $7FFD
+
+*..Set up the shift-and-subtract loop. Register roles from here on:
+*.. D0 = j (iterations left), D3 = Q, A1 = k, D6 = carry out of 2R,
+*.. (D1,D2) = R, (D4,D5) = Y. L_SCR1 keeps the biased exponent of Y and
+*.. L_SCR2 the biased exponent of X.
+Init:
+*
+ Move.L D3,L_SCR1(a6) ...save biased expo(Y)
+ move.l d0,L_SCR2(a6) ;save d0
+ Sub.L D3,D0 ...L := expo(X)-expo(Y)
+* Move.L D0,L ...D0 is j
+ CLR.L D6 ...D6 := carry <- 0
+ CLR.L D3 ...D3 is Q
+ MoveA.L #0,A1 ...A1 is k; j+k=L, Q=0
+
+*..(Carry,D1,D2) is R
+ Tst.L D0
+ BGE.B Mod_Loop
+
+*..expo(X) < expo(Y). Thus X = mod(X,Y)
+*
+ move.l L_SCR2(a6),d0 ;restore d0
+ BRA.W Get_Mod
+
+*..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
+
+
+Mod_Loop:
+ Tst.L D6 ...test carry bit
+ BGT.B R_GT_Y ...SCS below leaves $FF in D6 when 2R overflowed
+
+*..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
+ Cmp.L D4,D1 ...compare hi(R) and hi(Y)
+ BNE.B R_NE_Y
+ Cmp.L D5,D2 ...compare lo(R) and lo(Y)
+ BNE.B R_NE_Y
+
+*..At this point, R = Y
+ BRA.W Rem_is_0
+
+R_NE_Y:
+*..use the borrow of the previous compare
+ BCS.B R_LT_Y ...borrow is set iff R < Y
+
+R_GT_Y:
+*..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
+*..and Y < (D1,D2) < 2Y. Either way, perform R - Y
+ Sub.L D5,D2 ...lo(R) - lo(Y)
+ SubX.L D4,D1 ...hi(R) - hi(Y)
+ CLR.L D6 ...clear carry
+ AddQ.L #1,D3 ...Q := Q + 1
+
+R_LT_Y:
+*..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
+ Tst.L D0 ...see if j = 0.
+ BEQ.B PostLoop
+
+ Add.L D3,D3 ...Q := 2Q
+ Add.L D2,D2 ...lo(R) = 2lo(R)
+ AddX.L D1,D1 ...hi(R) = 2hi(R) + carry
+ SCS D6 ...set Carry if 2(R) overflows
+ AddQ.L #1,A1 ...k := k+1
+ SubQ.L #1,D0 ...j := j - 1
+*..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
+
+ BRA.B Mod_Loop
+
+*..Loop finished. Renormalize the remainder in (D1,D2), starting from
+*..the biased exponent of Y saved in L_SCR1, and fall into Get_Mod
+*..with the new exponent in D0.
+PostLoop:
+*..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
+
+*..normalize R.
+ Move.L L_SCR1(a6),D0 ...new biased expo of R
+ Tst.L D1
+ BNE.B HiR_not0
+
+*..Upper word is zero: move the lower word up, then shift out the
+*..remaining leading zeros.
+HiR_0:
+ Move.L D2,D1
+ CLR.L D2
+ SubI.L #32,D0
+ CLR.L D6
+ BFFFO D1{0:32},D6
+ LSL.L D6,D1
+ Sub.L D6,D0 ...(D0,D1,D2) is normalized
+* ...with bias $7FFD
+ BRA.B Get_Mod
+
+*..Upper word is nonzero: shift (D1,D2) left by the leading-zero count.
+HiR_not0:
+ CLR.L D6
+ BFFFO D1{0:32},D6
+ BMI.B Get_Mod ...already normalized
+ Sub.L D6,D0
+ LSL.L D6,D1
+ Move.L D2,D7 ...a copy of D2
+ LSL.L D6,D2
+ Neg.L D6
+ AddI.L #32,D6 ...D6 := 32 - shift count
+ LSR.L D6,D7 ...bits of D2 that move into D1
+ Or.L D7,D1 ...(D0,D1,D2) normalized
+
+*
+
+*
+*..Store R and Y as extended-precision values in the scratch areas and
+*..load R into fp0. When the exponent of R (bias $7FFD) is below $41FE
+*..the exponents are stored as they are and Sc_Flag is set, so that
+*..Restore multiplies the result by Scale afterwards. Otherwise $3FFE
+*..is subtracted from both exponents first and Sc_Flag is cleared.
+Get_Mod:
+ CmpI.L #$000041FE,D0
+ BGE.B No_Scale
+Do_Scale:
+ Move.W D0,R(a6)
+ clr.w R+2(a6)
+ Move.L D1,R_Hi(a6)
+ Move.L D2,R_Lo(a6)
+ Move.L L_SCR1(a6),D6 ...biased expo(Y), unchanged
+ Move.W D6,Y(a6)
+ clr.w Y+2(a6)
+ Move.L D4,Y_Hi(a6)
+ Move.L D5,Y_Lo(a6)
+ FMove.X R(a6),fp0 ...no exception
+ Move.L #1,Sc_Flag(a6) ...result must be scaled at Restore
+ BRA.B ModOrRem
+No_Scale:
+ Move.L D1,R_Hi(a6)
+ Move.L D2,R_Lo(a6)
+ SubI.L #$3FFE,D0 ...remove the extra bias from expo(R)
+ Move.W D0,R(a6)
+ clr.w R+2(a6)
+ Move.L L_SCR1(a6),D6
+ SubI.L #$3FFE,D6 ...remove the extra bias from expo(Y)
+ Move.L D6,L_SCR1(a6)
+ FMove.X R(a6),fp0
+ Move.W D6,Y(a6)
+ Move.L D4,Y_Hi(a6)
+ Move.L D5,Y_Lo(a6)
+ Clr.L Sc_Flag(a6) ...no scaling needed at Restore
+
+*
+
+
+*..fp0 holds R = MOD(|X|,|Y|). For MOD (Mod_Flag = 0) go straight to
+*..Fix_Sign. For REM compare R with Y/2: first the exponents (D0 against
+*..expo(Y)-1), then, when they are equal, the mantissas.
+ModOrRem:
+ Move.L Mod_Flag(a6),D6
+ BEQ.B Fix_Sign ...MOD requested
+
+ Move.L L_SCR1(a6),D6 ...new biased expo(Y)
+ SubQ.L #1,D6 ...biased expo(Y/2)
+ Cmp.L D6,D0
+ BLT.B Fix_Sign ...R < Y/2
+ BGT.B Last_Sub ...R > Y/2
+
+ Cmp.L D4,D1
+ BNE.B Not_EQ
+ Cmp.L D5,D2
+ BNE.B Not_EQ
+ BRA.W Tie_Case ...R = Y/2
+
+Not_EQ:
+ BCS.B Fix_Sign ...borrow from the last compare: R < Y/2
+
+*..R > Y/2: subtract Y once more and bump the quotient.
+Last_Sub:
+*
+ FSub.X Y(a6),fp0 ...no exceptions
+ AddQ.L #1,D3 ...Q := Q + 1
+
+*
+
+*
+
+Fix_Sign:
+*..Give the result the sign recorded for X
+ Tst.W SignX(a6)
+ BPL.B Get_Q ...X was positive, keep fp0 as it is
+ FNeg.X fp0
+
+*..Build the quotient byte (sign(Q) on top of the low 7 bits of Q) and
+*..place it in bits 16-23 of the fpsr.
+*
+Get_Q:
+ MoveQ #0,D6
+ Move.W SignQ(a6),D6 ...D6 is sign(Q), in bit 15
+ LSR.L #8,D6 ...move it down to bit 7
+ AndI.L #$0000007F,D3 ...7 bits of Q
+ Or.L D6,D3 ...sign and bits of Q
+ Swap D3 ...quotient byte now in bits 16-23
+ FMove.L fpsr,D6
+ AndI.L #$FF00FFFF,D6 ...clear the old quotient byte
+ Or.L D3,D6
+ FMove.L D6,fpsr ...put Q in fpsr
+
+*
+*..Common exit. Pops D2-D7 pushed at Mod_Rem and reloads the fpcr from
+*..USER_FPCR. When Sc_Flag is set the result is multiplied by Scale and
+*..control passes to t_avoid_unsupp; otherwise the result is moved onto
+*..itself under the user's fpcr and the routine returns.
+Restore:
+ MoveM.L (A7)+,D2-D7
+ FMove.L USER_FPCR(a6),fpcr
+ Move.L Sc_Flag(a6),D0
+ BEQ.B Finish
+ FMul.X Scale(pc),fp0 ...may cause underflow
+ bra t_avoid_unsupp ;check for denorm as a
+* ;result of the scaling
+
+Finish:
+ fmove.x fp0,fp0 ;capture exceptions & round
+ rts
+
+*..Reached from Mod_Loop when R equals Y exactly. The remainder is
+*..zero and the quotient is 2^j (Q+1). When j >= 8 the low 7 bits of
+*..that quotient are all zero, so Q is simply cleared.
+Rem_is_0:
+*..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
+ AddQ.L #1,D3
+ CmpI.L #8,D0 ...D0 is j
+ BGE.B Q_Big
+
+ LSL.L D0,D3 ...Q := 2^j (Q+1)
+ BRA.B Set_R_0
+
+Q_Big:
+ CLR.L D3
+
+Set_R_0:
+ FMove.S #:00000000,fp0 ...R := 0
+ Clr.L Sc_Flag(a6) ...a zero result needs no scaling
+ BRA.W Fix_Sign
+
+Tie_Case:
+*..R = Y/2 exactly. An even Q is left alone; an odd Q is rounded up
+*..to the next (even) value and the sign recorded for X is inverted.
+ BTst #0,D3 ...parity of Q
+ BEq.W Fix_Sign ...Q is even
+
+*..Q is odd, Q := Q + 1, signX := -signX
+ AddQ.L #1,D3
+ EOrI.W #$8000,SignX(a6)
+ BRA.W Fix_Sign
+
+ End
diff --git a/sys/arch/m68k/fpsp/ssin.sa b/sys/arch/m68k/fpsp/ssin.sa
new file mode 100644
index 00000000000..672281a19ea
--- /dev/null
+++ b/sys/arch/m68k/fpsp/ssin.sa
@@ -0,0 +1,771 @@
+* $NetBSD: ssin.sa,v 1.3 1994/10/26 07:50:01 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* ssin.sa 3.3 7/29/91
+*
+* The entry point sSIN computes the sine of an input argument,
+* sCOS computes the cosine, and sSINCOS computes both. The
+* corresponding entry points with a "d" compute the same
+* function values for denormalized inputs.
+*
+* Input: Double-extended number X in location pointed to
+* by address register a0.
+*
+* Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or
+* COS is requested. Otherwise, for SINCOS, sin(X) is returned
+* in Fp0, and cos(X) is returned in Fp1.
+*
+* Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
+*
+* Accuracy and Monotonicity: The returned result is within 1 ulp in
+* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+* result is subsequently rounded to double precision. The
+* result is provably monotonic in double precision.
+*
+* Speed: The programs sSIN and sCOS take approximately 150 cycles for
+* input argument X such that |X| < 15Pi, which is the usual
+* situation. The speed for sSINCOS is approximately 190 cycles.
+*
+* Algorithm:
+*
+* SIN and COS:
+* 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
+*
+* 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
+*
+* 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
+* k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite
+* k by k := k + AdjN.
+*
+* 4. If k is even, go to 6.
+*
+* 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
+* where cos(r) is approximated by an even polynomial in r,
+* 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r.
+* Exit.
+*
+* 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
+* where sin(r) is approximated by an odd polynomial in r
+* r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r.
+* Exit.
+*
+* 7. If |X| > 1, go to 9.
+*
+* 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
+*
+* 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
+*
+* SINCOS:
+* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
+*
+* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
+* k = N mod 4, so in particular, k = 0,1,2,or 3.
+*
+* 3. If k is even, go to 5.
+*
+* 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
+* j1 exclusive or with the l.s.b. of k.
+* sgn1 := (-1)**j1, sgn2 := (-1)**j2.
+* SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
+* sin(r) and cos(r) are computed as odd and even polynomials
+* in r, respectively. Exit
+*
+* 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
+* SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
+* sin(r) and cos(r) are computed as odd and even polynomials
+* in r, respectively. Exit
+*
+* 6. If |X| > 1, go to 8.
+*
+* 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
+*
+* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
+*
+
+SSIN IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+BOUNDS1 DC.L $3FD78000,$4004BC7E
+TWOBYPI DC.L $3FE45F30,$6DC9C883
+
+SINA7 DC.L $BD6AAA77,$CCC994F5
+SINA6 DC.L $3DE61209,$7AAE8DA1
+
+SINA5 DC.L $BE5AE645,$2A118AE4
+SINA4 DC.L $3EC71DE3,$A5341531
+
+SINA3 DC.L $BF2A01A0,$1A018B59,$00000000,$00000000
+
+SINA2 DC.L $3FF80000,$88888888,$888859AF,$00000000
+
+SINA1 DC.L $BFFC0000,$AAAAAAAA,$AAAAAA99,$00000000
+
+COSB8 DC.L $3D2AC4D0,$D6011EE3
+COSB7 DC.L $BDA9396F,$9F45AC19
+
+COSB6 DC.L $3E21EED9,$0612C972
+COSB5 DC.L $BE927E4F,$B79D9FCF
+
+COSB4 DC.L $3EFA01A0,$1A01D423,$00000000,$00000000
+
+COSB3 DC.L $BFF50000,$B60B60B6,$0B61D438,$00000000
+
+COSB2 DC.L $3FFA0000,$AAAAAAAA,$AAAAAB5E
+COSB1 DC.L $BF000000
+
+INVTWOPI DC.L $3FFC0000,$A2F9836E,$4E44152A
+
+TWOPI1 DC.L $40010000,$C90FDAA2,$00000000,$00000000
+TWOPI2 DC.L $3FDF0000,$85A308D4,$00000000,$00000000
+
+ xref PITBL
+
+INARG equ FP_SCR4
+
+X equ FP_SCR5
+XDCARE equ X+2
+XFRAC equ X+4
+
+RPRIME equ FP_SCR1
+SPRIME equ FP_SCR2
+
+POSNEG1 equ L_SCR1
+TWOTO63 equ L_SCR1
+
+ENDFLAG equ L_SCR2
+N equ L_SCR2
+
+ADJN equ L_SCR3
+
+ xref t_frcinx
+ xref t_extdnrm
+ xref sto_cos
+
+ xdef ssind
+ssind:
+*--SIN(X) = X FOR DENORMALIZED X
+*--NOTHING IS COMPUTED HERE; CONTROL GOES STRAIGHT TO t_extdnrm.
+ bra t_extdnrm
+
+ xdef scosd
+scosd:
+*--COS(X) = 1 FOR DENORMALIZED X
+*--LOADS 1.0 INTO FP0, CLEARS THE FPSR AND EXITS THROUGH t_frcinx.
+
+ FMOVE.S #:3F800000,FP0
+*
+* 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
+*
+ fmove.l #0,fpsr
+*
+ bra t_frcinx
+
+ xdef ssin
+ssin:
+*--SET ADJN TO 0
+ CLR.L ADJN(a6)
+ BRA.B SINBGN
+
+ xdef scos
+scos:
+*--SET ADJN TO 1
+ MOVE.L #1,ADJN(a6)
+
+SINBGN:
+*--LOAD X INTO FP0, KEEP A COPY IN X(a6), AND CHECK IF |X| IS TOO
+*--SMALL OR LARGE. D0 GETS X IN COMPACT FORM: SIGN AND EXPONENT IN
+*--THE UPPER WORD, THE TOP 16 MANTISSA BITS IN THE LOWER WORD.
+
+ FMOVE.X (a0),FP0 ...LOAD INPUT
+
+ MOVE.L (A0),D0
+ MOVE.W 4(A0),D0
+ FMOVE.X FP0,X(a6)
+ ANDI.L #$7FFFFFFF,D0 ...COMPACTIFY X
+
+ CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
+ BGE.B SOK1
+ BRA.W SINSM ...NO, TINY ARGUMENT
+
+SOK1:
+ CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
+ BLT.B SINMAIN
+ BRA.W REDUCEX ...NO, GENERAL ARGUMENT REDUCTION
+
+SINMAIN:
+*--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
+*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
+ FMOVE.X FP0,FP1
+ FMUL.D TWOBYPI,FP1 ...X*2/PI
+
+*--HIDE THE NEXT INSTRUCTION
+ LEA PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
+
+
+*--FP1 IS NOW READY
+ FMOVE.L FP1,N(a6) ...CONVERT TO INTEGER
+
+ MOVE.L N(a6),D0
+ ASL.L #4,D0 ...EACH TABLE ENTRY IS 16 BYTES
+ ADDA.L D0,A1 ...A1 IS THE ADDRESS OF N*PIBY2
+* ...WHICH IS IN TWO PIECES Y1 & Y2
+
+ FSUB.X (A1)+,FP0 ...X-Y1
+*--HIDE THE NEXT ONE
+ FSUB.S (A1),FP0 ...FP0 IS R = (X-Y1)-Y2
+
+SINCONT:
+*--continuation from REDUCEX
+*--ON ENTRY FP0 IS R AND N(a6) HOLDS N.
+
+*--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
+ MOVE.L N(a6),D0
+ ADD.L ADJN(a6),D0 ...SEE IF D0 IS ODD OR EVEN
+ ROR.L #1,D0 ...D0 WAS ODD IFF D0 IS NEGATIVE
+ TST.L D0
+ BLT.W COSPOLY ...ODD: COS(R) IS NEEDED; EVEN FALLS INTO SINPOLY
+
+SINPOLY:
+*--ON ENTRY FP0 IS R AND D0 IS (N+ADJN) ROTATED RIGHT BY ONE BIT.
+*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
+*--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
+*--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
+*--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
+*--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
+*--WHERE T=S*S.
+*--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
+*--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
+*--FP1, FP2 AND FP3 ARE USED AS SCRATCH. THE LAST ADD IS DONE UNDER
+*--THE FPCR VALUE HELD IN D1.
+ FMOVE.X FP0,X(a6) ...X IS R
+ FMUL.X FP0,FP0 ...FP0 IS S
+*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
+ FMOVE.D SINA7,FP3
+ FMOVE.D SINA6,FP2
+*--FP0 IS NOW READY
+ FMOVE.X FP0,FP1
+ FMUL.X FP1,FP1 ...FP1 IS T
+*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
+
+ ROR.L #1,D0
+ ANDI.L #$80000000,D0
+* ...LEAST SIG. BIT OF D0 IN SIGN POSITION
+ EOR.L D0,X(a6) ...X IS NOW R'= SGN*R
+
+ FMUL.X FP1,FP3 ...TA7
+ FMUL.X FP1,FP2 ...TA6
+
+ FADD.D SINA5,FP3 ...A5+TA7
+ FADD.D SINA4,FP2 ...A4+TA6
+
+ FMUL.X FP1,FP3 ...T(A5+TA7)
+ FMUL.X FP1,FP2 ...T(A4+TA6)
+
+ FADD.D SINA3,FP3 ...A3+T(A5+TA7)
+ FADD.X SINA2,FP2 ...A2+T(A4+TA6)
+
+ FMUL.X FP3,FP1 ...T(A3+T(A5+TA7))
+
+ FMUL.X FP0,FP2 ...S(A2+T(A4+TA6))
+ FADD.X SINA1,FP1 ...A1+T(A3+T(A5+TA7))
+ FMUL.X X(a6),FP0 ...R'*S
+
+ FADD.X FP2,FP1 ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
+*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
+*--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING
+*--NOTE(review): NO RESTORE OF FP2/FP3 IS DONE IN THIS ROUTINE;
+*--PRESUMABLY THE CALLER'S FRAME TAKES CARE OF IT - CONFIRM.
+
+
+ FMUL.X FP1,FP0 ...SIN(R')-R'
+*--FP1 RELEASED.
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FADD.X X(a6),FP0 ;last inst - possible exception set
+ bra t_frcinx
+
+
+COSPOLY:
+*--ON ENTRY FP0 IS R AND D0 IS (N+ADJN) ROTATED RIGHT BY ONE BIT.
+*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
+*--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
+*--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
+*--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
+*--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
+*--WHERE T=S*S.
+*--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
+*--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
+*--AND IS THEREFORE STORED AS SINGLE PRECISION.
+*--FP1, FP2 AND FP3 ARE USED AS SCRATCH. THE LAST ADD IS DONE UNDER
+*--THE FPCR VALUE HELD IN D1.
+
+ FMUL.X FP0,FP0 ...FP0 IS S
+*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
+ FMOVE.D COSB8,FP2
+ FMOVE.D COSB7,FP3
+*--FP0 IS NOW READY
+ FMOVE.X FP0,FP1
+ FMUL.X FP1,FP1 ...FP1 IS T
+*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
+ FMOVE.X FP0,X(a6) ...X IS S
+ ROR.L #1,D0
+ ANDI.L #$80000000,D0
+* ...LEAST SIG. BIT OF D0 IN SIGN POSITION
+
+ FMUL.X FP1,FP2 ...TB8
+*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
+ EOR.L D0,X(a6) ...X IS NOW S'= SGN*S
+ ANDI.L #$80000000,D0
+
+ FMUL.X FP1,FP3 ...TB7
+*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
+ ORI.L #$3F800000,D0 ...D0 IS SGN IN SINGLE
+ MOVE.L D0,POSNEG1(a6)
+
+ FADD.D COSB6,FP2 ...B6+TB8
+ FADD.D COSB5,FP3 ...B5+TB7
+
+ FMUL.X FP1,FP2 ...T(B6+TB8)
+ FMUL.X FP1,FP3 ...T(B5+TB7)
+
+ FADD.D COSB4,FP2 ...B4+T(B6+TB8)
+ FADD.X COSB3,FP3 ...B3+T(B5+TB7)
+
+ FMUL.X FP1,FP2 ...T(B4+T(B6+TB8))
+ FMUL.X FP3,FP1 ...T(B3+T(B5+TB7))
+
+ FADD.X COSB2,FP2 ...B2+T(B4+T(B6+TB8))
+ FADD.S COSB1,FP1 ...B1+T(B3+T(B5+TB7))
+
+ FMUL.X FP2,FP0 ...S(B2+T(B4+T(B6+TB8)))
+*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
+*--FP2 RELEASED.
+
+
+ FADD.X FP1,FP0 ...[B1+T(B3+...)]+[S(B2+T(B4+...))]
+*--FP1 RELEASED
+
+ FMUL.X X(a6),FP0 ...S'*(...)
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FADD.S POSNEG1(a6),FP0 ;last inst - possible exception set
+ bra t_frcinx
+
+
+SINBORS:
+*--IF |X| >= 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
+*--IF |X| < 2**(-40), RETURN X OR 1.
+*--NOTE(review): NOTHING IN THIS FILE BRANCHES TO SINBORS AND THE
+*--INSTRUCTION BEFORE IT IS AN UNCONDITIONAL BRA, SO THE COMPARE
+*--BELOW LOOKS UNREACHABLE; SINBGN BRANCHES TO SINSM AND REDUCEX
+*--DIRECTLY.
+ CMPI.L #$3FFF8000,D0
+ BGT.B REDUCEX
+
+
+SINSM:
+*--|X| < 2**(-40). ADJN > 0 MEANS COS WAS REQUESTED.
+ MOVE.L ADJN(a6),D0
+ TST.L D0
+ BGT.B COSTINY
+
+SINTINY:
+*--RETURN X ITSELF, RELOADED UNDER THE FPCR VALUE HELD IN D1.
+ CLR.W XDCARE(a6) ...JUST IN CASE
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FMOVE.X X(a6),FP0 ;last inst - possible exception set
+ bra t_frcinx
+
+
+COSTINY:
+*--RETURN 1 MINUS A TINY SINGLE-PRECISION VALUE, COMPUTED UNDER THE
+*--FPCR VALUE HELD IN D1.
+ FMOVE.S #:3F800000,FP0
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FSUB.S #:00800000,FP0 ;last inst - possible exception set
+ bra t_frcinx
+
+
+REDUCEX:
+*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
+*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
+*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
+*--ON ENTRY FP0 IS X AND D0 IS |X| IN COMPACT FORM. FP2-FP5 AND D2
+*--ARE PUSHED HERE AND POPPED AGAIN AT RESTORE. FP1 STARTS AS ZERO
+*--(THE LOW PART r OF THE RUNNING REMAINDER).
+
+ FMOVEM.X FP2-FP5,-(A7) ...save FP2 through FP5
+ MOVE.L D2,-(A7)
+ FMOVE.S #:00000000,FP1
+*--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
+*--there is a danger of unwanted overflow in first LOOP iteration. In this
+*--case, reduce argument by one remainder step to make subsequent reduction
+*--safe.
+ cmpi.l #$7ffeffff,d0 ;is argument dangerously large?
+ bne.b LOOP
+ move.l #$7ffe0000,FP_SCR2(a6) ;yes
+* ;create 2**16383*PI/2
+ move.l #$c90fdaa2,FP_SCR2+4(a6)
+ clr.l FP_SCR2+8(a6)
+ ftst.x fp0 ;test sign of argument
+ move.l #$7fdc0000,FP_SCR3(a6) ;create low half of 2**16383*
+* ;PI/2 at FP_SCR3
+ move.l #$85a308d3,FP_SCR3+4(a6)
+ clr.l FP_SCR3+8(a6)
+ fblt.w red_neg ;negative arg: add the constants as built
+ or.w #$8000,FP_SCR2(a6) ;positive arg: negate both constants
+ or.w #$8000,FP_SCR3(a6)
+red_neg:
+ fadd.x FP_SCR2(a6),fp0 ;high part of reduction is exact
+ fmove.x fp0,fp1 ;save high result in fp1
+ fadd.x FP_SCR3(a6),fp0 ;low part of reduction
+ fsub.x fp0,fp1 ;determine low component of result
+ fadd.x FP_SCR3(a6),fp1 ;fp0/fp1 are reduced argument.
+
+*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
+*--integer quotient will be stored in N
+*--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
+*--EACH PASS REMOVES A MULTIPLE OF 2**L * (PI/2). WHEN THE EXPONENT K
+*--OF THE CURRENT REMAINDER IS AT MOST 28, L IS 0 AND ENDFLAG IS SET,
+*--WHICH MAKES THIS THE FINAL PASS.
+
+LOOP:
+ FMOVE.X FP0,INARG(a6) ...+-2**K * F, 1 <= F < 2
+ MOVE.W INARG(a6),D0
+ MOVE.L D0,A1 ...save a copy of D0
+ ANDI.L #$00007FFF,D0
+ SUBI.L #$00003FFF,D0 ...D0 IS K
+ CMPI.L #28,D0
+ BLE.B LASTLOOP
+CONTLOOP:
+ SUBI.L #27,D0 ...D0 IS L := K-27
+ CLR.L ENDFLAG(a6)
+ BRA.B WORK
+LASTLOOP:
+ CLR.L D0 ...D0 IS L := 0
+ MOVE.L #1,ENDFLAG(a6)
+
+WORK:
+*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
+*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
+
+*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
+*--2**L * (PIby2_1), 2**L * (PIby2_2)
+
+ MOVE.L #$00003FFE,D2 ...BIASED EXPO OF 2/PI
+ SUB.L D0,D2 ...BIASED EXPO OF 2**(-L)*(2/PI)
+
+ MOVE.L #$A2F9836E,FP_SCR1+4(a6)
+ MOVE.L #$4E44152A,FP_SCR1+8(a6)
+ MOVE.W D2,FP_SCR1(a6) ...FP_SCR1 is 2**(-L)*(2/PI)
+
+ FMOVE.X FP0,FP2
+ FMUL.X FP_SCR1(a6),FP2
+*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
+*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
+*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
+*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
+*--US THE DESIRED VALUE IN FLOATING POINT.
+
+*--HIDE SIX CYCLES OF INSTRUCTION
+ MOVE.L A1,D2 ...SIGN/EXPONENT WORD OF INARG SAVED ABOVE
+ SWAP D2 ...SIGN BIT MOVES TO BIT 31
+ ANDI.L #$80000000,D2
+ ORI.L #$5F000000,D2 ...D2 IS SIGN(INARG)*2**63 IN SGL
+ MOVE.L D2,TWOTO63(a6)
+
+ MOVE.L D0,D2
+ ADDI.L #$00003FFF,D2 ...BIASED EXPO OF 2**L * (PI/2)
+
+*--FP2 IS READY
+ FADD.S TWOTO63(a6),FP2 ...THE FRACTIONAL PART OF FP2 IS ROUNDED
+
+*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
+ MOVE.W D2,FP_SCR2(a6)
+ CLR.W FP_SCR2+2(a6)
+ MOVE.L #$C90FDAA2,FP_SCR2+4(a6)
+ CLR.L FP_SCR2+8(a6) ...FP_SCR2 is 2**(L) * Piby2_1
+
+*--FP2 IS READY
+ FSUB.S TWOTO63(a6),FP2 ...FP2 is N
+
+ ADDI.L #$00003FDD,D0
+ MOVE.W D0,FP_SCR3(a6)
+ CLR.W FP_SCR3+2(a6)
+ MOVE.L #$85A308D3,FP_SCR3+4(a6)
+ CLR.L FP_SCR3+8(a6) ...FP_SCR3 is 2**(L) * Piby2_2
+
+ MOVE.L ENDFLAG(a6),D0 ...D0 > 0 IFF THIS IS THE LAST PASS
+
+*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
+*--P2 = 2**(L) * Piby2_2
+ FMOVE.X FP2,FP4
+ FMul.X FP_SCR2(a6),FP4 ...W = N*P1
+ FMove.X FP2,FP5
+ FMul.X FP_SCR3(a6),FP5 ...w = N*P2
+ FMove.X FP4,FP3
+*--we want P+p = W+w but |p| <= half ulp of P
+*--Then, we need to compute A := R-P and a := r-p
+ FAdd.X FP5,FP3 ...FP3 is P
+ FSub.X FP3,FP4 ...W-P
+
+ FSub.X FP3,FP0 ...FP0 is A := R - P
+ FAdd.X FP5,FP4 ...FP4 is p = (W-P)+w
+
+ FMove.X FP0,FP3 ...FP3 A
+ FSub.X FP4,FP1 ...FP1 is a := r - p
+
+*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
+*--|r| <= half ulp of R.
+ FAdd.X FP1,FP0 ...FP0 is R := A+a
+*--No need to calculate r if this is the last loop
+ TST.L D0
+ BGT.W RESTORE
+
+*--Need to calculate r
+ FSub.X FP0,FP3 ...A-R
+ FAdd.X FP3,FP1 ...FP1 is r := (A-R)+a
+ BRA.W LOOP
+
+RESTORE:
+*--REDUCTION DONE: FP0 IS THE REDUCED ARGUMENT AND FP2 IS N. STORE N,
+*--POP D2 AND FP2-FP5 PUSHED AT REDUCEX, THEN RETURN TO THE CALLER'S
+*--CONTINUATION POINT: ADJN IS 0 OR 1 FOR SIN/COS AND 4 FOR SINCOS.
+ FMOVE.L FP2,N(a6)
+ MOVE.L (A7)+,D2
+ FMOVEM.X (A7)+,FP2-FP5
+
+
+ MOVE.L ADJN(a6),D0
+ CMPI.L #4,D0
+
+ BLT.W SINCONT ...SIN OR COS WAS REQUESTED
+ BRA.B SCCONT ...SINCOS WAS REQUESTED
+
+ xdef ssincosd
+ssincosd:
+*--SIN AND COS OF X FOR DENORMALIZED X
+*--COS(X) = 1 IS PASSED TO sto_cos IN FP1; THE SINE PART IS LEFT TO
+*--t_extdnrm.
+
+ FMOVE.S #:3F800000,FP1
+ bsr sto_cos ;store cosine result
+ bra t_extdnrm
+
+ xdef ssincos
+ssincos:
+*--SET ADJN TO 4
+*--(RESTORE USES ADJN >= 4 TO RETURN TO SCCONT INSTEAD OF SINCONT.)
+ MOVE.L #4,ADJN(a6)
+
+ FMOVE.X (a0),FP0 ...LOAD INPUT
+
+*--D0 GETS X IN COMPACT FORM: SIGN AND EXPONENT IN THE UPPER WORD,
+*--THE TOP 16 MANTISSA BITS IN THE LOWER WORD.
+ MOVE.L (A0),D0
+ MOVE.W 4(A0),D0
+ FMOVE.X FP0,X(a6)
+ ANDI.L #$7FFFFFFF,D0 ...COMPACTIFY X
+
+ CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
+ BGE.B SCOK1
+ BRA.W SCSM ...NO, TINY ARGUMENT
+
+SCOK1:
+ CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
+ BLT.B SCMAIN
+ BRA.W REDUCEX ...NO, GENERAL ARGUMENT REDUCTION
+
+
+SCMAIN:
+*--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
+*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
+ FMOVE.X FP0,FP1
+ FMUL.D TWOBYPI,FP1 ...X*2/PI
+
+*--HIDE THE NEXT INSTRUCTION
+ LEA PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32
+
+
+*--FP1 IS NOW READY
+ FMOVE.L FP1,N(a6) ...CONVERT TO INTEGER
+
+ MOVE.L N(a6),D0
+ ASL.L #4,D0 ...EACH TABLE ENTRY IS 16 BYTES
+ ADDA.L D0,A1 ...ADDRESS OF N*PIBY2, IN Y1, Y2
+
+ FSUB.X (A1)+,FP0 ...X-Y1
+ FSUB.S (A1),FP0 ...FP0 IS R = (X-Y1)-Y2
+
+SCCONT:
+*--continuation point from REDUCEX
+*--ON ENTRY FP0 IS R AND N(a6) HOLDS N.
+
+*--HIDE THE NEXT TWO
+ MOVE.L N(a6),D0
+ ROR.L #1,D0
+
+ TST.L D0 ...D0 < 0 IFF N IS ODD
+ BGE.W NEVEN ...EVEN N; ODD N FALLS INTO NODD
+
+NODD:
+*--REGISTERS SAVED SO FAR: D0, A0, FP2.
+*--NOTE(review): NO SAVE OF THESE REGISTERS IS VISIBLE IN THIS FILE ON
+*--THE PATH TO NODD; THE LINE ABOVE MAY BE STALE - CONFIRM.
+*--N IS ODD. ON ENTRY FP0 IS R AND D0 IS N ROTATED RIGHT BY ONE BIT.
+*--THE SINE POLYNOMIAL IS ACCUMULATED IN FP1 AND BECOMES COS(X); THE
+*--COSINE POLYNOMIAL IS ACCUMULATED IN FP2/FP0 AND BECOMES SIN(X).
+*--RPRIME IS R, SPRIME IS S AND POSNEG1 IS 1.0, EACH WITH ITS SIGN
+*--BIT ADJUSTED FROM THE LOW BITS OF N. D2 IS USED AS A TEMPORARY
+*--AND IS SAVED/RESTORED ON THE STACK.
+
+ FMOVE.X FP0,RPRIME(a6)
+ FMUL.X FP0,FP0 ...FP0 IS S = R*R
+ FMOVE.D SINA7,FP1 ...A7
+ FMOVE.D COSB8,FP2 ...B8
+ FMUL.X FP0,FP1 ...SA7
+ MOVE.L d2,-(A7)
+ MOVE.L D0,d2
+ FMUL.X FP0,FP2 ...SB8
+ ROR.L #1,d2
+ ANDI.L #$80000000,d2
+
+ FADD.D SINA6,FP1 ...A6+SA7
+ EOR.L D0,d2
+ ANDI.L #$80000000,d2
+ FADD.D COSB7,FP2 ...B7+SB8
+
+ FMUL.X FP0,FP1 ...S(A6+SA7)
+ EOR.L d2,RPRIME(a6)
+ MOVE.L (A7)+,d2
+ FMUL.X FP0,FP2 ...S(B7+SB8)
+ ROR.L #1,D0
+ ANDI.L #$80000000,D0
+
+ FADD.D SINA5,FP1 ...A5+S(A6+SA7)
+ MOVE.L #$3F800000,POSNEG1(a6)
+ EOR.L D0,POSNEG1(a6)
+ FADD.D COSB6,FP2 ...B6+S(B7+SB8)
+
+ FMUL.X FP0,FP1 ...S(A5+S(A6+SA7))
+ FMUL.X FP0,FP2 ...S(B6+S(B7+SB8))
+ FMOVE.X FP0,SPRIME(a6)
+
+ FADD.D SINA4,FP1 ...A4+S(A5+S(A6+SA7))
+ EOR.L D0,SPRIME(a6)
+ FADD.D COSB5,FP2 ...B5+S(B6+S(B7+SB8))
+
+ FMUL.X FP0,FP1 ...S(A4+...)
+ FMUL.X FP0,FP2 ...S(B5+...)
+
+ FADD.D SINA3,FP1 ...A3+S(A4+...)
+ FADD.D COSB4,FP2 ...B4+S(B5+...)
+
+ FMUL.X FP0,FP1 ...S(A3+...)
+ FMUL.X FP0,FP2 ...S(B4+...)
+
+ FADD.X SINA2,FP1 ...A2+S(A3+...)
+ FADD.X COSB3,FP2 ...B3+S(B4+...)
+
+ FMUL.X FP0,FP1 ...S(A2+...)
+ FMUL.X FP0,FP2 ...S(B3+...)
+
+ FADD.X SINA1,FP1 ...A1+S(A2+...)
+ FADD.X COSB2,FP2 ...B2+S(B3+...)
+
+ FMUL.X FP0,FP1 ...S(A1+...)
+ FMUL.X FP2,FP0 ...S(B2+...)
+
+
+
+ FMUL.X RPRIME(a6),FP1 ...R'S(A1+...)
+ FADD.S COSB1,FP0 ...B1+S(B2...)
+ FMUL.X SPRIME(a6),FP0 ...S'(B1+S(B2+...))
+
+ move.l d1,-(sp) ;save users mode & precision
+ andi.l #$ff,d1 ;mask off all exceptions
+ fmove.l d1,FPCR
+ FADD.X RPRIME(a6),FP1 ...COS(X)
+ bsr sto_cos ;store cosine result
+ FMOVE.L (sp)+,FPCR ;restore users exceptions
+ FADD.S POSNEG1(a6),FP0 ...SIN(X)
+
+ bra t_frcinx
+
+
+NEVEN:
+*--REGISTERS SAVED SO FAR: FP2.
+*--NOTE(review): NO SAVE OF FP2 IS VISIBLE IN THIS FILE ON THE PATH
+*--TO NEVEN; THE LINE ABOVE MAY BE STALE - CONFIRM.
+*--N IS EVEN. ON ENTRY FP0 IS R AND D0 IS N ROTATED RIGHT BY ONE BIT.
+*--THE COSINE POLYNOMIAL IS ACCUMULATED IN FP1 AND BECOMES COS(X);
+*--THE SINE POLYNOMIAL IS ACCUMULATED IN FP2/FP0 AND BECOMES SIN(X).
+*--RPRIME, SPRIME AND POSNEG1 (1.0 IN SINGLE) ALL RECEIVE THE SAME
+*--SIGN BIT, TAKEN FROM THE NEXT BIT OF N.
+
+ FMOVE.X FP0,RPRIME(a6)
+ FMUL.X FP0,FP0 ...FP0 IS S = R*R
+ FMOVE.D COSB8,FP1 ...B8
+ FMOVE.D SINA7,FP2 ...A7
+ FMUL.X FP0,FP1 ...SB8
+ FMOVE.X FP0,SPRIME(a6)
+ FMUL.X FP0,FP2 ...SA7
+ ROR.L #1,D0
+ ANDI.L #$80000000,D0
+ FADD.D COSB7,FP1 ...B7+SB8
+ FADD.D SINA6,FP2 ...A6+SA7
+ EOR.L D0,RPRIME(a6)
+ EOR.L D0,SPRIME(a6)
+ FMUL.X FP0,FP1 ...S(B7+SB8)
+ ORI.L #$3F800000,D0
+ MOVE.L D0,POSNEG1(a6)
+ FMUL.X FP0,FP2 ...S(A6+SA7)
+
+ FADD.D COSB6,FP1 ...B6+S(B7+SB8)
+ FADD.D SINA5,FP2 ...A5+S(A6+SA7)
+
+ FMUL.X FP0,FP1 ...S(B6+S(B7+SB8))
+ FMUL.X FP0,FP2 ...S(A5+S(A6+SA7))
+
+ FADD.D COSB5,FP1 ...B5+S(B6+S(B7+SB8))
+ FADD.D SINA4,FP2 ...A4+S(A5+S(A6+SA7))
+
+ FMUL.X FP0,FP1 ...S(B5+...)
+ FMUL.X FP0,FP2 ...S(A4+...)
+
+ FADD.D COSB4,FP1 ...B4+S(B5+...)
+ FADD.D SINA3,FP2 ...A3+S(A4+...)
+
+ FMUL.X FP0,FP1 ...S(B4+...)
+ FMUL.X FP0,FP2 ...S(A3+...)
+
+ FADD.X COSB3,FP1 ...B3+S(B4+...)
+ FADD.X SINA2,FP2 ...A2+S(A3+...)
+
+ FMUL.X FP0,FP1 ...S(B3+...)
+ FMUL.X FP0,FP2 ...S(A2+...)
+
+ FADD.X COSB2,FP1 ...B2+S(B3+...)
+ FADD.X SINA1,FP2 ...A1+S(A2+...)
+
+ FMUL.X FP0,FP1 ...S(B2+...)
+ fmul.x fp2,fp0 ...s(a1+...)
+
+
+
+ FADD.S COSB1,FP1 ...B1+S(B2...)
+ FMUL.X RPRIME(a6),FP0 ...R'S(A1+...)
+ FMUL.X SPRIME(a6),FP1 ...S'(B1+S(B2+...))
+
+ move.l d1,-(sp) ;save users mode & precision
+ andi.l #$ff,d1 ;mask off all exceptions
+ fmove.l d1,FPCR
+ FADD.S POSNEG1(a6),FP1 ...COS(X)
+ bsr sto_cos ;store cosine result
+ FMOVE.L (sp)+,FPCR ;restore users exceptions
+ FADD.X RPRIME(a6),FP0 ...SIN(X)
+
+ bra t_frcinx
+
+SCBORS:
+*--NOTE(review): NOTHING IN THIS FILE BRANCHES TO SCBORS AND THE
+*--INSTRUCTION BEFORE IT IS AN UNCONDITIONAL BRA, SO THE COMPARE
+*--BELOW LOOKS UNREACHABLE; ssincos BRANCHES TO SCSM AND REDUCEX
+*--DIRECTLY.
+ CMPI.L #$3FFF8000,D0
+ BGT.W REDUCEX
+
+
+SCSM:
+*--|X| < 2**(-40): SIN(X) = X AND COS(X) = 1. THE COSINE IS FORMED AS
+*--1 MINUS A TINY SINGLE-PRECISION VALUE WITH ALL EXCEPTIONS MASKED
+*--AND HANDED TO sto_cos; X IS THEN RELOADED UNDER THE USER'S FPCR.
+ CLR.W XDCARE(a6)
+ FMOVE.S #:3F800000,FP1
+
+ move.l d1,-(sp) ;save users mode & precision
+ andi.l #$ff,d1 ;mask off all exceptions
+ fmove.l d1,FPCR
+ FSUB.S #:00800000,FP1
+ bsr sto_cos ;store cosine result
+ FMOVE.L (sp)+,FPCR ;restore users exceptions
+ FMOVE.X X(a6),FP0
+ bra t_frcinx
+
+ end
diff --git a/sys/arch/m68k/fpsp/ssinh.sa b/sys/arch/m68k/fpsp/ssinh.sa
new file mode 100644
index 00000000000..8b555f076c1
--- /dev/null
+++ b/sys/arch/m68k/fpsp/ssinh.sa
@@ -0,0 +1,160 @@
+* $NetBSD: ssinh.sa,v 1.3 1994/10/26 07:50:05 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* ssinh.sa 3.1 12/10/90
+*
+* The entry point sSinh computes the hyperbolic sine of
+* an input argument; sSinhd does the same except for denormalized
+* input.
+*
+* Input: Double-extended number X in location pointed to
+* by address register a0.
+*
+* Output: The value sinh(X) returned in floating-point register Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 3 ulps in
+* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+* result is subsequently rounded to double precision. The
+* result is provably monotonic in double precision.
+*
+* Speed: The program sSINH takes approximately 280 cycles.
+*
+* Algorithm:
+*
+* SINH
+* 1. If |X| > 16380 log2, go to 3.
+*
+* 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formulae
+* y = |X|, sgn = sign(X), and z = expm1(Y),
+* sinh(X) = sgn*(1/2)*( z + z/(1+z) ).
+* Exit.
+*
+* 3. If |X| > 16480 log2, go to 5.
+*
+* 4. (16380 log2 < |X| <= 16480 log2)
+* sinh(X) = sign(X) * exp(|X|)/2.
+* However, invoking exp(|X|) may cause premature overflow.
+* Thus, we calculate sinh(X) as follows:
+* Y := |X|
+* sgn := sign(X)
+* sgnFact := sgn * 2**(16380)
+* Y' := Y - 16381 log2
+* sinh(X) := sgnFact * exp(Y').
+* Exit.
+*
+* 5. (|X| > 16480 log2) sinh(X) must overflow. Return
+* sign(X)*Huge*Huge to generate overflow and an infinity with
+* the appropriate sign. Huge is the largest finite number in
+* extended format. Exit.
+*
+
+SSINH IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+T1 DC.L $40C62D38,$D3D64634 ... 16381 LOG2 LEAD
+T2 DC.L $3D6F90AE,$B1E75CC7 ... 16381 LOG2 TRAIL
+
+ xref t_frcinx
+ xref t_ovfl
+ xref t_extdnrm
+ xref setox
+ xref setoxm1
+
+ xdef ssinhd
+ssinhd:
+*--SINH(X) = X FOR DENORMALIZED X
+*--ALL WORK IS HANDED TO T_EXTDNRM, AN XREF'D ROUTINE NOT DEFINED IN THIS FILE
+ bra t_extdnrm
+
+ xdef ssinh
+ssinh:
+ FMOVE.x (a0),FP0 ...LOAD INPUT
+*--BUILD A COMPACT INTEGER FORM OF X IN D0 FOR THE RANGE CHECKS BELOW
+ move.l (a0),d0 ;d0 = first longword of X (sign and exponent in the upper word)
+ move.w 4(a0),d0 ;low word of d0 = upper word of the longword at 4(a0)
+ move.l d0,a1 save a copy of original (compacted) operand
+ AND.L #$7FFFFFFF,D0 ...CLEAR BIT 31 (SIGN) SO D0 REPRESENTS |X|
+ CMP.L #$400CB167,D0 ...BOUND OF ALGORITHM STEP 1 (16380 LOG2)
+ BGT.B SINHBIG
+
+*--THIS IS THE USUAL CASE, |X| < 16380 LOG2
+*--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
+
+ FABS.X FP0 ...Y = |X|
+
+ movem.l a1/d1,-(sp) ;keep a1 (operand copy) and d1 across the call
+ fmovem.x fp0,(a0) ;store Y at (a0), presumably the operand setoxm1 reads
+ clr.l d1 ;d1 = 0 for the call; the saved d1 is reloaded below
+ bsr setoxm1 ...FP0 IS Z = EXPM1(Y)
+ fmove.l #0,fpcr ;zero fpcr for the intermediate arithmetic
+ movem.l (sp)+,a1/d1
+
+ FMOVE.X FP0,FP1
+ FADD.S #:3F800000,FP1 ...1+Z
+ FMOVE.X FP0,-(sp) ...PUSH Z
+ FDIV.X FP1,FP0 ...Z/(1+Z)
+ MOVE.L a1,d0 ...RECOVER THE COMPACTED OPERAND
+ AND.L #$80000000,D0 ...KEEP ONLY THE SIGN BIT
+ OR.L #$3F000000,D0 ...SIGN(X)*(1/2) IN SGL FMT
+ FADD.X (sp)+,FP0 ...Z + Z/(1+Z)
+ MOVE.L D0,-(sp) ...PUSH THE SGL FACTOR FOR THE FMUL BELOW
+
+ fmove.l d1,fpcr ;fpcr = d1 as it was on entry to ssinh
+ fmul.s (sp)+,fp0 ;last fp inst - possible exceptions set
+
+ bra t_frcinx
+
+SINHBIG:
+ cmp.l #$400CB2B3,D0 ;bound of step 3; NOTE(review) header says 16480 log2 but the constant looks nearer 16500 log2 - confirm
+ bgt t_ovfl ;above the bound, xref'd t_ovfl handles the overflow case (step 5)
+ FABS.X FP0
+ FSUB.D T1(pc),FP0 ...(|X|-16381LOG2_LEAD)
+ clr.l -(sp) ;low mantissa longword of sgnFact = 0
+ move.l #$80000000,-(sp) ;high mantissa longword of sgnFact = $80000000
+ move.l a1,d0
+ AND.L #$80000000,D0 ...SIGN OF X
+ OR.L #$7FFB0000,D0 ...EXPONENT WORD $7FFB, SGNFACT = SGN * 2**16380
+ MOVE.L D0,-(sp) ...EXTENDED FMT
+ FSUB.D T2(pc),FP0 ...|X| - 16381 LOG2, ACCURATE
+
+ move.l d1,-(sp) ;saved d1 is popped straight into fpcr after setox
+ clr.l d1
+ fmovem.x fp0,(a0) ;store Y' at (a0), presumably the operand setox reads
+ bsr setox
+ fmove.l (sp)+,fpcr
+
+ fmul.x (sp)+,fp0 ;sgnFact * exp(Y') - possible exception
+ bra t_frcinx
+
+ end
diff --git a/sys/arch/m68k/fpsp/stan.sa b/sys/arch/m68k/fpsp/stan.sa
new file mode 100644
index 00000000000..9bc9904a000
--- /dev/null
+++ b/sys/arch/m68k/fpsp/stan.sa
@@ -0,0 +1,480 @@
+* $NetBSD: stan.sa,v 1.3 1994/10/26 07:50:10 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* stan.sa 3.3 7/29/91
+*
+* The entry point stan computes the tangent of
+* an input argument;
+* stand does the same except for denormalized input.
+*
+* Input: Double-extended number X in location pointed to
+* by address register a0.
+*
+* Output: The value tan(X) returned in floating-point register Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 3 ulp in
+* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+* result is subsequently rounded to double precision. The
+* result is provably monotonic in double precision.
+*
+* Speed: The program sTAN takes approximately 170 cycles for
+* input argument X such that |X| < 15Pi, which is the usual
+* situation.
+*
+* Algorithm:
+*
+* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
+*
+* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
+* k = N mod 2, so in particular, k = 0 or 1.
+*
+* 3. If k is odd, go to 5.
+*
+* 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
+* rational function U/V where
+* U = r + r*s*(P1 + s*(P2 + s*P3)), and
+* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r.
+* Exit.
+*
+* 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
+* rational function U/V where
+* U = r + r*s*(P1 + s*(P2 + s*P3)), and
+* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
+* -Cot(r) = -V/U. Exit.
+*
+* 6. If |X| > 1, go to 8.
+*
+* 7. (|X|<2**(-40)) Tan(X) = X. Exit.
+*
+* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
+*
+
+STAN IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+BOUNDS1 DC.L $3FD78000,$4004BC7E
+TWOBYPI DC.L $3FE45F30,$6DC9C883
+
+TANQ4 DC.L $3EA0B759,$F50F8688
+TANP3 DC.L $BEF2BAA5,$A8924F04
+
+TANQ3 DC.L $BF346F59,$B39BA65F,$00000000,$00000000
+
+TANP2 DC.L $3FF60000,$E073D3FC,$199C4A00,$00000000
+
+TANQ2 DC.L $3FF90000,$D23CD684,$15D95FA1,$00000000
+
+TANP1 DC.L $BFFC0000,$8895A6C5,$FB423BCA,$00000000
+
+TANQ1 DC.L $BFFD0000,$EEF57E0D,$A84BC8CE,$00000000
+
+INVTWOPI DC.L $3FFC0000,$A2F9836E,$4E44152A,$00000000
+
+TWOPI1 DC.L $40010000,$C90FDAA2,$00000000,$00000000
+TWOPI2 DC.L $3FDF0000,$85A308D4,$00000000,$00000000
+
+*--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
+*--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
+*--MOST 69 BITS LONG.
+ xdef PITBL
+PITBL:
+ DC.L $C0040000,$C90FDAA2,$2168C235,$21800000
+ DC.L $C0040000,$C2C75BCD,$105D7C23,$A0D00000
+ DC.L $C0040000,$BC7EDCF7,$FF523611,$A1E80000
+ DC.L $C0040000,$B6365E22,$EE46F000,$21480000
+ DC.L $C0040000,$AFEDDF4D,$DD3BA9EE,$A1200000
+ DC.L $C0040000,$A9A56078,$CC3063DD,$21FC0000
+ DC.L $C0040000,$A35CE1A3,$BB251DCB,$21100000
+ DC.L $C0040000,$9D1462CE,$AA19D7B9,$A1580000
+ DC.L $C0040000,$96CBE3F9,$990E91A8,$21E00000
+ DC.L $C0040000,$90836524,$88034B96,$20B00000
+ DC.L $C0040000,$8A3AE64F,$76F80584,$A1880000
+ DC.L $C0040000,$83F2677A,$65ECBF73,$21C40000
+ DC.L $C0030000,$FB53D14A,$A9C2F2C2,$20000000
+ DC.L $C0030000,$EEC2D3A0,$87AC669F,$21380000
+ DC.L $C0030000,$E231D5F6,$6595DA7B,$A1300000
+ DC.L $C0030000,$D5A0D84C,$437F4E58,$9FC00000
+ DC.L $C0030000,$C90FDAA2,$2168C235,$21000000
+ DC.L $C0030000,$BC7EDCF7,$FF523611,$A1680000
+ DC.L $C0030000,$AFEDDF4D,$DD3BA9EE,$A0A00000
+ DC.L $C0030000,$A35CE1A3,$BB251DCB,$20900000
+ DC.L $C0030000,$96CBE3F9,$990E91A8,$21600000
+ DC.L $C0030000,$8A3AE64F,$76F80584,$A1080000
+ DC.L $C0020000,$FB53D14A,$A9C2F2C2,$1F800000
+ DC.L $C0020000,$E231D5F6,$6595DA7B,$A0B00000
+ DC.L $C0020000,$C90FDAA2,$2168C235,$20800000
+ DC.L $C0020000,$AFEDDF4D,$DD3BA9EE,$A0200000
+ DC.L $C0020000,$96CBE3F9,$990E91A8,$20E00000
+ DC.L $C0010000,$FB53D14A,$A9C2F2C2,$1F000000
+ DC.L $C0010000,$C90FDAA2,$2168C235,$20000000
+ DC.L $C0010000,$96CBE3F9,$990E91A8,$20600000
+ DC.L $C0000000,$C90FDAA2,$2168C235,$1F800000
+ DC.L $BFFF0000,$C90FDAA2,$2168C235,$1F000000
+ DC.L $00000000,$00000000,$00000000,$00000000
+ DC.L $3FFF0000,$C90FDAA2,$2168C235,$9F000000
+ DC.L $40000000,$C90FDAA2,$2168C235,$9F800000
+ DC.L $40010000,$96CBE3F9,$990E91A8,$A0600000
+ DC.L $40010000,$C90FDAA2,$2168C235,$A0000000
+ DC.L $40010000,$FB53D14A,$A9C2F2C2,$9F000000
+ DC.L $40020000,$96CBE3F9,$990E91A8,$A0E00000
+ DC.L $40020000,$AFEDDF4D,$DD3BA9EE,$20200000
+ DC.L $40020000,$C90FDAA2,$2168C235,$A0800000
+ DC.L $40020000,$E231D5F6,$6595DA7B,$20B00000
+ DC.L $40020000,$FB53D14A,$A9C2F2C2,$9F800000
+ DC.L $40030000,$8A3AE64F,$76F80584,$21080000
+ DC.L $40030000,$96CBE3F9,$990E91A8,$A1600000
+ DC.L $40030000,$A35CE1A3,$BB251DCB,$A0900000
+ DC.L $40030000,$AFEDDF4D,$DD3BA9EE,$20A00000
+ DC.L $40030000,$BC7EDCF7,$FF523611,$21680000
+ DC.L $40030000,$C90FDAA2,$2168C235,$A1000000
+ DC.L $40030000,$D5A0D84C,$437F4E58,$1FC00000
+ DC.L $40030000,$E231D5F6,$6595DA7B,$21300000
+ DC.L $40030000,$EEC2D3A0,$87AC669F,$A1380000
+ DC.L $40030000,$FB53D14A,$A9C2F2C2,$A0000000
+ DC.L $40040000,$83F2677A,$65ECBF73,$A1C40000
+ DC.L $40040000,$8A3AE64F,$76F80584,$21880000
+ DC.L $40040000,$90836524,$88034B96,$A0B00000
+ DC.L $40040000,$96CBE3F9,$990E91A8,$A1E00000
+ DC.L $40040000,$9D1462CE,$AA19D7B9,$21580000
+ DC.L $40040000,$A35CE1A3,$BB251DCB,$A1100000
+ DC.L $40040000,$A9A56078,$CC3063DD,$A1FC0000
+ DC.L $40040000,$AFEDDF4D,$DD3BA9EE,$21200000
+ DC.L $40040000,$B6365E22,$EE46F000,$A1480000
+ DC.L $40040000,$BC7EDCF7,$FF523611,$21E80000
+ DC.L $40040000,$C2C75BCD,$105D7C23,$20D00000
+ DC.L $40040000,$C90FDAA2,$2168C235,$A1800000
+
+INARG equ FP_SCR4
+
+TWOTO63 equ L_SCR1
+ENDFLAG equ L_SCR2
+N equ L_SCR3
+
+ xref t_frcinx
+ xref t_extdnrm
+
+ xdef stand
+stand:
+*--TAN(X) = X FOR DENORMALIZED X
+*--ALL WORK IS HANDED TO T_EXTDNRM, AN XREF'D ROUTINE NOT DEFINED IN THIS FILE
+ bra t_extdnrm
+
+ xdef stan
+stan:
+ FMOVE.X (a0),FP0 ...LOAD INPUT
+*--COMPACT FORM OF |X| IN D0, UPPER WORD FROM (A0) AND LOWER WORD FROM 4(A0)
+ MOVE.L (A0),D0
+ MOVE.W 4(A0),D0
+ ANDI.L #$7FFFFFFF,D0 ...CLEAR BIT 31 (SIGN)
+
+ CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)?
+ BGE.B TANOK1
+ BRA.W TANSM
+TANOK1:
+ CMPI.L #$4004BC7E,D0 ...|X| < 15 PI?
+ BLT.B TANMAIN
+ BRA.W REDUCEX
+
+
+TANMAIN:
+*--THIS IS THE USUAL CASE, |X| <= 15 PI.
+*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
+ FMOVE.X FP0,FP1
+ FMUL.D TWOBYPI,FP1 ...X*2/PI
+*--PITBL+$200 IS 32 ENTRIES OF 16 BYTES INTO THE TABLE, THE ALL-ZERO N = 0 ENTRY
+*--HIDE THE NEXT TWO INSTRUCTIONS
+ lea.l PITBL+$200,a1 ...TABLE OF N*PI/2, N = -32,...,32
+
+*--FP1 IS NOW READY
+ FMOVE.L FP1,D0 ...CONVERT TO INTEGER
+
+ ASL.L #4,D0 ...N*16, EACH PITBL ENTRY IS 16 BYTES
+ ADDA.L D0,a1 ...ADDRESS N*PIBY2 IN Y1, Y2
+
+ FSUB.X (a1)+,FP0 ...X-Y1
+*--HIDE THE NEXT ONE
+
+ FSUB.S (a1),FP0 ...FP0 IS R = (X-Y1)-Y2
+
+ ROR.L #5,D0 ...BIT 4 OF 16*N (BIT 0 OF N) MOVES TO BIT 31
+ ANDI.L #$80000000,D0 ...D0 WAS ODD IFF D0 < 0
+
+TANCONT:
+*--ENTERED WITH R IN FP0 AND BIT 31 OF D0 SET IFF N IS ODD (ALSO REACHED FROM RESTORE)
+ TST.L D0
+ BLT.W NODD
+*--N EVEN, TAN(X) = TAN(R) = U/V
+ FMOVE.X FP0,FP1 ...FP0 KEEPS R, FP1 BECOMES S
+ FMUL.X FP1,FP1 ...S = R*R
+
+ FMOVE.D TANQ4,FP3
+ FMOVE.D TANP3,FP2
+
+ FMUL.X FP1,FP3 ...SQ4
+ FMUL.X FP1,FP2 ...SP3
+
+ FADD.D TANQ3,FP3 ...Q3+SQ4
+ FADD.X TANP2,FP2 ...P2+SP3
+
+ FMUL.X FP1,FP3 ...S(Q3+SQ4)
+ FMUL.X FP1,FP2 ...S(P2+SP3)
+
+ FADD.X TANQ2,FP3 ...Q2+S(Q3+SQ4)
+ FADD.X TANP1,FP2 ...P1+S(P2+SP3)
+
+ FMUL.X FP1,FP3 ...S(Q2+S(Q3+SQ4))
+ FMUL.X FP1,FP2 ...S(P1+S(P2+SP3))
+
+ FADD.X TANQ1,FP3 ...Q1+S(Q2+S(Q3+SQ4))
+ FMUL.X FP0,FP2 ...RS(P1+S(P2+SP3))
+
+ FMUL.X FP3,FP1 ...S(Q1+S(Q2+S(Q3+SQ4)))
+
+
+ FADD.X FP2,FP0 ...R+RS(P1+S(P2+SP3))
+
+
+ FADD.S #:3F800000,FP1 ...1+S(Q1+...)
+*--FP0 IS U AND FP1 IS V, THE RESULT IS U/V
+ FMOVE.L d1,fpcr ;restore users exceptions
+ FDIV.X FP1,FP0 ;last inst - possible exception set
+
+ bra t_frcinx
+*--N ODD, TAN(X) = -COT(R) = -V/U
+NODD:
+ FMOVE.X FP0,FP1 ...FP1 KEEPS R, FP0 BECOMES S
+ FMUL.X FP0,FP0 ...S = R*R
+
+ FMOVE.D TANQ4,FP3
+ FMOVE.D TANP3,FP2
+
+ FMUL.X FP0,FP3 ...SQ4
+ FMUL.X FP0,FP2 ...SP3
+
+ FADD.D TANQ3,FP3 ...Q3+SQ4
+ FADD.X TANP2,FP2 ...P2+SP3
+
+ FMUL.X FP0,FP3 ...S(Q3+SQ4)
+ FMUL.X FP0,FP2 ...S(P2+SP3)
+
+ FADD.X TANQ2,FP3 ...Q2+S(Q3+SQ4)
+ FADD.X TANP1,FP2 ...P1+S(P2+SP3)
+
+ FMUL.X FP0,FP3 ...S(Q2+S(Q3+SQ4))
+ FMUL.X FP0,FP2 ...S(P1+S(P2+SP3))
+
+ FADD.X TANQ1,FP3 ...Q1+S(Q2+S(Q3+SQ4))
+ FMUL.X FP1,FP2 ...RS(P1+S(P2+SP3))
+
+ FMUL.X FP3,FP0 ...S(Q1+S(Q2+S(Q3+SQ4)))
+
+
+ FADD.X FP2,FP1 ...R+RS(P1+S(P2+SP3))
+ FADD.S #:3F800000,FP0 ...1+S(Q1+...)
+
+*--FP1 IS U AND FP0 IS V, PUSH U, FLIP ITS SIGN BIT, THEN DIVIDE V BY -U
+ FMOVE.X FP1,-(sp)
+ EORI.L #$80000000,(sp) ...NEGATE U ON THE STACK
+
+ FMOVE.L d1,fpcr ;restore users exceptions
+ FDIV.X (sp)+,FP0 ;last inst - possible exception set
+
+ bra t_frcinx
+
+TANBORS:
+*--IF |X| > 1, WE USE THE GENERAL ARGUMENT REDUCTION, OTHERWISE RETURN X.
+*--NOTE, NO BRANCH IN THIS FILE TARGETS TANBORS AND THE CODE ABOVE IT ENDS IN BRA.
+ CMPI.L #$3FFF8000,D0
+ BGT.B REDUCEX
+
+TANSM:
+*--TAN(X) = X, RELOAD X THROUGH THE STACK AFTER FPCR HAS BEEN SET FROM D1
+ FMOVE.X FP0,-(sp)
+ FMOVE.L d1,fpcr ;restore users exceptions
+ FMOVE.X (sp)+,FP0 ;last inst - possible exception set
+
+ bra t_frcinx
+
+
+REDUCEX:
+*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
+*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
+*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
+
+ FMOVEM.X FP2-FP5,-(A7) ...save FP2 through FP5
+ MOVE.L D2,-(A7) ...SAVE D2, IT IS USED AS SCRATCH BELOW
+ FMOVE.S #:00000000,FP1 ...LOW PART r OF THE REMAINDER STARTS AT 0
+
+*--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
+*--there is a danger of unwanted overflow in first LOOP iteration. In this
+*--case, reduce argument by one remainder step to make subsequent reduction
+*--safe.
+ cmpi.l #$7ffeffff,d0 ;is argument dangerously large?
+ bne.b LOOP
+ move.l #$7ffe0000,FP_SCR2(a6) ;yes
+* ;create 2**16383*PI/2
+ move.l #$c90fdaa2,FP_SCR2+4(a6)
+ clr.l FP_SCR2+8(a6)
+ ftst.x fp0 ;test sign of argument
+ move.l #$7fdc0000,FP_SCR3(a6) ;create low half of 2**16383*
+* ;PI/2 at FP_SCR3
+ move.l #$85a308d3,FP_SCR3+4(a6)
+ clr.l FP_SCR3+8(a6)
+ fblt.w red_neg ;negative arg keeps both constants positive
+ or.w #$8000,FP_SCR2(a6) ;positive arg, set the sign bits so the fadds below subtract
+ or.w #$8000,FP_SCR3(a6)
+red_neg:
+ fadd.x FP_SCR2(a6),fp0 ;high part of reduction is exact
+ fmove.x fp0,fp1 ;save high result in fp1
+ fadd.x FP_SCR3(a6),fp0 ;low part of reduction
+ fsub.x fp0,fp1 ;determine low component of result
+ fadd.x FP_SCR3(a6),fp1 ;fp0/fp1 are reduced argument.
+
+*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
+*--integer quotient will be stored in N
+*--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
+
+LOOP:
+ FMOVE.X FP0,INARG(a6) ...+-2**K * F, 1 <= F < 2
+ MOVE.W INARG(a6),D0 ...SIGN AND BIASED EXPONENT WORD OF INARG
+ MOVE.L D0,A1 ...save a copy of D0
+ ANDI.L #$00007FFF,D0 ...BIASED EXPONENT ONLY
+ SUBI.L #$00003FFF,D0 ...D0 IS K
+ CMPI.L #28,D0
+ BLE.B LASTLOOP ...K <= 28, THIS PASS IS THE LAST ONE
+CONTLOOP:
+ SUBI.L #27,D0 ...D0 IS L := K-27
+ CLR.L ENDFLAG(a6) ...ENDFLAG = 0, MORE PASSES FOLLOW
+ BRA.B WORK
+LASTLOOP:
+ CLR.L D0 ...D0 IS L := 0
+ MOVE.L #1,ENDFLAG(a6) ...ENDFLAG = 1 MARKS THE FINAL PASS
+
+WORK:
+*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
+*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
+
+*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
+*--2**L * (PIby2_1), 2**L * (PIby2_2)
+
+ MOVE.L #$00003FFE,D2 ...BIASED EXPO OF 2/PI
+ SUB.L D0,D2 ...BIASED EXPO OF 2**(-L)*(2/PI)
+
+ MOVE.L #$A2F9836E,FP_SCR1+4(a6)
+ MOVE.L #$4E44152A,FP_SCR1+8(a6)
+ MOVE.W D2,FP_SCR1(a6) ...FP_SCR1 is 2**(-L)*(2/PI)
+
+ FMOVE.X FP0,FP2
+ FMUL.X FP_SCR1(a6),FP2
+*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
+*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
+*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
+*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
+*--US THE DESIRED VALUE IN FLOATING POINT.
+
+*--HIDE SIX CYCLES OF INSTRUCTION
+ MOVE.L A1,D2 ...COPY OF THE SIGN/EXPONENT WORD SAVED IN LOOP
+ SWAP D2 ...MOVE THAT WORD TO THE UPPER HALF
+ ANDI.L #$80000000,D2 ...KEEP ONLY SIGN(INARG)
+ ORI.L #$5F000000,D2 ...D2 IS SIGN(INARG)*2**63 IN SGL
+ MOVE.L D2,TWOTO63(a6)
+
+ MOVE.L D0,D2
+ ADDI.L #$00003FFF,D2 ...BIASED EXPO OF 2**L * (PI/2)
+
+*--FP2 IS READY
+ FADD.S TWOTO63(a6),FP2 ...THE FRACTIONAL PART OF FP2 IS ROUNDED
+
+*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
+ MOVE.W D2,FP_SCR2(a6)
+ CLR.W FP_SCR2+2(a6)
+ MOVE.L #$C90FDAA2,FP_SCR2+4(a6)
+ CLR.L FP_SCR2+8(a6) ...FP_SCR2 is 2**(L) * Piby2_1
+
+*--FP2 IS READY
+ FSUB.S TWOTO63(a6),FP2 ...FP2 is N
+
+ ADDI.L #$00003FDD,D0 ...L + $3FDD, EXPONENT WORD FOR 2**(L) * Piby2_2
+ MOVE.W D0,FP_SCR3(a6)
+ CLR.W FP_SCR3+2(a6)
+ MOVE.L #$85A308D3,FP_SCR3+4(a6)
+ CLR.L FP_SCR3+8(a6) ...FP_SCR3 is 2**(L) * Piby2_2
+
+ MOVE.L ENDFLAG(a6),D0 ...D0 > 0 IFF THIS IS THE FINAL PASS
+
+*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
+*--P2 = 2**(L) * Piby2_2
+ FMOVE.X FP2,FP4
+ FMul.X FP_SCR2(a6),FP4 ...W = N*P1
+ FMove.X FP2,FP5
+ FMul.X FP_SCR3(a6),FP5 ...w = N*P2
+ FMove.X FP4,FP3
+*--we want P+p = W+w but |p| <= half ulp of P
+*--Then, we need to compute A := R-P and a := r-p
+ FAdd.X FP5,FP3 ...FP3 is P
+ FSub.X FP3,FP4 ...W-P
+
+ FSub.X FP3,FP0 ...FP0 is A := R - P
+ FAdd.X FP5,FP4 ...FP4 is p = (W-P)+w
+
+ FMove.X FP0,FP3 ...FP3 A
+ FSub.X FP4,FP1 ...FP1 is a := r - p
+
+*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
+*--|r| <= half ulp of R.
+ FAdd.X FP1,FP0 ...FP0 is R := A+a
+*--No need to calculate r if this is the last loop
+ TST.L D0
+ BGT.W RESTORE
+
+*--Need to calculate r
+ FSub.X FP0,FP3 ...A-R
+ FAdd.X FP3,FP1 ...FP1 is r := (A-R)+a
+ BRA.W LOOP
+
+RESTORE:
+ FMOVE.L FP2,N(a6) ...STORE THE LAST QUOTIENT N AS AN INTEGER
+ MOVE.L (A7)+,D2 ...RESTORE D2 SAVED AT REDUCEX
+ FMOVEM.X (A7)+,FP2-FP5 ...RESTORE FP2 THROUGH FP5
+
+
+ MOVE.L N(a6),D0
+ ROR.L #1,D0 ...BIT 0 OF N (ODD/EVEN) MOVES TO BIT 31 FOR TANCONT
+
+
+ BRA.W TANCONT
+
+ end
diff --git a/sys/arch/m68k/fpsp/stanh.sa b/sys/arch/m68k/fpsp/stanh.sa
new file mode 100644
index 00000000000..6c1697c4226
--- /dev/null
+++ b/sys/arch/m68k/fpsp/stanh.sa
@@ -0,0 +1,210 @@
+* $NetBSD: stanh.sa,v 1.3 1994/10/26 07:50:12 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* stanh.sa 3.1 12/10/90
+*
+* The entry point sTanh computes the hyperbolic tangent of
+* an input argument; sTanhd does the same except for denormalized
+* input.
+*
+* Input: Double-extended number X in location pointed to
+* by address register a0.
+*
+* Output: The value tanh(X) returned in floating-point register Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 3 ulps in
+* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+* result is subsequently rounded to double precision. The
+* result is provably monotonic in double precision.
+*
+* Speed: The program stanh takes approximately 270 cycles.
+*
+* Algorithm:
+*
+* TANH
+* 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
+*
+* 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
+* sgn := sign(X), y := 2|X|, z := expm1(Y), and
+* tanh(X) = sgn*( z/(2+z) ).
+* Exit.
+*
+* 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
+* go to 7.
+*
+* 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
+*
+* 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
+* sgn := sign(X), y := 2|X|, z := exp(Y),
+* tanh(X) = sgn - [ sgn*2/(1+z) ].
+* Exit.
+*
+* 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we
+* calculate Tanh(X) by
+* sgn := sign(X), Tiny := 2**(-126),
+* tanh(X) := sgn - sgn*Tiny.
+* Exit.
+*
+* 7. (|X| < 2**(-40)). Tanh(X) = X. Exit.
+*
+
+STANH IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+X equ FP_SCR5
+XDCARE equ X+2
+XFRAC equ X+4
+
+SGN equ L_SCR3
+
+V equ FP_SCR6
+
+BOUNDS1 DC.L $3FD78000,$3FFFDDCE ... 2^(-40), (5/2)LOG2
+
+ xref t_frcinx
+ xref t_extdnrm
+ xref setox
+ xref setoxm1
+
+ xdef stanhd
+stanhd:
+*--TANH(X) = X FOR DENORMALIZED X
+*--ALL WORK IS HANDED TO T_EXTDNRM, AN XREF'D ROUTINE NOT DEFINED IN THIS FILE
+ bra t_extdnrm
+
+ xdef stanh
+stanh:
+ FMOVE.X (a0),FP0 ...LOAD INPUT
+*--KEEP A WORKING COPY OF X AT X(A6) AND ITS COMPACT FORM IN D0
+ FMOVE.X FP0,X(a6)
+ move.l (a0),d0 ;d0 = first longword of X (sign and exponent in the upper word)
+ move.w 4(a0),d0 ;low word of d0 = upper word of the longword at 4(a0)
+ MOVE.L D0,X(a6) ...FIRST LONGWORD OF THE COPY NOW HOLDS THE COMPACT FORM
+ AND.L #$7FFFFFFF,D0 ...CLEAR BIT 31 (SIGN)
+ CMP2.L BOUNDS1(pc),D0 ...2**(-40) < |X| < (5/2)LOG2 ?
+ BCS.B TANHBORS ...CARRY SET MEANS D0 IS OUTSIDE THE BOUNDS1 PAIR
+
+*--THIS IS THE USUAL CASE
+*--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
+
+ MOVE.L X(a6),D0
+ MOVE.L D0,SGN(a6)
+ AND.L #$7FFF0000,D0 ...KEEP THE EXPONENT FIELD, CLEAR SIGN AND LOW WORD
+ ADD.L #$00010000,D0 ...EXPONENT OF 2|X|
+ MOVE.L D0,X(a6)
+ AND.L #$80000000,SGN(a6) ...SGN KEEPS ONLY THE SIGN BIT OF X
+ FMOVE.X X(a6),FP0 ...FP0 IS Y = 2|X|
+
+ move.l d1,-(a7) ;keep d1 across the call
+ clr.l d1
+ fmovem.x fp0,(a0) ;store Y at (a0), presumably the operand setoxm1 reads
+ bsr setoxm1 ...FP0 IS Z = EXPM1(Y)
+ move.l (a7)+,d1
+
+ FMOVE.X FP0,FP1
+ FADD.S #:40000000,FP1 ...Z+2
+ MOVE.L SGN(a6),D0
+ FMOVE.X FP1,V(a6)
+ EOR.L D0,V(a6) ...XOR SIGN(X) INTO THE SIGN BIT OF V = Z+2
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FDIV.X V(a6),FP0 ;Z divided by the sign-adjusted (Z+2)
+ bra t_frcinx
+
+TANHBORS:
+ CMP.L #$3FFF8000,D0 ...COMPACT FORM OF 1.0
+ BLT.W TANHSM ...OUT OF BOUNDS AND BELOW 1, SO |X| < 2**(-40)
+
+ CMP.L #$40048AA1,D0 ...UPPER LIMIT OF THE RANGE HANDLED BELOW (50 LOG2)
+ BGT.W TANHHUGE
+
+*-- (5/2) LOG2 < |X| < 50 LOG2,
+*--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
+*--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
+
+ MOVE.L X(a6),D0
+ MOVE.L D0,SGN(a6)
+ AND.L #$7FFF0000,D0 ...KEEP THE EXPONENT FIELD, CLEAR SIGN AND LOW WORD
+ ADD.L #$00010000,D0 ...EXPO OF 2|X|
+ MOVE.L D0,X(a6) ...Y = 2|X|
+ AND.L #$80000000,SGN(a6) ...SGN KEEPS ONLY THE SIGN BIT OF X
+ MOVE.L SGN(a6),D0 ...(D0 IS LOADED FROM SGN AGAIN AFTER THE CALL)
+ FMOVE.X X(a6),FP0 ...Y = 2|X|
+
+ move.l d1,-(a7) ;keep d1 across the call
+ clr.l d1
+ fmovem.x fp0,(a0) ;store Y at (a0), presumably the operand setox reads
+ bsr setox ...FP0 IS EXP(Y)
+ move.l (a7)+,d1
+ move.l SGN(a6),d0
+ FADD.S #:3F800000,FP0 ...EXP(Y)+1
+
+ EOR.L #$C0000000,D0 ...-SIGN(X)*2
+ FMOVE.S d0,FP1 ...-SIGN(X)*2 IN SGL FMT
+ FDIV.X FP0,FP1 ...-SIGN(X)2 / [EXP(Y)+1 ]
+
+ MOVE.L SGN(a6),D0
+ OR.L #$3F800000,D0 ...SGN
+ FMOVE.S d0,FP0 ...SGN IN SGL FMT
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FADD.X fp1,FP0 ;SGN - SGN*2/[EXP(Y)+1]
+
+ bra t_frcinx
+
+TANHSM:
+ CLR.W XDCARE(a6) ...CLEAR THE LOW WORD OF THE COMPACT FORM STORED IN X
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FMOVE.X X(a6),FP0 ;last inst - possible exception set
+
+ bra t_frcinx
+
+TANHHUGE:
+*---RETURN SGN(X) - SGN(X)EPS
+ MOVE.L X(a6),D0
+ AND.L #$80000000,D0 ...SIGN BIT OF X
+ OR.L #$3F800000,D0 ...SGN(X)*1.0 IN SGL FMT
+ FMOVE.S d0,FP0
+ AND.L #$80000000,D0 ...BACK TO THE BARE SIGN BIT
+ EOR.L #$80800000,D0 ...-SIGN(X)*EPS
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FADD.S d0,FP0 ;SGN(X) - SGN(X)*EPS
+
+ bra t_frcinx
+
+ end
diff --git a/sys/arch/m68k/fpsp/sto_res.sa b/sys/arch/m68k/fpsp/sto_res.sa
new file mode 100644
index 00000000000..2f9141b41c1
--- /dev/null
+++ b/sys/arch/m68k/fpsp/sto_res.sa
@@ -0,0 +1,123 @@
+* $NetBSD: sto_res.sa,v 1.3 1994/10/26 07:50:14 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* sto_res.sa 3.1 12/10/90
+*
+* Takes the result and puts it in where the user expects it.
+* Library functions return result in fp0. If fp0 is not the
+* users destination register then fp0 is moved to the
+* correct floating-point destination register. fp0 and fp1
+* are then restored to the original contents.
+*
+* Input: result in fp0,fp1
+*
+* d2 & a0 should be kept unmodified
+*
+* Output: moves the result to the true destination reg or mem
+*
+* Modifies: destination floating point register
+*
+
+STO_RES IDNT 2,1 Motorola 040 Floating Point Software Package
+
+
+ section 8
+
+ include fpsp.h
+
+ xdef sto_cos ;copy fp1 to the cosine destination register; uses d0, and d1 on the fp4-fp7 path
+sto_cos:
+ bfextu CMDREG1B(a6){13:3},d0 ;extract cos destination
+ cmpi.b #3,d0 ;check for fp0-fp3 cases (register numbers 0..3)
+ ble.b c_fp0123 ;fp0-fp3 are written to their USER_FPn(a6) slot instead
+ fmovem.x fp1,-(a7) ;fp4-fp7: push fp1 ...
+ moveq.l #7,d1
+ sub.l d0,d1 ;d1 = 7- (dest. reg. no.)
+ clr.l d0
+ bset.l d1,d0 ;d0 is dynamic register mask
+ fmovem.x (a7)+,d0 ;... and pop it into the register selected by the mask
+ rts
+c_fp0123:
+ tst.b d0
+ beq.b c_is_fp0
+ cmpi.b #1,d0
+ beq.b c_is_fp1
+ cmpi.b #2,d0
+ beq.b c_is_fp2
+c_is_fp3:
+ fmovem.x fp1,USER_FP3(a6) ;USER_FPn is not defined here (presumably fpsp.h): likely the saved user copy - confirm
+ rts
+c_is_fp2:
+ fmovem.x fp1,USER_FP2(a6)
+ rts
+c_is_fp1:
+ fmovem.x fp1,USER_FP1(a6)
+ rts
+c_is_fp0:
+ fmovem.x fp1,USER_FP0(a6)
+ rts
+
+
+ xdef sto_res ;copy fp0 to the destination register; uses d0, and d1 on the fp4-fp7 path
+sto_res:
+ bfextu CMDREG1B(a6){6:3},d0 ;extract destination register
+ cmpi.b #3,d0 ;check for fp0-fp3 cases (register numbers 0..3)
+ ble.b fp0123 ;fp0-fp3 are written to their USER_FPn(a6) slot instead
+ fmovem.x fp0,-(a7) ;fp4-fp7: push fp0 ...
+ moveq.l #7,d1
+ sub.l d0,d1 ;d1 = 7- (dest. reg. no.)
+ clr.l d0
+ bset.l d1,d0 ;d0 is dynamic register mask
+ fmovem.x (a7)+,d0 ;... and pop it into the register selected by the mask
+ rts
+fp0123:
+ tst.b d0
+ beq.b is_fp0
+ cmpi.b #1,d0
+ beq.b is_fp1
+ cmpi.b #2,d0
+ beq.b is_fp2
+is_fp3:
+ fmovem.x fp0,USER_FP3(a6) ;USER_FPn is not defined here (presumably fpsp.h): likely the saved user copy - confirm
+ rts
+is_fp2:
+ fmovem.x fp0,USER_FP2(a6)
+ rts
+is_fp1:
+ fmovem.x fp0,USER_FP1(a6)
+ rts
+is_fp0:
+ fmovem.x fp0,USER_FP0(a6)
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/stwotox.sa b/sys/arch/m68k/fpsp/stwotox.sa
new file mode 100644
index 00000000000..f0583bf30a7
--- /dev/null
+++ b/sys/arch/m68k/fpsp/stwotox.sa
@@ -0,0 +1,452 @@
+* $NetBSD: stwotox.sa,v 1.3 1994/10/26 07:50:15 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* stwotox.sa 3.1 12/10/90
+*
+* stwotox --- 2**X
+* stwotoxd --- 2**X for denormalized X
+* stentox --- 10**X
+* stentoxd --- 10**X for denormalized X
+*
+* Input: Double-extended number X in location pointed to
+* by address register a0.
+*
+* Output: The function values are returned in Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 2 ulps in
+* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+* result is subsequently rounded to double precision. The
+* result is provably monotonic in double precision.
+*
+* Speed: The program stwotox takes approximately 190 cycles and the
+* program stentox takes approximately 200 cycles.
+*
+* Algorithm:
+*
+* twotox
+* 1. If |X| > 16480, go to ExpBig.
+*
+* 2. If |X| < 2**(-70), go to ExpSm.
+*
+* 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
+* decompose N as
+* N = 64(M + M') + j, j = 0,1,2,...,63.
+*
+* 4. Overwrite r := r * log2. Then
+* 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
+* Go to expr to compute that expression.
+*
+* tentox
+* 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
+*
+* 2. If |X| < 2**(-70), go to ExpSm.
+*
+* 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
+* N := round-to-int(y). Decompose N as
+* N = 64(M + M') + j, j = 0,1,2,...,63.
+*
+* 4. Define r as
+* r := ((X - N*L1)-N*L2) * L10
+* where L1, L2 are the leading and trailing parts of log_10(2)/64
+* and L10 is the natural log of 10. Then
+* 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
+* Go to expr to compute that expression.
+*
+* expr
+* 1. Fetch 2**(j/64) from table as Fact1 and Fact2.
+*
+* 2. Overwrite Fact1 and Fact2 by
+* Fact1 := 2**(M) * Fact1
+* Fact2 := 2**(M) * Fact2
+* Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
+*
+* 3. Calculate P where 1 + P approximates exp(r):
+* P = r + r*r*(A1+r*(A2+...+r*A5)).
+*
+* 4. Let AdjFact := 2**(M'). Return
+* AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
+* Exit.
+*
+* ExpBig
+* 1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
+* underflow by Tiny * Tiny.
+*
+* ExpSm
+* 1. Return 1 + X.
+*
+
+STWOTOX IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+BOUNDS1 DC.L $3FB98000,$400D80C0 ... 2^(-70),16480
+BOUNDS2 DC.L $3FB98000,$400B9B07 ... 2^(-70),16480 LOG2/LOG10
+
+L2TEN64 DC.L $406A934F,$0979A371 ... 64LOG10/LOG2
+L10TWO1 DC.L $3F734413,$509F8000 ... LOG2/64LOG10
+
+L10TWO2 DC.L $BFCD0000,$C0219DC1,$DA994FD2,$00000000 ;trailing part of LOG2/64LOG10, used with FMUL.X in stentox
+
+LOG10 DC.L $40000000,$935D8DDD,$AAA8AC17,$00000000 ;natural log of 10, used with FMUL.X in stentox
+
+LOG2 DC.L $3FFE0000,$B17217F7,$D1CF79AC,$00000000 ;natural log of 2, used with FMUL.X in stwotox
+
+EXPA5 DC.L $3F56C16D,$6F7BD0B2 ;A5 of the exp(r) polynomial, read with FMOVE.D
+EXPA4 DC.L $3F811112,$302C712C ;A4, read with FMOVE.D
+EXPA3 DC.L $3FA55555,$55554CC1 ;A3, read with FADD.D
+EXPA2 DC.L $3FC55555,$55554A54 ;A2, read with FADD.D
+EXPA1 DC.L $3FE00000,$00000000,$00000000,$00000000 ;A1 = 0.5, read with FADD.D so only the first two longwords are used
+
+HUGE DC.L $7FFE0000,$FFFFFFFF,$FFFFFFFF,$00000000 ;not referenced by the code in this file
+TINY DC.L $00010000,$FFFFFFFF,$FFFFFFFF,$00000000 ;not referenced by the code in this file
+
+EXPTBL
+ DC.L $3FFF0000,$80000000,$00000000,$3F738000
+ DC.L $3FFF0000,$8164D1F3,$BC030773,$3FBEF7CA
+ DC.L $3FFF0000,$82CD8698,$AC2BA1D7,$3FBDF8A9
+ DC.L $3FFF0000,$843A28C3,$ACDE4046,$3FBCD7C9
+ DC.L $3FFF0000,$85AAC367,$CC487B15,$BFBDE8DA
+ DC.L $3FFF0000,$871F6196,$9E8D1010,$3FBDE85C
+ DC.L $3FFF0000,$88980E80,$92DA8527,$3FBEBBF1
+ DC.L $3FFF0000,$8A14D575,$496EFD9A,$3FBB80CA
+ DC.L $3FFF0000,$8B95C1E3,$EA8BD6E7,$BFBA8373
+ DC.L $3FFF0000,$8D1ADF5B,$7E5BA9E6,$BFBE9670
+ DC.L $3FFF0000,$8EA4398B,$45CD53C0,$3FBDB700
+ DC.L $3FFF0000,$9031DC43,$1466B1DC,$3FBEEEB0
+ DC.L $3FFF0000,$91C3D373,$AB11C336,$3FBBFD6D
+ DC.L $3FFF0000,$935A2B2F,$13E6E92C,$BFBDB319
+ DC.L $3FFF0000,$94F4EFA8,$FEF70961,$3FBDBA2B
+ DC.L $3FFF0000,$96942D37,$20185A00,$3FBE91D5
+ DC.L $3FFF0000,$9837F051,$8DB8A96F,$3FBE8D5A
+ DC.L $3FFF0000,$99E04593,$20B7FA65,$BFBCDE7B
+ DC.L $3FFF0000,$9B8D39B9,$D54E5539,$BFBEBAAF
+ DC.L $3FFF0000,$9D3ED9A7,$2CFFB751,$BFBD86DA
+ DC.L $3FFF0000,$9EF53260,$91A111AE,$BFBEBEDD
+ DC.L $3FFF0000,$A0B0510F,$B9714FC2,$3FBCC96E
+ DC.L $3FFF0000,$A2704303,$0C496819,$BFBEC90B
+ DC.L $3FFF0000,$A43515AE,$09E6809E,$3FBBD1DB
+ DC.L $3FFF0000,$A5FED6A9,$B15138EA,$3FBCE5EB
+ DC.L $3FFF0000,$A7CD93B4,$E965356A,$BFBEC274
+ DC.L $3FFF0000,$A9A15AB4,$EA7C0EF8,$3FBEA83C
+ DC.L $3FFF0000,$AB7A39B5,$A93ED337,$3FBECB00
+ DC.L $3FFF0000,$AD583EEA,$42A14AC6,$3FBE9301
+ DC.L $3FFF0000,$AF3B78AD,$690A4375,$BFBD8367
+ DC.L $3FFF0000,$B123F581,$D2AC2590,$BFBEF05F
+ DC.L $3FFF0000,$B311C412,$A9112489,$3FBDFB3C
+ DC.L $3FFF0000,$B504F333,$F9DE6484,$3FBEB2FB
+ DC.L $3FFF0000,$B6FD91E3,$28D17791,$3FBAE2CB
+ DC.L $3FFF0000,$B8FBAF47,$62FB9EE9,$3FBCDC3C
+ DC.L $3FFF0000,$BAFF5AB2,$133E45FB,$3FBEE9AA
+ DC.L $3FFF0000,$BD08A39F,$580C36BF,$BFBEAEFD
+ DC.L $3FFF0000,$BF1799B6,$7A731083,$BFBCBF51
+ DC.L $3FFF0000,$C12C4CCA,$66709456,$3FBEF88A
+ DC.L $3FFF0000,$C346CCDA,$24976407,$3FBD83B2
+ DC.L $3FFF0000,$C5672A11,$5506DADD,$3FBDF8AB
+ DC.L $3FFF0000,$C78D74C8,$ABB9B15D,$BFBDFB17
+ DC.L $3FFF0000,$C9B9BD86,$6E2F27A3,$BFBEFE3C
+ DC.L $3FFF0000,$CBEC14FE,$F2727C5D,$BFBBB6F8
+ DC.L $3FFF0000,$CE248C15,$1F8480E4,$BFBCEE53
+ DC.L $3FFF0000,$D06333DA,$EF2B2595,$BFBDA4AE
+ DC.L $3FFF0000,$D2A81D91,$F12AE45A,$3FBC9124
+ DC.L $3FFF0000,$D4F35AAB,$CFEDFA1F,$3FBEB243
+ DC.L $3FFF0000,$D744FCCA,$D69D6AF4,$3FBDE69A
+ DC.L $3FFF0000,$D99D15C2,$78AFD7B6,$BFB8BC61
+ DC.L $3FFF0000,$DBFBB797,$DAF23755,$3FBDF610
+ DC.L $3FFF0000,$DE60F482,$5E0E9124,$BFBD8BE1
+ DC.L $3FFF0000,$E0CCDEEC,$2A94E111,$3FBACB12
+ DC.L $3FFF0000,$E33F8972,$BE8A5A51,$3FBB9BFE
+ DC.L $3FFF0000,$E5B906E7,$7C8348A8,$3FBCF2F4
+ DC.L $3FFF0000,$E8396A50,$3C4BDC68,$3FBEF22F
+ DC.L $3FFF0000,$EAC0C6E7,$DD24392F,$BFBDBF4A
+ DC.L $3FFF0000,$ED4F301E,$D9942B84,$3FBEC01A
+ DC.L $3FFF0000,$EFE4B99B,$DCDAF5CB,$3FBE8CAC
+ DC.L $3FFF0000,$F281773C,$59FFB13A,$BFBCBB3F
+ DC.L $3FFF0000,$F5257D15,$2486CC2C,$3FBEF73A
+ DC.L $3FFF0000,$F7D0DF73,$0AD13BB9,$BFB8B795
+ DC.L $3FFF0000,$FA83B2DB,$722A033A,$3FBEF84B
+ DC.L $3FFF0000,$FD3E0C0C,$F486C175,$BFBEF581
+
+N equ L_SCR1
+
+X equ FP_SCR1
+XDCARE equ X+2
+XFRAC equ X+4
+
+ADJFACT equ FP_SCR2
+
+FACT1 equ FP_SCR3
+FACT1HI equ FACT1+4
+FACT1LOW equ FACT1+8
+
+FACT2 equ FP_SCR4
+FACT2HI equ FACT2+4
+FACT2LOW equ FACT2+8
+
+ xref t_unfl
+ xref t_ovfl
+ xref t_frcinx
+
+ xdef stwotoxd
+stwotoxd:
+*--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
+
+ fmove.l d1,fpcr ...set user's rounding mode/precision
+ Fmove.S #:3F800000,FP0 ...RETURN 1 + X
+ move.l (a0),d0 ;d0 = first longword of X
+ or.l #$00800001,d0 ;set bits 23 and 0 (presumably makes a tiny single that keeps the sign bit of X - confirm)
+ fadd.s d0,fp0 ;add d0, taken as a single, to the 1.0 in fp0
+ bra t_frcinx ;leave through t_frcinx (xref)
+
+ xdef stwotox
+stwotox:
+*--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
+ FMOVEM.X (a0),FP0 ...LOAD INPUT, do not set cc's
+
+ MOVE.L (A0),D0 ;D0 = first longword of X
+ MOVE.W 4(A0),D0 ;low word of D0 = word at offset 4 of X
+ FMOVE.X FP0,X(a6) ;keep a copy of the input in the X scratch area
+ ANDI.L #$7FFFFFFF,D0 ;drop bit 31 so D0 can be compared as |X|
+
+ CMPI.L #$3FB98000,D0 ...|X| >= 2**(-70)?
+ BGE.B TWOOK1 ;yes: go check the upper bound
+ BRA.W EXPBORS ;no: EXPBORS picks the small or big path
+
+TWOOK1:
+ CMPI.L #$400D80C0,D0 ...|X| > 16480?
+ BLE.B TWOMAIN ;no: in range, take the main path
+ BRA.W EXPBORS ;yes: EXPBORS picks the small or big path
+
+
+TWOMAIN:
+*--USUAL CASE, 2^(-70) <= |X| <= 16480
+
+ FMOVE.X FP0,FP1 ;FP1 = X
+ FMUL.S #:42800000,FP1 ...64 * X
+
+ FMOVE.L FP1,N(a6) ...N = ROUND-TO-INT(64 X)
+ MOVE.L d2,-(sp) ;save d2, it is restored before the table copy
+ LEA EXPTBL,a1 ...LOAD ADDRESS OF TABLE OF 2^(J/64)
+ FMOVE.L N(a6),FP1 ...N --> FLOATING FMT
+ MOVE.L N(a6),D0 ;D0 = N
+ MOVE.L D0,d2 ;d2 = N
+ ANDI.L #$3F,D0 ...D0 IS J
+ ASL.L #4,D0 ...DISPLACEMENT FOR 2^(J/64)
+ ADDA.L D0,a1 ...ADDRESS FOR 2^(J/64)
+ ASR.L #6,d2 ...d2 IS L, N = 64L + J
+ MOVE.L d2,D0 ;D0 = L
+ ASR.L #1,D0 ...D0 IS M
+ SUB.L D0,d2 ...d2 IS M', N = 64(M+M') + J
+ ADDI.L #$3FFF,d2 ;add $3FFF to M' to form the exponent word of ADJFACT
+ MOVE.W d2,ADJFACT(a6) ...ADJFACT IS 2^(M')
+ MOVE.L (sp)+,d2 ;restore the saved d2
+*--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
+*--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
+*--ADJFACT = 2^(M').
+*--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
+
+ FMUL.S #:3C800000,FP1 ...(1/64)*N
+ MOVE.L (a1)+,FACT1(a6) ;copy the three longwords of the table entry
+ MOVE.L (a1)+,FACT1HI(a6) ;into FACT1, FACT1HI
+ MOVE.L (a1)+,FACT1LOW(a6) ;and FACT1LOW
+ MOVE.W (a1)+,FACT2(a6) ;next table word -> first word of FACT2
+ clr.w FACT2+2(a6) ;zero the word after it
+
+ FSUB.X FP1,FP0 ...X - (1/64)*INT(64 X)
+
+ MOVE.W (a1)+,FACT2HI(a6) ;last table word -> first word of FACT2HI
+ clr.w FACT2HI+2(a6) ;zero the rest of FACT2HI
+ clr.l FACT2LOW(a6) ;and all of FACT2LOW
+ ADD.W D0,FACT1(a6) ;add M to the first word of FACT1 (Fact1 := 2^M * Fact1)
+
+ FMUL.X LOG2,FP0 ...FP0 IS R
+ ADD.W D0,FACT2(a6) ;add M to the first word of FACT2 (Fact2 := 2^M * Fact2)
+
+ BRA.W expr ;continue in expr, the tail shared with stentox
+
+EXPBORS:
+*--FPCR, D0 SAVED
+ CMPI.L #$3FFF8000,D0 ;D0 still holds the packed |X| built by the caller
+ BGT.B EXPBIG ;above $3FFF8000: take the too-large path
+
+EXPSM:
+*--|X| IS SMALL, RETURN 1 + X
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+ FADD.S #:3F800000,FP0 ...RETURN 1 + X
+
+ bra t_frcinx ;leave through t_frcinx (xref)
+
+EXPBIG:
+*--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
+*--REGISTERS SAVE SO FAR ARE FPCR AND D0
+ MOVE.L X(a6),D0 ;first longword of the saved copy of X
+ TST.L D0 ;negative when bit 31 of that longword is set
+ BLT.B EXPNEG ;X < 0: underflow path
+
+ bclr.b #7,(a0) ;t_ovfl expects positive value
+ bra t_ovfl ;X > 0: leave through t_ovfl (xref)
+
+EXPNEG:
+ bclr.b #7,(a0) ;t_unfl expects positive value
+ bra t_unfl ;leave through t_unfl (xref)
+
+ xdef stentoxd
+stentoxd:
+*--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
+
+ fmove.l d1,fpcr ...set user's rounding mode/precision
+ Fmove.S #:3F800000,FP0 ...RETURN 1 + X
+ move.l (a0),d0 ;d0 = first longword of X
+ or.l #$00800001,d0 ;set bits 23 and 0 (presumably makes a tiny single that keeps the sign bit of X - confirm)
+ fadd.s d0,fp0 ;add d0, taken as a single, to the 1.0 in fp0
+ bra t_frcinx ;leave through t_frcinx (xref)
+
+ xdef stentox
+stentox:
+*--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
+ FMOVEM.X (a0),FP0 ...LOAD INPUT, do not set cc's
+
+ MOVE.L (A0),D0 ;D0 = first longword of X
+ MOVE.W 4(A0),D0 ;low word of D0 = word at offset 4 of X
+ FMOVE.X FP0,X(a6) ;keep a copy of the input in the X scratch area
+ ANDI.L #$7FFFFFFF,D0 ;drop bit 31 so D0 can be compared as |X|
+
+ CMPI.L #$3FB98000,D0 ...|X| >= 2**(-70)?
+ BGE.B TENOK1 ;yes: go check the upper bound
+ BRA.W EXPBORS ;no: EXPBORS picks the small or big path
+
+TENOK1:
+ CMPI.L #$400B9B07,D0 ...|X| <= 16480*log2/log10 ?
+ BLE.B TENMAIN ;yes: in range, take the main path
+ BRA.W EXPBORS ;no: EXPBORS picks the small or big path
+
+TENMAIN:
+*--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
+
+ FMOVE.X FP0,FP1 ;FP1 = X
+ FMUL.D L2TEN64,FP1 ...X*64*LOG10/LOG2
+
+ FMOVE.L FP1,N(a6) ...N=INT(X*64*LOG10/LOG2)
+ MOVE.L d2,-(sp) ;save d2, it is restored before the table copy
+ LEA EXPTBL,a1 ...LOAD ADDRESS OF TABLE OF 2^(J/64)
+ FMOVE.L N(a6),FP1 ...N --> FLOATING FMT
+ MOVE.L N(a6),D0 ;D0 = N
+ MOVE.L D0,d2 ;d2 = N
+ ANDI.L #$3F,D0 ...D0 IS J
+ ASL.L #4,D0 ...DISPLACEMENT FOR 2^(J/64)
+ ADDA.L D0,a1 ...ADDRESS FOR 2^(J/64)
+ ASR.L #6,d2 ...d2 IS L, N = 64L + J
+ MOVE.L d2,D0 ;D0 = L
+ ASR.L #1,D0 ...D0 IS M
+ SUB.L D0,d2 ...d2 IS M', N = 64(M+M') + J
+ ADDI.L #$3FFF,d2 ;add $3FFF to M' to form the exponent word of ADJFACT
+ MOVE.W d2,ADJFACT(a6) ...ADJFACT IS 2^(M')
+ MOVE.L (sp)+,d2 ;restore the saved d2
+
+*--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
+*--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
+*--ADJFACT = 2^(M').
+*--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
+
+ FMOVE.X FP1,FP2 ;FP2 = N too, for the trailing product below
+
+ FMUL.D L10TWO1,FP1 ...N*(LOG2/64LOG10)_LEAD
+ MOVE.L (a1)+,FACT1(a6) ;first longword of the table entry -> FACT1
+
+ FMUL.X L10TWO2,FP2 ...N*(LOG2/64LOG10)_TRAIL
+
+ MOVE.L (a1)+,FACT1HI(a6) ;second longword -> FACT1HI
+ MOVE.L (a1)+,FACT1LOW(a6) ;third longword -> FACT1LOW
+ FSUB.X FP1,FP0 ...X - N L_LEAD
+ MOVE.W (a1)+,FACT2(a6) ;next table word -> first word of FACT2
+
+ FSUB.X FP2,FP0 ...X - N L_TRAIL
+
+ clr.w FACT2+2(a6) ;zero the word after it
+ MOVE.W (a1)+,FACT2HI(a6) ;last table word -> first word of FACT2HI
+ clr.w FACT2HI+2(a6) ;zero the rest of FACT2HI
+ clr.l FACT2LOW(a6) ;and all of FACT2LOW
+
+ FMUL.X LOG10,FP0 ...FP0 IS R
+
+ ADD.W D0,FACT1(a6) ;add M to the first word of FACT1 (Fact1 := 2^M * Fact1)
+ ADD.W D0,FACT2(a6) ;add M to the first word of FACT2 (Fact2 := 2^M * Fact2)
+
+expr:
+*--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
+*--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
+*--FP0 IS R. THE FOLLOWING CODE COMPUTES
+*-- 2**(M'+M) * 2**(J/64) * EXP(R)
+
+ FMOVE.X FP0,FP1 ;FP1 = R
+ FMUL.X FP1,FP1 ...FP1 IS S = R*R
+
+ FMOVE.D EXPA5,FP2 ...FP2 IS A5
+ FMOVE.D EXPA4,FP3 ...FP3 IS A4
+
+ FMUL.X FP1,FP2 ...FP2 IS S*A5
+ FMUL.X FP1,FP3 ...FP3 IS S*A4
+
+ FADD.D EXPA3,FP2 ...FP2 IS A3+S*A5
+ FADD.D EXPA2,FP3 ...FP3 IS A2+S*A4
+
+ FMUL.X FP1,FP2 ...FP2 IS S*(A3+S*A5)
+ FMUL.X FP1,FP3 ...FP3 IS S*(A2+S*A4)
+
+ FADD.D EXPA1,FP2 ...FP2 IS A1+S*(A3+S*A5)
+ FMUL.X FP0,FP3 ...FP3 IS R*S*(A2+S*A4)
+
+ FMUL.X FP1,FP2 ...FP2 IS S*(A1+S*(A3+S*A5))
+ FADD.X FP3,FP0 ...FP0 IS R+R*S*(A2+S*A4)
+
+ FADD.X FP2,FP0 ...FP0 IS EXP(R) - 1
+
+
+*--FINAL RECONSTRUCTION PROCESS
+*--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
+
+ FMUL.X FACT1(a6),FP0 ;FP0 = Fact1 * (EXP(R)-1)
+ FADD.X FACT2(a6),FP0 ;FP0 = Fact1*(EXP(R)-1) + Fact2
+ FADD.X FACT1(a6),FP0 ;FP0 = Fact1 + (Fact1*(EXP(R)-1) + Fact2)
+
+ FMOVE.L d1,FPCR ;restore users exceptions
+ clr.w ADJFACT+2(a6) ;fill in the rest of ADJFACT: zero the second word
+ move.l #$80000000,ADJFACT+4(a6) ;second longword = $80000000
+ clr.l ADJFACT+8(a6) ;third longword = 0
+ FMUL.X ADJFACT(a6),FP0 ...FINAL ADJUSTMENT
+
+ bra t_frcinx ;leave through t_frcinx (xref)
+
+ end
diff --git a/sys/arch/m68k/fpsp/tbldo.sa b/sys/arch/m68k/fpsp/tbldo.sa
new file mode 100644
index 00000000000..f61a9fcee50
--- /dev/null
+++ b/sys/arch/m68k/fpsp/tbldo.sa
@@ -0,0 +1,579 @@
+* $NetBSD: tbldo.sa,v 1.2 1994/10/26 07:50:18 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* tbldo.sa 3.1 12/10/90
+*
+* Modified:
+* 8/16/90 chinds The table was constructed to use only one level
+* of indirection in do_func for monadic
+* functions. Dyadic functions require two
+* levels, and the tables are still contained
+* in do_func. The table is arranged for
+* indexing with a 10-bit index, with the first
+* 7 bits the opcode, and the remaining 3
+* the stag. For dyadic functions, all
+* valid addresses are to the generic entry
+* point.
+*
+
+TBLDO IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ xref ld_pinf,ld_pone,ld_ppi2
+ xref t_dz2,t_operr
+ xref serror,sone,szero,sinf,snzrinx
+ xref sopr_inf,spi_2,src_nan,szr_inf
+
+ xref smovcr
+ xref pmod,prem,pscale
+ xref satanh,satanhd
+ xref sacos,sacosd,sasin,sasind,satan,satand
+ xref setox,setoxd,setoxm1,setoxm1d,setoxm1i
+ xref sgetexp,sgetexpd,sgetman,sgetmand
+ xref sint,sintd,sintrz
+ xref ssincos,ssincosd,ssincosi,ssincosnan,ssincosz
+ xref scos,scosd,ssin,ssind,stan,stand
+ xref scosh,scoshd,ssinh,ssinhd,stanh,stanhd
+ xref sslog10,sslog2,sslogn,sslognp1
+ xref sslog10d,sslog2d,sslognd,slognp1d
+ xref stentox,stentoxd,stwotox,stwotoxd
+
+* instruction ;opcode-stag Notes
+ xdef tblpre
+tblpre:
+ dc.l smovcr ;$00-0 fmovecr all
+ dc.l smovcr ;$00-1 fmovecr all
+ dc.l smovcr ;$00-2 fmovecr all
+ dc.l smovcr ;$00-3 fmovecr all
+ dc.l smovcr ;$00-4 fmovecr all
+ dc.l smovcr ;$00-5 fmovecr all
+ dc.l smovcr ;$00-6 fmovecr all
+ dc.l smovcr ;$00-7 fmovecr all
+
+ dc.l sint ;$01-0 fint norm
+ dc.l szero ;$01-1 fint zero
+ dc.l sinf ;$01-2 fint inf
+ dc.l src_nan ;$01-3 fint nan
+ dc.l sintd ;$01-4 fint denorm inx
+ dc.l serror ;$01-5 fint ERROR
+ dc.l serror ;$01-6 fint ERROR
+ dc.l serror ;$01-7 fint ERROR
+
+ dc.l ssinh ;$02-0 fsinh norm
+ dc.l szero ;$02-1 fsinh zero
+ dc.l sinf ;$02-2 fsinh inf
+ dc.l src_nan ;$02-3 fsinh nan
+ dc.l ssinhd ;$02-4 fsinh denorm
+ dc.l serror ;$02-5 fsinh ERROR
+ dc.l serror ;$02-6 fsinh ERROR
+ dc.l serror ;$02-7 fsinh ERROR
+
+ dc.l sintrz ;$03-0 fintrz norm
+ dc.l szero ;$03-1 fintrz zero
+ dc.l sinf ;$03-2 fintrz inf
+ dc.l src_nan ;$03-3 fintrz nan
+ dc.l snzrinx ;$03-4 fintrz denorm inx
+ dc.l serror ;$03-5 fintrz ERROR
+ dc.l serror ;$03-6 fintrz ERROR
+ dc.l serror ;$03-7 fintrz ERROR
+
+ dc.l serror ;$04-0 ERROR - illegal extension
+ dc.l serror ;$04-1 ERROR - illegal extension
+ dc.l serror ;$04-2 ERROR - illegal extension
+ dc.l serror ;$04-3 ERROR - illegal extension
+ dc.l serror ;$04-4 ERROR - illegal extension
+ dc.l serror ;$04-5 ERROR - illegal extension
+ dc.l serror ;$04-6 ERROR - illegal extension
+ dc.l serror ;$04-7 ERROR - illegal extension
+
+ dc.l serror ;$05-0 ERROR - illegal extension
+ dc.l serror ;$05-1 ERROR - illegal extension
+ dc.l serror ;$05-2 ERROR - illegal extension
+ dc.l serror ;$05-3 ERROR - illegal extension
+ dc.l serror ;$05-4 ERROR - illegal extension
+ dc.l serror ;$05-5 ERROR - illegal extension
+ dc.l serror ;$05-6 ERROR - illegal extension
+ dc.l serror ;$05-7 ERROR - illegal extension
+
+ dc.l sslognp1 ;$06-0 flognp1 norm
+ dc.l szero ;$06-1 flognp1 zero
+ dc.l sopr_inf ;$06-2 flognp1 inf
+ dc.l src_nan ;$06-3 flognp1 nan
+ dc.l slognp1d ;$06-4 flognp1 denorm
+ dc.l serror ;$06-5 flognp1 ERROR
+ dc.l serror ;$06-6 flognp1 ERROR
+ dc.l serror ;$06-7 flognp1 ERROR
+
+ dc.l serror ;$07-0 ERROR - illegal extension
+ dc.l serror ;$07-1 ERROR - illegal extension
+ dc.l serror ;$07-2 ERROR - illegal extension
+ dc.l serror ;$07-3 ERROR - illegal extension
+ dc.l serror ;$07-4 ERROR - illegal extension
+ dc.l serror ;$07-5 ERROR - illegal extension
+ dc.l serror ;$07-6 ERROR - illegal extension
+ dc.l serror ;$07-7 ERROR - illegal extension
+
+ dc.l setoxm1 ;$08-0 fetoxm1 norm
+ dc.l szero ;$08-1 fetoxm1 zero
+ dc.l setoxm1i ;$08-2 fetoxm1 inf
+ dc.l src_nan ;$08-3 fetoxm1 nan
+ dc.l setoxm1d ;$08-4 fetoxm1 denorm
+ dc.l serror ;$08-5 fetoxm1 ERROR
+ dc.l serror ;$08-6 fetoxm1 ERROR
+ dc.l serror ;$08-7 fetoxm1 ERROR
+
+ dc.l stanh ;$09-0 ftanh norm
+ dc.l szero ;$09-1 ftanh zero
+ dc.l sone ;$09-2 ftanh inf
+ dc.l src_nan ;$09-3 ftanh nan
+ dc.l stanhd ;$09-4 ftanh denorm
+ dc.l serror ;$09-5 ftanh ERROR
+ dc.l serror ;$09-6 ftanh ERROR
+ dc.l serror ;$09-7 ftanh ERROR
+
+ dc.l satan ;$0a-0 fatan norm
+ dc.l szero ;$0a-1 fatan zero
+ dc.l spi_2 ;$0a-2 fatan inf
+ dc.l src_nan ;$0a-3 fatan nan
+ dc.l satand ;$0a-4 fatan denorm
+ dc.l serror ;$0a-5 fatan ERROR
+ dc.l serror ;$0a-6 fatan ERROR
+ dc.l serror ;$0a-7 fatan ERROR
+
+ dc.l serror ;$0b-0 ERROR - illegal extension
+ dc.l serror ;$0b-1 ERROR - illegal extension
+ dc.l serror ;$0b-2 ERROR - illegal extension
+ dc.l serror ;$0b-3 ERROR - illegal extension
+ dc.l serror ;$0b-4 ERROR - illegal extension
+ dc.l serror ;$0b-5 ERROR - illegal extension
+ dc.l serror ;$0b-6 ERROR - illegal extension
+ dc.l serror ;$0b-7 ERROR - illegal extension
+
+ dc.l sasin ;$0c-0 fasin norm
+ dc.l szero ;$0c-1 fasin zero
+ dc.l t_operr ;$0c-2 fasin inf
+ dc.l src_nan ;$0c-3 fasin nan
+ dc.l sasind ;$0c-4 fasin denorm
+ dc.l serror ;$0c-5 fasin ERROR
+ dc.l serror ;$0c-6 fasin ERROR
+ dc.l serror ;$0c-7 fasin ERROR
+
+ dc.l satanh ;$0d-0 fatanh norm
+ dc.l szero ;$0d-1 fatanh zero
+ dc.l t_operr ;$0d-2 fatanh inf
+ dc.l src_nan ;$0d-3 fatanh nan
+ dc.l satanhd ;$0d-4 fatanh denorm
+ dc.l serror ;$0d-5 fatanh ERROR
+ dc.l serror ;$0d-6 fatanh ERROR
+ dc.l serror ;$0d-7 fatanh ERROR
+
+ dc.l ssin ;$0e-0 fsin norm
+ dc.l szero ;$0e-1 fsin zero
+ dc.l t_operr ;$0e-2 fsin inf
+ dc.l src_nan ;$0e-3 fsin nan
+ dc.l ssind ;$0e-4 fsin denorm
+ dc.l serror ;$0e-5 fsin ERROR
+ dc.l serror ;$0e-6 fsin ERROR
+ dc.l serror ;$0e-7 fsin ERROR
+
+ dc.l stan ;$0f-0 ftan norm
+ dc.l szero ;$0f-1 ftan zero
+ dc.l t_operr ;$0f-2 ftan inf
+ dc.l src_nan ;$0f-3 ftan nan
+ dc.l stand ;$0f-4 ftan denorm
+ dc.l serror ;$0f-5 ftan ERROR
+ dc.l serror ;$0f-6 ftan ERROR
+ dc.l serror ;$0f-7 ftan ERROR
+
+ dc.l setox ;$10-0 fetox norm
+ dc.l ld_pone ;$10-1 fetox zero
+ dc.l szr_inf ;$10-2 fetox inf
+ dc.l src_nan ;$10-3 fetox nan
+ dc.l setoxd ;$10-4 fetox denorm
+ dc.l serror ;$10-5 fetox ERROR
+ dc.l serror ;$10-6 fetox ERROR
+ dc.l serror ;$10-7 fetox ERROR
+
+ dc.l stwotox ;$11-0 ftwotox norm
+ dc.l ld_pone ;$11-1 ftwotox zero
+ dc.l szr_inf ;$11-2 ftwotox inf
+ dc.l src_nan ;$11-3 ftwotox nan
+ dc.l stwotoxd ;$11-4 ftwotox denorm
+ dc.l serror ;$11-5 ftwotox ERROR
+ dc.l serror ;$11-6 ftwotox ERROR
+ dc.l serror ;$11-7 ftwotox ERROR
+
+ dc.l stentox ;$12-0 ftentox norm
+ dc.l ld_pone ;$12-1 ftentox zero
+ dc.l szr_inf ;$12-2 ftentox inf
+ dc.l src_nan ;$12-3 ftentox nan
+ dc.l stentoxd ;$12-4 ftentox denorm
+ dc.l serror ;$12-5 ftentox ERROR
+ dc.l serror ;$12-6 ftentox ERROR
+ dc.l serror ;$12-7 ftentox ERROR
+
+ dc.l serror ;$13-0 ERROR - illegal extension
+ dc.l serror ;$13-1 ERROR - illegal extension
+ dc.l serror ;$13-2 ERROR - illegal extension
+ dc.l serror ;$13-3 ERROR - illegal extension
+ dc.l serror ;$13-4 ERROR - illegal extension
+ dc.l serror ;$13-5 ERROR - illegal extension
+ dc.l serror ;$13-6 ERROR - illegal extension
+ dc.l serror ;$13-7 ERROR - illegal extension
+
+ dc.l sslogn ;$14-0 flogn norm
+ dc.l t_dz2 ;$14-1 flogn zero
+ dc.l sopr_inf ;$14-2 flogn inf
+ dc.l src_nan ;$14-3 flogn nan
+ dc.l sslognd ;$14-4 flogn denorm
+ dc.l serror ;$14-5 flogn ERROR
+ dc.l serror ;$14-6 flogn ERROR
+ dc.l serror ;$14-7 flogn ERROR
+
+ dc.l sslog10 ;$15-0 flog10 norm
+ dc.l t_dz2 ;$15-1 flog10 zero
+ dc.l sopr_inf ;$15-2 flog10 inf
+ dc.l src_nan ;$15-3 flog10 nan
+ dc.l sslog10d ;$15-4 flog10 denorm
+ dc.l serror ;$15-5 flog10 ERROR
+ dc.l serror ;$15-6 flog10 ERROR
+ dc.l serror ;$15-7 flog10 ERROR
+
+ dc.l sslog2 ;$16-0 flog2 norm
+ dc.l t_dz2 ;$16-1 flog2 zero
+ dc.l sopr_inf ;$16-2 flog2 inf
+ dc.l src_nan ;$16-3 flog2 nan
+ dc.l sslog2d ;$16-4 flog2 denorm
+ dc.l serror ;$16-5 flog2 ERROR
+ dc.l serror ;$16-6 flog2 ERROR
+ dc.l serror ;$16-7 flog2 ERROR
+
+ dc.l serror ;$17-0 ERROR - illegal extension
+ dc.l serror ;$17-1 ERROR - illegal extension
+ dc.l serror ;$17-2 ERROR - illegal extension
+ dc.l serror ;$17-3 ERROR - illegal extension
+ dc.l serror ;$17-4 ERROR - illegal extension
+ dc.l serror ;$17-5 ERROR - illegal extension
+ dc.l serror ;$17-6 ERROR - illegal extension
+ dc.l serror ;$17-7 ERROR - illegal extension
+
+ dc.l serror ;$18-0 ERROR - illegal extension
+ dc.l serror ;$18-1 ERROR - illegal extension
+ dc.l serror ;$18-2 ERROR - illegal extension
+ dc.l serror ;$18-3 ERROR - illegal extension
+ dc.l serror ;$18-4 ERROR - illegal extension
+ dc.l serror ;$18-5 ERROR - illegal extension
+ dc.l serror ;$18-6 ERROR - illegal extension
+ dc.l serror ;$18-7 ERROR - illegal extension
+
+ dc.l scosh ;$19-0 fcosh norm
+ dc.l ld_pone ;$19-1 fcosh zero
+ dc.l ld_pinf ;$19-2 fcosh inf
+ dc.l src_nan ;$19-3 fcosh nan
+ dc.l scoshd ;$19-4 fcosh denorm
+ dc.l serror ;$19-5 fcosh ERROR
+ dc.l serror ;$19-6 fcosh ERROR
+ dc.l serror ;$19-7 fcosh ERROR
+
+ dc.l serror ;$1a-0 ERROR - illegal extension
+ dc.l serror ;$1a-1 ERROR - illegal extension
+ dc.l serror ;$1a-2 ERROR - illegal extension
+ dc.l serror ;$1a-3 ERROR - illegal extension
+ dc.l serror ;$1a-4 ERROR - illegal extension
+ dc.l serror ;$1a-5 ERROR - illegal extension
+ dc.l serror ;$1a-6 ERROR - illegal extension
+ dc.l serror ;$1a-7 ERROR - illegal extension
+
+ dc.l serror ;$1b-0 ERROR - illegal extension
+ dc.l serror ;$1b-1 ERROR - illegal extension
+ dc.l serror ;$1b-2 ERROR - illegal extension
+ dc.l serror ;$1b-3 ERROR - illegal extension
+ dc.l serror ;$1b-4 ERROR - illegal extension
+ dc.l serror ;$1b-5 ERROR - illegal extension
+ dc.l serror ;$1b-6 ERROR - illegal extension
+ dc.l serror ;$1b-7 ERROR - illegal extension
+
+ dc.l sacos ;$1c-0 facos norm
+ dc.l ld_ppi2 ;$1c-1 facos zero
+ dc.l t_operr ;$1c-2 facos inf
+ dc.l src_nan ;$1c-3 facos nan
+ dc.l sacosd ;$1c-4 facos denorm
+ dc.l serror ;$1c-5 facos ERROR
+ dc.l serror ;$1c-6 facos ERROR
+ dc.l serror ;$1c-7 facos ERROR
+
+ dc.l scos ;$1d-0 fcos norm
+ dc.l ld_pone ;$1d-1 fcos zero
+ dc.l t_operr ;$1d-2 fcos inf
+ dc.l src_nan ;$1d-3 fcos nan
+ dc.l scosd ;$1d-4 fcos denorm
+ dc.l serror ;$1d-5 fcos ERROR
+ dc.l serror ;$1d-6 fcos ERROR
+ dc.l serror ;$1d-7 fcos ERROR
+
+ dc.l sgetexp ;$1e-0 fgetexp norm
+ dc.l szero ;$1e-1 fgetexp zero
+ dc.l t_operr ;$1e-2 fgetexp inf
+ dc.l src_nan ;$1e-3 fgetexp nan
+ dc.l sgetexpd ;$1e-4 fgetexp denorm
+ dc.l serror ;$1e-5 fgetexp ERROR
+ dc.l serror ;$1e-6 fgetexp ERROR
+ dc.l serror ;$1e-7 fgetexp ERROR
+
+ dc.l sgetman ;$1f-0 fgetman norm
+ dc.l szero ;$1f-1 fgetman zero
+ dc.l t_operr ;$1f-2 fgetman inf
+ dc.l src_nan ;$1f-3 fgetman nan
+ dc.l sgetmand ;$1f-4 fgetman denorm
+ dc.l serror ;$1f-5 fgetman ERROR
+ dc.l serror ;$1f-6 fgetman ERROR
+ dc.l serror ;$1f-7 fgetman ERROR
+
+ dc.l serror ;$20-0 ERROR - illegal extension
+ dc.l serror ;$20-1 ERROR - illegal extension
+ dc.l serror ;$20-2 ERROR - illegal extension
+ dc.l serror ;$20-3 ERROR - illegal extension
+ dc.l serror ;$20-4 ERROR - illegal extension
+ dc.l serror ;$20-5 ERROR - illegal extension
+ dc.l serror ;$20-6 ERROR - illegal extension
+ dc.l serror ;$20-7 ERROR - illegal extension
+
+ dc.l pmod ;$21-0 fmod all
+ dc.l pmod ;$21-1 fmod all
+ dc.l pmod ;$21-2 fmod all
+ dc.l pmod ;$21-3 fmod all
+ dc.l pmod ;$21-4 fmod all
+ dc.l serror ;$21-5 fmod ERROR
+ dc.l serror ;$21-6 fmod ERROR
+ dc.l serror ;$21-7 fmod ERROR
+
+ dc.l serror ;$22-0 ERROR - illegal extension
+ dc.l serror ;$22-1 ERROR - illegal extension
+ dc.l serror ;$22-2 ERROR - illegal extension
+ dc.l serror ;$22-3 ERROR - illegal extension
+ dc.l serror ;$22-4 ERROR - illegal extension
+ dc.l serror ;$22-5 ERROR - illegal extension
+ dc.l serror ;$22-6 ERROR - illegal extension
+ dc.l serror ;$22-7 ERROR - illegal extension
+
+ dc.l serror ;$23-0 ERROR - illegal extension
+ dc.l serror ;$23-1 ERROR - illegal extension
+ dc.l serror ;$23-2 ERROR - illegal extension
+ dc.l serror ;$23-3 ERROR - illegal extension
+ dc.l serror ;$23-4 ERROR - illegal extension
+ dc.l serror ;$23-5 ERROR - illegal extension
+ dc.l serror ;$23-6 ERROR - illegal extension
+ dc.l serror ;$23-7 ERROR - illegal extension
+
+ dc.l serror ;$24-0 ERROR - illegal extension
+ dc.l serror ;$24-1 ERROR - illegal extension
+ dc.l serror ;$24-2 ERROR - illegal extension
+ dc.l serror ;$24-3 ERROR - illegal extension
+ dc.l serror ;$24-4 ERROR - illegal extension
+ dc.l serror ;$24-5 ERROR - illegal extension
+ dc.l serror ;$24-6 ERROR - illegal extension
+ dc.l serror ;$24-7 ERROR - illegal extension
+
+ dc.l prem ;$25-0 frem all
+ dc.l prem ;$25-1 frem all
+ dc.l prem ;$25-2 frem all
+ dc.l prem ;$25-3 frem all
+ dc.l prem ;$25-4 frem all
+ dc.l serror ;$25-5 frem ERROR
+ dc.l serror ;$25-6 frem ERROR
+ dc.l serror ;$25-7 frem ERROR
+
+ dc.l pscale ;$26-0 fscale all
+ dc.l pscale ;$26-1 fscale all
+ dc.l pscale ;$26-2 fscale all
+ dc.l pscale ;$26-3 fscale all
+ dc.l pscale ;$26-4 fscale all
+ dc.l serror ;$26-5 fscale ERROR
+ dc.l serror ;$26-6 fscale ERROR
+ dc.l serror ;$26-7 fscale ERROR
+
+ dc.l serror ;$27-0 ERROR - illegal extension
+ dc.l serror ;$27-1 ERROR - illegal extension
+ dc.l serror ;$27-2 ERROR - illegal extension
+ dc.l serror ;$27-3 ERROR - illegal extension
+ dc.l serror ;$27-4 ERROR - illegal extension
+ dc.l serror ;$27-5 ERROR - illegal extension
+ dc.l serror ;$27-6 ERROR - illegal extension
+ dc.l serror ;$27-7 ERROR - illegal extension
+
+ dc.l serror ;$28-0 ERROR - illegal extension
+ dc.l serror ;$28-1 ERROR - illegal extension
+ dc.l serror ;$28-2 ERROR - illegal extension
+ dc.l serror ;$28-3 ERROR - illegal extension
+ dc.l serror ;$28-4 ERROR - illegal extension
+ dc.l serror ;$28-5 ERROR - illegal extension
+ dc.l serror ;$28-6 ERROR - illegal extension
+ dc.l serror ;$28-7 ERROR - illegal extension
+
+ dc.l serror ;$29-0 ERROR - illegal extension
+ dc.l serror ;$29-1 ERROR - illegal extension
+ dc.l serror ;$29-2 ERROR - illegal extension
+ dc.l serror ;$29-3 ERROR - illegal extension
+ dc.l serror ;$29-4 ERROR - illegal extension
+ dc.l serror ;$29-5 ERROR - illegal extension
+ dc.l serror ;$29-6 ERROR - illegal extension
+ dc.l serror ;$29-7 ERROR - illegal extension
+
+ dc.l serror ;$2a-0 ERROR - illegal extension
+ dc.l serror ;$2a-1 ERROR - illegal extension
+ dc.l serror ;$2a-2 ERROR - illegal extension
+ dc.l serror ;$2a-3 ERROR - illegal extension
+ dc.l serror ;$2a-4 ERROR - illegal extension
+ dc.l serror ;$2a-5 ERROR - illegal extension
+ dc.l serror ;$2a-6 ERROR - illegal extension
+ dc.l serror ;$2a-7 ERROR - illegal extension
+
+ dc.l serror ;$2b-0 ERROR - illegal extension
+ dc.l serror ;$2b-1 ERROR - illegal extension
+ dc.l serror ;$2b-2 ERROR - illegal extension
+ dc.l serror ;$2b-3 ERROR - illegal extension
+ dc.l serror ;$2b-4 ERROR - illegal extension
+ dc.l serror ;$2b-5 ERROR - illegal extension
+ dc.l serror ;$2b-6 ERROR - illegal extension
+ dc.l serror ;$2b-7 ERROR - illegal extension
+
+ dc.l serror ;$2c-0 ERROR - illegal extension
+ dc.l serror ;$2c-1 ERROR - illegal extension
+ dc.l serror ;$2c-2 ERROR - illegal extension
+ dc.l serror ;$2c-3 ERROR - illegal extension
+ dc.l serror ;$2c-4 ERROR - illegal extension
+ dc.l serror ;$2c-5 ERROR - illegal extension
+ dc.l serror ;$2c-6 ERROR - illegal extension
+ dc.l serror ;$2c-7 ERROR - illegal extension
+
+ dc.l serror ;$2d-0 ERROR - illegal extension
+ dc.l serror ;$2d-1 ERROR - illegal extension
+ dc.l serror ;$2d-2 ERROR - illegal extension
+ dc.l serror ;$2d-3 ERROR - illegal extension
+ dc.l serror ;$2d-4 ERROR - illegal extension
+ dc.l serror ;$2d-5 ERROR - illegal extension
+ dc.l serror ;$2d-6 ERROR - illegal extension
+ dc.l serror ;$2d-7 ERROR - illegal extension
+
+ dc.l serror ;$2e-0 ERROR - illegal extension
+ dc.l serror ;$2e-1 ERROR - illegal extension
+ dc.l serror ;$2e-2 ERROR - illegal extension
+ dc.l serror ;$2e-3 ERROR - illegal extension
+ dc.l serror ;$2e-4 ERROR - illegal extension
+ dc.l serror ;$2e-5 ERROR - illegal extension
+ dc.l serror ;$2e-6 ERROR - illegal extension
+ dc.l serror ;$2e-7 ERROR - illegal extension
+
+ dc.l serror ;$2f-0 ERROR - illegal extension
+ dc.l serror ;$2f-1 ERROR - illegal extension
+ dc.l serror ;$2f-2 ERROR - illegal extension
+ dc.l serror ;$2f-3 ERROR - illegal extension
+ dc.l serror ;$2f-4 ERROR - illegal extension
+ dc.l serror ;$2f-5 ERROR - illegal extension
+ dc.l serror ;$2f-6 ERROR - illegal extension
+ dc.l serror ;$2f-7 ERROR - illegal extension
+
+ dc.l ssincos ;$30-0 fsincos norm
+ dc.l ssincosz ;$30-1 fsincos zero
+ dc.l ssincosi ;$30-2 fsincos inf
+ dc.l ssincosnan ;$30-3 fsincos nan
+ dc.l ssincosd ;$30-4 fsincos denorm
+ dc.l serror ;$30-5 fsincos ERROR
+ dc.l serror ;$30-6 fsincos ERROR
+ dc.l serror ;$30-7 fsincos ERROR
+
+ dc.l ssincos ;$31-0 fsincos norm
+ dc.l ssincosz ;$31-1 fsincos zero
+ dc.l ssincosi ;$31-2 fsincos inf
+ dc.l ssincosnan ;$31-3 fsincos nan
+ dc.l ssincosd ;$31-4 fsincos denorm
+ dc.l serror ;$31-5 fsincos ERROR
+ dc.l serror ;$31-6 fsincos ERROR
+ dc.l serror ;$31-7 fsincos ERROR
+
+ dc.l ssincos ;$32-0 fsincos norm
+ dc.l ssincosz ;$32-1 fsincos zero
+ dc.l ssincosi ;$32-2 fsincos inf
+ dc.l ssincosnan ;$32-3 fsincos nan
+ dc.l ssincosd ;$32-4 fsincos denorm
+ dc.l serror ;$32-5 fsincos ERROR
+ dc.l serror ;$32-6 fsincos ERROR
+ dc.l serror ;$32-7 fsincos ERROR
+
+ dc.l ssincos ;$33-0 fsincos norm
+ dc.l ssincosz ;$33-1 fsincos zero
+ dc.l ssincosi ;$33-2 fsincos inf
+ dc.l ssincosnan ;$33-3 fsincos nan
+ dc.l ssincosd ;$33-4 fsincos denorm
+ dc.l serror ;$33-5 fsincos ERROR
+ dc.l serror ;$33-6 fsincos ERROR
+ dc.l serror ;$33-7 fsincos ERROR
+
+ dc.l ssincos ;$34-0 fsincos norm
+ dc.l ssincosz ;$34-1 fsincos zero
+ dc.l ssincosi ;$34-2 fsincos inf
+ dc.l ssincosnan ;$34-3 fsincos nan
+ dc.l ssincosd ;$34-4 fsincos denorm
+ dc.l serror ;$34-5 fsincos ERROR
+ dc.l serror ;$34-6 fsincos ERROR
+ dc.l serror ;$34-7 fsincos ERROR
+
+ dc.l ssincos ;$35-0 fsincos norm
+ dc.l ssincosz ;$35-1 fsincos zero
+ dc.l ssincosi ;$35-2 fsincos inf
+ dc.l ssincosnan ;$35-3 fsincos nan
+ dc.l ssincosd ;$35-4 fsincos denorm
+ dc.l serror ;$35-5 fsincos ERROR
+ dc.l serror ;$35-6 fsincos ERROR
+ dc.l serror ;$35-7 fsincos ERROR
+
+ dc.l ssincos ;$36-0 fsincos norm
+ dc.l ssincosz ;$36-1 fsincos zero
+ dc.l ssincosi ;$36-2 fsincos inf
+ dc.l ssincosnan ;$36-3 fsincos nan
+ dc.l ssincosd ;$36-4 fsincos denorm
+ dc.l serror ;$36-5 fsincos ERROR
+ dc.l serror ;$36-6 fsincos ERROR
+ dc.l serror ;$36-7 fsincos ERROR
+
+ dc.l ssincos ;$37-0 fsincos norm
+ dc.l ssincosz ;$37-1 fsincos zero
+ dc.l ssincosi ;$37-2 fsincos inf
+ dc.l ssincosnan ;$37-3 fsincos nan
+ dc.l ssincosd ;$37-4 fsincos denorm
+ dc.l serror ;$37-5 fsincos ERROR
+ dc.l serror ;$37-6 fsincos ERROR
+ dc.l serror ;$37-7 fsincos ERROR
+
+ end
diff --git a/sys/arch/m68k/fpsp/util.sa b/sys/arch/m68k/fpsp/util.sa
new file mode 100644
index 00000000000..9c03ac747b5
--- /dev/null
+++ b/sys/arch/m68k/fpsp/util.sa
@@ -0,0 +1,773 @@
+* $NetBSD: util.sa,v 1.3 1994/10/26 07:50:20 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* util.sa 3.7 7/29/91
+*
+* This file contains routines used by other programs.
+*
+* ovf_res: used by overflow to force the correct
+* result. ovf_r_k, ovf_r_x2, ovf_r_x3 are
+* derivatives of this routine.
+* get_fline: get user's opcode word
+* g_dfmtou: returns the destination format.
+* g_opcls: returns the opclass of the float instruction.
+* g_rndpr: returns the rounding precision.
+* reg_dest: write byte, word, or long data to Dn
+*
+
+UTIL IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref mem_read
+
+ xdef g_dfmtou
+ xdef g_opcls
+ xdef g_rndpr
+ xdef get_fline
+ xdef reg_dest
+
+*
+* Final result table for ovf_res. Note that the negative counterparts
+* are unnecessary as ovf_res always returns the sign separately from
+* the exponent.
+* ;+inf
+EXT_PINF dc.l $7fff0000,$00000000,$00000000,$00000000
+* ;largest +ext
+EXT_PLRG dc.l $7ffe0000,$ffffffff,$ffffffff,$00000000
+* ;largest magnitude +sgl in ext
+SGL_PLRG dc.l $407e0000,$ffffff00,$00000000,$00000000
+* ;largest magnitude +dbl in ext
+DBL_PLRG dc.l $43fe0000,$ffffffff,$fffff800,$00000000
+* ;(no negative entries; the sign is kept separately)
+
+tblovfl:
+ dc.l EXT_RN
+ dc.l EXT_RZ
+ dc.l EXT_RM
+ dc.l EXT_RP
+ dc.l SGL_RN
+ dc.l SGL_RZ
+ dc.l SGL_RM
+ dc.l SGL_RP
+ dc.l DBL_RN
+ dc.l DBL_RZ
+ dc.l DBL_RM
+ dc.l DBL_RP
+ dc.l error
+ dc.l error
+ dc.l error
+ dc.l error
+
+
+*
+* ovf_r_k --- overflow result calculation
+*
+* This entry point is used by kernel_ex.
+*
+* This forces the destination precision to be extended
+*
+* Input: operand in ETEMP
+* Output: a result is in ETEMP (internal extended format)
+*
+ xdef ovf_r_k
+ovf_r_k:
+ lea ETEMP(a6),a0 ;a0 points to source operand
+ bclr.b #sign_bit,ETEMP_EX(a6)
+ sne ETEMP_SGN(a6) ;convert to internal IEEE format
+
+*
+* ovf_r_x2 --- overflow result calculation
+*
+* This entry point used by x_ovfl. (opclass 0 and 2)
+*
+* Input a0 points to an operand in the internal extended format
+* Output a0 points to the result in the internal extended format
+*
+* This sets the round precision according to the user's FPCR unless the
+* instruction is fsgldiv or fsglmul or fsadd, fdadd, fsub, fdsub, fsmul,
+* fdmul, fsdiv, fddiv, fssqrt, fsmove, fdmove, fsabs, fdabs, fsneg, fdneg.
+* If the instruction is fsgldiv or fsglmul, the rounding precision must be
+* extended. If the instruction is not fsgldiv or fsglmul but a force-
+* precision instruction, the rounding precision is then set to the force
+* precision.
+
+ xdef ovf_r_x2
+ovf_r_x2:
+ btst.b #E3,E_BYTE(a6) ;E3 set => nu exception (cmdreg3b)
+ beq.l ovf_e1_exc ;E3 clear => cu exception (cmdreg1b)
+ovf_e3_exc:
+ move.w CMDREG3B(a6),d0 ;get the command word (low word only)
+ andi.l #$00000060,d0 ;keep bits 6 and 5; also clear stale upper word
+ cmpi.l #$00000040,d0 ;long compare needs all of d0 defined
+ beq.l ovff_sgl ;force precision is single
+ cmpi.l #$00000060,d0
+ beq.l ovff_dbl ;force precision is double
+ move.w CMDREG3B(a6),d0 ;get the command word again
+ andi.l #$7f,d0 ;clear all except operation
+ cmpi.l #$33,d0
+ beq.l ovf_fsgl ;fsglmul or fsgldiv
+ cmpi.l #$30,d0
+ beq.l ovf_fsgl ;fsglmul or fsgldiv
+ bra ovf_fpcr ;instruction is none of the above
+* ;use FPCR
+ovf_e1_exc:
+ move.w CMDREG1B(a6),d0 ;get command word
+ andi.l #$00000044,d0 ;clear all bits except 6 and 2
+ cmpi.l #$00000040,d0
+ beq.l ovff_sgl ;the instruction is force single
+ cmpi.l #$00000044,d0
+ beq.l ovff_dbl ;the instruction is force double
+ move.w CMDREG1B(a6),d0 ;again get the command word
+ andi.l #$0000007f,d0 ;clear all except the op code
+ cmpi.l #$00000027,d0
+ beq.l ovf_fsgl ;fsglmul
+ cmpi.l #$00000024,d0
+ beq.l ovf_fsgl ;fsgldiv
+ bra ovf_fpcr ;none of the above, use FPCR
+*
+*
+* Inst is either fsgldiv or fsglmul. Force extended precision.
+*
+ovf_fsgl:
+ clr.l d0
+ bra.b ovf_res
+
+ovff_sgl:
+ move.l #$00000001,d0 ;set single
+ bra.b ovf_res
+ovff_dbl:
+ move.l #$00000002,d0 ;set double
+ bra.b ovf_res
+*
+* The precision is in the fpcr.
+*
+ovf_fpcr:
+ bfextu FPCR_MODE(a6){0:2},d0 ;set round precision
+ bra.b ovf_res
+
+*
+*
+* ovf_r_x3 --- overflow result calculation
+*
+* This entry point used by x_ovfl. (opclass 3 only)
+*
+* Input a0 points to an operand in the internal extended format
+* Output a0 points to the result in the internal extended format
+*
+* This sets the round precision according to the destination size.
+*
+ xdef ovf_r_x3
+ovf_r_x3:
+ bsr g_dfmtou ;get dest fmt in d0{1:0}
+* ;for fmovout, the destination format
+* ;is the rounding precision
+
+*
+* ovf_res --- overflow result calculation
+*
+* Input:
+* a0 points to operand in internal extended format
+* Output:
+* a0 points to result in internal extended format
+*
+ xdef ovf_res
+ovf_res:
+ lsl.l #2,d0 ;move round precision to d0{3:2}
+ bfextu FPCR_MODE(a6){2:2},d1 ;set round mode
+ or.l d1,d0 ;index is fmt:mode in d0{3:0}
+ lea.l tblovfl,a1 ;load a1 with table address
+ move.l (a1,d0*4),a1 ;use d0 as index to the table
+ jmp (a1) ;go to the correct routine
+*
+*case DEST_FMT = EXT
+*
+EXT_RN:
+ lea.l EXT_PINF,a1 ;answer is +/- infinity
+ bset.b #inf_bit,FPSR_CC(a6)
+ bra set_sign ;now go set the sign
+EXT_RZ:
+ lea.l EXT_PLRG,a1 ;answer is +/- large number
+ bra set_sign ;now go set the sign
+EXT_RM:
+ tst.b LOCAL_SGN(a0) ;if negative overflow
+ beq.b e_rm_pos
+e_rm_neg:
+ lea.l EXT_PINF,a1 ;answer is negative infinity
+ or.l #neginf_mask,USER_FPSR(a6)
+ bra end_ovfr
+e_rm_pos:
+ lea.l EXT_PLRG,a1 ;answer is large positive number
+ bra end_ovfr
+EXT_RP:
+ tst.b LOCAL_SGN(a0) ;if negative overflow
+ beq.b e_rp_pos
+e_rp_neg:
+ lea.l EXT_PLRG,a1 ;answer is large negative number
+ bset.b #neg_bit,FPSR_CC(a6)
+ bra end_ovfr
+e_rp_pos:
+ lea.l EXT_PINF,a1 ;answer is positive infinity
+ bset.b #inf_bit,FPSR_CC(a6)
+ bra end_ovfr
+*
+*case DEST_FMT = DBL
+*
+DBL_RN:
+ lea.l EXT_PINF,a1 ;answer is +/- infinity
+ bset.b #inf_bit,FPSR_CC(a6)
+ bra set_sign
+DBL_RZ:
+ lea.l DBL_PLRG,a1 ;answer is +/- large number
+ bra set_sign ;now go set the sign
+DBL_RM:
+ tst.b LOCAL_SGN(a0) ;if negative overflow
+ beq.b d_rm_pos
+d_rm_neg:
+ lea.l EXT_PINF,a1 ;answer is negative infinity
+ or.l #neginf_mask,USER_FPSR(a6)
+ bra end_ovfr ;inf is same for all precisions (ext,dbl,sgl)
+d_rm_pos:
+ lea.l DBL_PLRG,a1 ;answer is large positive number
+ bra end_ovfr
+DBL_RP:
+ tst.b LOCAL_SGN(a0) ;if negative overflow
+ beq.b d_rp_pos
+d_rp_neg:
+ lea.l DBL_PLRG,a1 ;answer is large negative number
+ bset.b #neg_bit,FPSR_CC(a6)
+ bra end_ovfr
+d_rp_pos:
+ lea.l EXT_PINF,a1 ;answer is positive infinity
+ bset.b #inf_bit,FPSR_CC(a6)
+ bra end_ovfr
+*
+*case DEST_FMT = SGL
+*
+SGL_RN:
+ lea.l EXT_PINF,a1 ;answer is +/- infinity
+ bset.b #inf_bit,FPSR_CC(a6)
+ bra.b set_sign
+SGL_RZ:
+ lea.l SGL_PLRG,a1 ;answer is +/- large number
+ bra.b set_sign
+SGL_RM:
+ tst.b LOCAL_SGN(a0) ;if negative overflow
+ beq.b s_rm_pos
+s_rm_neg:
+ lea.l EXT_PINF,a1 ;answer is negative infinity
+ or.l #neginf_mask,USER_FPSR(a6)
+ bra.b end_ovfr
+s_rm_pos:
+ lea.l SGL_PLRG,a1 ;answer is large positive number
+ bra.b end_ovfr
+SGL_RP:
+ tst.b LOCAL_SGN(a0) ;if negative overflow
+ beq.b s_rp_pos
+s_rp_neg:
+ lea.l SGL_PLRG,a1 ;answer is large negative number
+ bset.b #neg_bit,FPSR_CC(a6)
+ bra.b end_ovfr
+s_rp_pos:
+ lea.l EXT_PINF,a1 ;answer is positive infinity
+ bset.b #inf_bit,FPSR_CC(a6)
+ bra.b end_ovfr
+
+set_sign:
+ tst.b LOCAL_SGN(a0) ;if negative overflow
+ beq.b end_ovfr
+neg_sign:
+ bset.b #neg_bit,FPSR_CC(a6)
+
+end_ovfr:
+ move.w LOCAL_EX(a1),LOCAL_EX(a0) ;do not overwrite sign
+ move.l LOCAL_HI(a1),LOCAL_HI(a0)
+ move.l LOCAL_LO(a1),LOCAL_LO(a0)
+ rts
+
+
+*
+* ERROR
+*
+error:
+ rts
+*
+* get_fline --- get f-line opcode of interrupted instruction
+*
+* Returns opcode in the low word of d0.
+*
+get_fline:
+ move.l USER_FPIAR(a6),a0 ;opcode address
+ clr.l -(a7) ;push a zeroed longword as a temporary
+ lea.l 2(a7),a1 ;point to low word of temporary
+ move.l #2,d0 ;count (bytes to read)
+ bsr.l mem_read
+ move.l (a7)+,d0 ;pop the temporary into d0
+ rts
+*
+* g_rndpr --- put rounding precision in d0{1:0}
+*
+* valid return codes are:
+* 00 - extended
+* 01 - single
+* 10 - double
+*
+* begin
+* get rounding precision (cmdreg3b{6:5})
+* begin
+* case opclass = 011 (move out)
+* get destination format - this is also the rounding precision
+*
+* case opclass = 0x0
+* if E3
+* *case RndPr(from cmdreg3b{6:5} = 11 then RND_PREC = DBL
+* *case RndPr(from cmdreg3b{6:5} = 10 then RND_PREC = SGL
+* case RndPr(from cmdreg3b{6:5} = 00 | 01
+* use precision from FPCR{7:6}
+* case 00 then RND_PREC = EXT
+* case 01 then RND_PREC = SGL
+* case 10 then RND_PREC = DBL
+* else E1 (cmdreg1b is first checked for force sgl/dbl and fsglmul/fsgldiv)
+* use precision in FPCR{7:6}
+* case 00 then RND_PREC = EXT
+* case 01 then RND_PREC = SGL
+* case 10 then RND_PREC = DBL
+* end
+*
+g_rndpr:
+ bsr.w g_opcls ;get opclass in d0{2:0}
+ cmp.w #$0003,d0 ;check for opclass 011
+ bne.b op_0x0
+
+*
+* For move out instructions (opclass 011) the destination format
+* is the same as the rounding precision. Pass results from g_dfmtou.
+*
+ bsr.w g_dfmtou
+ rts
+op_0x0:
+ btst.b #E3,E_BYTE(a6)
+ beq.l unf_e1_exc ;branch to e1 underflow
+unf_e3_exc:
+ move.l CMDREG3B(a6),d0 ;rounding precision in d0{10:9}
+ bfextu d0{9:2},d0 ;move the rounding prec bits to d0{1:0}
+ cmpi.l #$2,d0
+ beq.l unff_sgl ;force precision is single
+ cmpi.l #$3,d0 ;force precision is double
+ beq.l unff_dbl
+ move.w CMDREG3B(a6),d0 ;get the command word again
+ andi.l #$7f,d0 ;clear all except operation
+ cmpi.l #$33,d0
+ beq.l unf_fsgl ;fsglmul or fsgldiv
+ cmpi.l #$30,d0
+ beq.l unf_fsgl ;fsgldiv or fsglmul
+ bra unf_fpcr
+unf_e1_exc:
+ move.l CMDREG1B(a6),d0 ;get 32 bits off the stack, 1st 16 bits
+* ;are the command word
+ andi.l #$00440000,d0 ;clear all bits except bits 6 and 2
+ cmpi.l #$00400000,d0
+ beq.l unff_sgl ;force single
+ cmpi.l #$00440000,d0 ;force double
+ beq.l unff_dbl
+ move.l CMDREG1B(a6),d0 ;get the command word again
+ andi.l #$007f0000,d0 ;clear all bits except the operation
+ cmpi.l #$00270000,d0
+ beq.l unf_fsgl ;fsglmul
+ cmpi.l #$00240000,d0
+ beq.l unf_fsgl ;fsgldiv
+ bra unf_fpcr
+
+*
+* Convert to return format. The values from cmdreg3b and the return
+* values are:
+* cmdreg3b return precision
+* -------- ------ ---------
+* 00,01 0 ext
+* 10 1 sgl
+* 11 2 dbl
+* Force single
+*
+unff_sgl:
+ move.l #1,d0 ;return 1
+ rts
+*
+* Force double
+*
+unff_dbl:
+ move.l #2,d0 ;return 2
+ rts
+*
+* Force extended
+*
+unf_fsgl:
+ clr.l d0
+ rts
+*
+* Get rounding precision set in FPCR{7:6}.
+*
+unf_fpcr:
+ move.l USER_FPCR(a6),d0 ;rounding precision bits in d0{7:6}
+ bfextu d0{24:2},d0 ;move the rounding prec bits to d0{1:0}
+ rts
+*
+* g_opcls --- put opclass in d0{2:0}
+*
+g_opcls:
+ btst.b #E3,E_BYTE(a6) ;test the E3 flag
+ beq.b opc_1b ;E3 clear: take opclass from cmdreg1b
+opc_3b:
+ clr.l d0 ;if E3, only opclass 0x0 is possible
+ rts
+opc_1b:
+ move.l CMDREG1B(a6),d0
+ bfextu d0{0:3},d0 ;shift opclass bits d0{31:29} to d0{2:0}
+ rts
+*
+* g_dfmtou --- put destination format in d0{1:0}
+*
+* If E1, the format is from cmdreg1b{12:10}
+* If E3, the format is extended.
+*
+* Dest. Fmt.
+* extended 010 -> 00
+* single 001 -> 01
+* double 101 -> 10
+*
+g_dfmtou:
+ btst.b #E3,E_BYTE(a6) ;test the E3 flag
+ beq.b op011 ;E3 clear (E1): decode cmdreg1b
+ clr.l d0 ;if E3, size is always ext
+ rts
+op011:
+ move.l CMDREG1B(a6),d0
+ bfextu d0{3:3},d0 ;dest fmt from cmdreg1b{12:10}
+ cmp.b #1,d0 ;check for single
+ bne.b not_sgl
+ move.l #1,d0 ;return 1 (single)
+ rts
+not_sgl:
+ cmp.b #5,d0 ;check for double
+ bne.b not_dbl
+ move.l #2,d0 ;return 2 (double)
+ rts
+not_dbl:
+ clr.l d0 ;must be extended
+ rts
+
+*
+*
+* Final result table for unf_sub. Note that the negative counterparts
+* are unnecessary as unf_sub always returns the sign separately from
+* the exponent.
+* ;+zero
+EXT_PZRO dc.l $00000000,$00000000,$00000000,$00000000
+* ;+zero
+SGL_PZRO dc.l $3f810000,$00000000,$00000000,$00000000
+* ;+zero
+DBL_PZRO dc.l $3c010000,$00000000,$00000000,$00000000
+* ;smallest +ext denorm
+EXT_PSML dc.l $00000000,$00000000,$00000001,$00000000
+* ;smallest +sgl denorm
+SGL_PSML dc.l $3f810000,$00000100,$00000000,$00000000
+* ;smallest +dbl denorm
+DBL_PSML dc.l $3c010000,$00000000,$00000800,$00000000
+*
+* UNF_SUB --- underflow result calculation
+*
+* Input:
+* d0 contains round precision
+* a0 points to input operand in the internal extended format
+*
+* Output:
+* a0 points to correct internal extended precision result.
+*
+
+tblunf:
+ dc.l uEXT_RN
+ dc.l uEXT_RZ
+ dc.l uEXT_RM
+ dc.l uEXT_RP
+ dc.l uSGL_RN
+ dc.l uSGL_RZ
+ dc.l uSGL_RM
+ dc.l uSGL_RP
+ dc.l uDBL_RN
+ dc.l uDBL_RZ
+ dc.l uDBL_RM
+ dc.l uDBL_RP
+ dc.l uDBL_RN
+ dc.l uDBL_RZ
+ dc.l uDBL_RM
+ dc.l uDBL_RP
+
+ xdef unf_sub
+unf_sub:
+ lsl.l #2,d0 ;move round precision to d0{3:2}
+ bfextu FPCR_MODE(a6){2:2},d1 ;set round mode
+ or.l d1,d0 ;index is fmt:mode in d0{3:0}
+ lea.l tblunf,a1 ;load a1 with table address
+ move.l (a1,d0*4),a1 ;use d0 as index to the table
+ jmp (a1) ;go to the correct routine
+*
+*case DEST_FMT = EXT
+*
+uEXT_RN:
+ lea.l EXT_PZRO,a1 ;answer is +/- zero
+ bset.b #z_bit,FPSR_CC(a6)
+ bra uset_sign ;now go set the sign
+uEXT_RZ:
+ lea.l EXT_PZRO,a1 ;answer is +/- zero
+ bset.b #z_bit,FPSR_CC(a6)
+ bra uset_sign ;now go set the sign
+uEXT_RM:
+ tst.b LOCAL_SGN(a0) ;if negative underflow
+ beq.b ue_rm_pos
+ue_rm_neg:
+ lea.l EXT_PSML,a1 ;answer is negative smallest denorm
+ bset.b #neg_bit,FPSR_CC(a6)
+ bra end_unfr
+ue_rm_pos:
+ lea.l EXT_PZRO,a1 ;answer is positive zero
+ bset.b #z_bit,FPSR_CC(a6)
+ bra end_unfr
+uEXT_RP:
+ tst.b LOCAL_SGN(a0) ;if negative underflow
+ beq.b ue_rp_pos
+ue_rp_neg:
+ lea.l EXT_PZRO,a1 ;answer is negative zero
+ ori.l #negz_mask,USER_FPSR(a6)
+ bra end_unfr
+ue_rp_pos:
+ lea.l EXT_PSML,a1 ;answer is positive smallest denorm
+ bra end_unfr
+*
+*case DEST_FMT = DBL
+*
+uDBL_RN:
+ lea.l DBL_PZRO,a1 ;answer is +/- zero
+ bset.b #z_bit,FPSR_CC(a6)
+ bra uset_sign
+uDBL_RZ:
+ lea.l DBL_PZRO,a1 ;answer is +/- zero
+ bset.b #z_bit,FPSR_CC(a6)
+ bra uset_sign ;now go set the sign
+uDBL_RM:
+ tst.b LOCAL_SGN(a0) ;if negative underflow
+ beq.b ud_rm_pos
+ud_rm_neg:
+ lea.l DBL_PSML,a1 ;answer is smallest denormalized negative
+ bset.b #neg_bit,FPSR_CC(a6)
+ bra end_unfr
+ud_rm_pos:
+ lea.l DBL_PZRO,a1 ;answer is positive zero
+ bset.b #z_bit,FPSR_CC(a6)
+ bra end_unfr
+uDBL_RP:
+ tst.b LOCAL_SGN(a0) ;if negative underflow
+ beq.b ud_rp_pos
+ud_rp_neg:
+ lea.l DBL_PZRO,a1 ;answer is negative zero
+ ori.l #negz_mask,USER_FPSR(a6)
+ bra end_unfr
+ud_rp_pos:
+ lea.l DBL_PSML,a1 ;answer is smallest denormalized positive
+ bra end_unfr
+*
+*case DEST_FMT = SGL
+*
+uSGL_RN:
+ lea.l SGL_PZRO,a1 ;answer is +/- zero
+ bset.b #z_bit,FPSR_CC(a6)
+ bra.b uset_sign
+uSGL_RZ:
+ lea.l SGL_PZRO,a1 ;answer is +/- zero
+ bset.b #z_bit,FPSR_CC(a6)
+ bra.b uset_sign
+uSGL_RM:
+ tst.b LOCAL_SGN(a0) ;if negative underflow
+ beq.b us_rm_pos
+us_rm_neg:
+ lea.l SGL_PSML,a1 ;answer is smallest denormalized negative
+ bset.b #neg_bit,FPSR_CC(a6)
+ bra.b end_unfr
+us_rm_pos:
+ lea.l SGL_PZRO,a1 ;answer is positive zero
+ bset.b #z_bit,FPSR_CC(a6)
+ bra.b end_unfr
+uSGL_RP:
+ tst.b LOCAL_SGN(a0) ;if negative underflow
+ beq.b us_rp_pos
+us_rp_neg:
+ lea.l SGL_PZRO,a1 ;answer is negative zero
+ ori.l #negz_mask,USER_FPSR(a6)
+ bra.b end_unfr
+us_rp_pos:
+ lea.l SGL_PSML,a1 ;answer is smallest denormalized positive
+ bra.b end_unfr
+
+uset_sign:
+ tst.b LOCAL_SGN(a0) ;if negative underflow
+ beq.b end_unfr ;positive: leave the N bit alone
+uneg_sign:
+ bset.b #neg_bit,FPSR_CC(a6)
+
+end_unfr:
+ move.w LOCAL_EX(a1),LOCAL_EX(a0) ;be careful not to overwrite sign
+ move.l LOCAL_HI(a1),LOCAL_HI(a0)
+ move.l LOCAL_LO(a1),LOCAL_LO(a0)
+ rts
+*
+* reg_dest --- write byte, word, or long data to Dn
+*
+*
+* Input:
+* L_SCR1: Data
+* d1: data size and dest register number formatted as:
+*
+* 31 5 4 3 2 1 0
+* -----------------------------------------------
+* | 0 | Size | Dest Reg # |
+* -----------------------------------------------
+*
+* Size is:
+* 0 - Byte
+* 1 - Word
+* 2 - Long/Single
+*
+pregdst:
+ dc.l byte_d0
+ dc.l byte_d1
+ dc.l byte_d2
+ dc.l byte_d3
+ dc.l byte_d4
+ dc.l byte_d5
+ dc.l byte_d6
+ dc.l byte_d7
+ dc.l word_d0
+ dc.l word_d1
+ dc.l word_d2
+ dc.l word_d3
+ dc.l word_d4
+ dc.l word_d5
+ dc.l word_d6
+ dc.l word_d7
+ dc.l long_d0
+ dc.l long_d1
+ dc.l long_d2
+ dc.l long_d3
+ dc.l long_d4
+ dc.l long_d5
+ dc.l long_d6
+ dc.l long_d7
+
+reg_dest:
+ lea.l pregdst,a0
+ move.l (a0,d1*4),a0
+ jmp (a0)
+
+byte_d0:
+ move.b L_SCR1(a6),USER_D0+3(a6)
+ rts
+byte_d1:
+ move.b L_SCR1(a6),USER_D1+3(a6)
+ rts
+byte_d2:
+ move.b L_SCR1(a6),d2
+ rts
+byte_d3:
+ move.b L_SCR1(a6),d3
+ rts
+byte_d4:
+ move.b L_SCR1(a6),d4
+ rts
+byte_d5:
+ move.b L_SCR1(a6),d5
+ rts
+byte_d6:
+ move.b L_SCR1(a6),d6
+ rts
+byte_d7:
+ move.b L_SCR1(a6),d7
+ rts
+word_d0:
+ move.w L_SCR1(a6),USER_D0+2(a6)
+ rts
+word_d1:
+ move.w L_SCR1(a6),USER_D1+2(a6)
+ rts
+word_d2:
+ move.w L_SCR1(a6),d2
+ rts
+word_d3:
+ move.w L_SCR1(a6),d3
+ rts
+word_d4:
+ move.w L_SCR1(a6),d4
+ rts
+word_d5:
+ move.w L_SCR1(a6),d5
+ rts
+word_d6:
+ move.w L_SCR1(a6),d6
+ rts
+word_d7:
+ move.w L_SCR1(a6),d7
+ rts
+long_d0:
+ move.l L_SCR1(a6),USER_D0(a6)
+ rts
+long_d1:
+ move.l L_SCR1(a6),USER_D1(a6)
+ rts
+long_d2:
+ move.l L_SCR1(a6),d2
+ rts
+long_d3:
+ move.l L_SCR1(a6),d3
+ rts
+long_d4:
+ move.l L_SCR1(a6),d4
+ rts
+long_d5:
+ move.l L_SCR1(a6),d5
+ rts
+long_d6:
+ move.l L_SCR1(a6),d6
+ rts
+long_d7:
+ move.l L_SCR1(a6),d7
+ rts
+ end
diff --git a/sys/arch/m68k/fpsp/x_bsun.sa b/sys/arch/m68k/fpsp/x_bsun.sa
new file mode 100644
index 00000000000..b3da064c209
--- /dev/null
+++ b/sys/arch/m68k/fpsp/x_bsun.sa
@@ -0,0 +1,72 @@
+* $NetBSD: x_bsun.sa,v 1.2 1994/10/26 07:50:22 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* x_bsun.sa 3.3 7/1/91
+*
+* fpsp_bsun --- FPSP handler for branch/set on unordered exception
+*
+* Copy the PC to FPIAR to maintain 881/882 compatibility
+*
+* The real_bsun handler will need to perform further corrective
+* measures as outlined in the 040 User's Manual on pages
+* 9-41f, section 9.8.3.
+*
+
+X_BSUN IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref real_bsun
+
+ xdef fpsp_bsun
+fpsp_bsun:
+* Build the local frame and save the FPU state and scratch registers.
+ link a6,#-LOCAL_SIZE
+ fsave -(a7)
+ movem.l d0-d1/a0-a1,USER_DA(a6)
+ fmovem.x fp0-fp3,USER_FP0(a6)
+ fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6)
+
+* Overwrite the saved FPIAR with the PC from the exception frame.
+ move.l EXC_PC(a6),USER_FPIAR(a6)
+* Restore everything (fpiar presumably picks up the value stored above).
+ movem.l USER_DA(a6),d0-d1/a0-a1
+ fmovem.x USER_FP0(a6),fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
+ frestore (a7)+
+ unlk a6
+ bra.l real_bsun ;chain to the system's bsun handler
+*
+ end
diff --git a/sys/arch/m68k/fpsp/x_fline.sa b/sys/arch/m68k/fpsp/x_fline.sa
new file mode 100644
index 00000000000..9f72985a72b
--- /dev/null
+++ b/sys/arch/m68k/fpsp/x_fline.sa
@@ -0,0 +1,129 @@
+* $NetBSD: x_fline.sa,v 1.2 1994/10/26 07:50:23 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* x_fline.sa 3.3 1/10/91
+*
+* fpsp_fline --- FPSP handler for fline exception
+*
+* First determine if the exception is one of the unimplemented
+* floating point instructions. If so, let fpsp_unimp handle it.
+* Next, determine if the instruction is an fmovecr with a non-zero
+* <ea> field. If so, handle here and return. Otherwise, it
+* must be a real F-line exception.
+*
+
+X_FLINE IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref real_fline
+ xref fpsp_unimp
+ xref uni_2
+ xref mem_read
+ xref fpsp_fmt_error
+
+ xdef fpsp_fline
+fpsp_fline:
+*
+* check for unimplemented vector first. Use EXC_VEC-4 because
+* the equate is valid only after a 'link a6' has pushed one more
+* long onto the stack.
+*
+ cmp.w #UNIMP_VEC,EXC_VEC-4(a7)
+ beq.l fpsp_unimp
+
+*
+* fmovecr with non-zero <ea> handling here
+*
+ sub.l #4,a7 ;4 accounts for 2-word difference
+* ;between six word frame (unimp) and
+* ;four word frame
+ link a6,#-LOCAL_SIZE
+ fsave -(a7)
+ movem.l d0-d1/a0-a1,USER_DA(a6)
+ movea.l EXC_PC+4(a6),a0 ;get address of fline instruction
+ lea.l L_SCR1(a6),a1 ;use L_SCR1 as scratch
+ move.l #4,d0
+ add.l #4,a6 ;to offset the sub.l #4,a7 above so that
+* ;a6 can point correctly to the stack frame
+* ;before branching to mem_read
+ bsr.l mem_read
+ sub.l #4,a6
+ move.l L_SCR1(a6),d0 ;d0 contains the fline and command word
+ bfextu d0{4:3},d1 ;extract coprocessor id
+ cmpi.b #1,d1 ;check if cpid=1
+ bne.w not_mvcr ;exit if not
+ bfextu d0{16:6},d1
+ cmpi.b #$17,d1 ;check if it is an FMOVECR encoding
+ bne.w not_mvcr
+* ;if an FMOVECR instruction, fix stack
+* ;and go to FPSP_UNIMP
+fix_stack:
+ cmpi.b #VER_40,(a7) ;test for orig unimp frame
+ bne.b ck_rev
+ sub.l #UNIMP_40_SIZE-4,a7 ;emulate an orig fsave
+ move.b #VER_40,(a7)
+ move.b #UNIMP_40_SIZE-4,1(a7)
+ clr.w 2(a7)
+ bra.b fix_con
+ck_rev:
+ cmpi.b #VER_41,(a7) ;test for rev unimp frame
+ bne.l fpsp_fmt_error ;if not $40 or $41, exit with error
+ sub.l #UNIMP_41_SIZE-4,a7 ;emulate a rev fsave
+ move.b #VER_41,(a7)
+ move.b #UNIMP_41_SIZE-4,1(a7)
+ clr.w 2(a7)
+fix_con:
+ move.w EXC_SR+4(a6),EXC_SR(a6) ;move stacked sr to new position
+ move.l EXC_PC+4(a6),EXC_PC(a6) ;move stacked pc to new position
+ fmove.l EXC_PC(a6),FPIAR ;point FPIAR to fline inst
+ move.l #4,d1
+ add.l d1,EXC_PC(a6) ;increment stacked pc value to next inst
+ move.w #$202c,EXC_VEC(a6) ;reformat vector to unimp
+ clr.l EXC_EA(a6) ;clear the EXC_EA field
+ move.w d0,CMDREG1B(a6) ;move the lower word into CMDREG1B
+ clr.l E_BYTE(a6)
+ bset.b #UFLAG,T_BYTE(a6)
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore data registers
+ bra.l uni_2
+
+not_mvcr:
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore data registers
+ frestore (a7)+
+ unlk a6
+ add.l #4,a7
+ bra.l real_fline
+
+ end
diff --git a/sys/arch/m68k/fpsp/x_operr.sa b/sys/arch/m68k/fpsp/x_operr.sa
new file mode 100644
index 00000000000..9e1292d108a
--- /dev/null
+++ b/sys/arch/m68k/fpsp/x_operr.sa
@@ -0,0 +1,381 @@
+* $NetBSD: x_operr.sa,v 1.4 1994/10/26 07:50:24 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* x_operr.sa 3.5 7/1/91
+*
+* fpsp_operr --- FPSP handler for operand error exception
+*
+* See 68040 User's Manual pp. 9-44f
+*
+* Note 1: For trap disabled 040 does the following:
+* If the dest is a fp reg, then an extended precision non_signaling
+* NAN is stored in the dest reg. If the dest format is b, w, or l and
+* the source op is a NAN, then garbage is stored as the result (actually
+* the upper 32 bits of the mantissa are sent to the integer unit). If
+* the dest format is integer (b, w, l) and the operr is caused by
+* integer overflow, or the source op is inf, then the result stored is
+* garbage.
+* There are three cases in which operr is incorrectly signaled on the
+* 040. This occurs for move_out of format b, w, or l for the largest
+* negative integer (-2^7 for b, -2^15 for w, -2^31 for l).
+*
+* On opclass = 011 fmove.(b,w,l) that causes a conversion
+* overflow -> OPERR, the exponent in wbte (and fpte) is:
+* byte 56 - (62 - exp)
+* word 48 - (62 - exp)
+* long 32 - (62 - exp)
+*
+* where exp = (true exp) - 1
+*
+* So, wbtemp and fptemp will contain the following on erroneously
+* signalled operr:
+* fpts = 1
+* fpte = $4000 (15 bit externally)
+* byte fptm = $ffffffff ffffff80
+* word fptm = $ffffffff ffff8000
+* long fptm = $ffffffff 80000000
+*
+* Note 2: For trap enabled 040 does the following:
+* If the inst is move_out, then same as Note 1.
+* If the inst is not move_out, the dest is not modified.
+* The exceptional operand is not defined for integer overflow
+* during a move_out.
+*
+
+X_OPERR IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref mem_write
+ xref real_operr
+ xref real_inex
+ xref get_fline
+ xref fpsp_done
+ xref reg_dest
+
+ xdef fpsp_operr
+fpsp_operr:
+*
+ link a6,#-LOCAL_SIZE ;operr handler entry: allocate LOCAL_SIZE bytes of locals below a6
+ fsave -(a7) ;push the fpu state frame onto the stack
+ movem.l d0-d1/a0-a1,USER_DA(a6) ;save integer scratch registers
+ fmovem.x fp0-fp3,USER_FP0(a6) ;save fp0-fp3
+ fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) ;save fp control registers
+
+*
+* Check if this is an opclass 3 instruction.
+* If so, fall through, else branch to operr_end
+*
+ btst.b #TFLAG,T_BYTE(a6)
+ beq.b operr_end ;TFLAG clear: not opclass 3
+
+*
+* If the destination size is B,W,or L, the operr must be
+* handled here.
+*
+ move.l CMDREG1B(a6),d0 ;lword load starting at the command word
+ bfextu d0{3:3},d0 ;0=long, 4=word, 6=byte
+ tst.b d0 ;determine size; check long
+ beq.w operr_long
+ cmpi.b #4,d0 ;check word
+ beq.w operr_word
+ cmpi.b #6,d0 ;check byte
+ beq.w operr_byte
+
+*
+* The size is not B,W,or L, so the operr is handled by the
+* kernel handler. Set the operr bits and clean up, leaving
+* only the integer exception frame on the stack, and the
+* fpu in the original exceptional state.
+*
+operr_end:
+ bset.b #operr_bit,FPSR_EXCEPT(a6)
+ bset.b #aiop_bit,FPSR_AEXCEPT(a6)
+
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore integer scratch registers
+ fmovem.x USER_FP0(a6),fp0-fp3 ;restore fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;restore fp control registers
+ frestore (a7)+ ;pop the fsave frame back into the fpu
+ unlk a6 ;release the local frame
+ bra.l real_operr ;continue in real_operr
+
+operr_long:
+ moveq.l #4,d1 ;write size to d1
+ move.b STAG(a6),d0 ;test stag for nan
+ andi.b #$e0,d0 ;clr all but tag
+ cmpi.b #$60,d0 ;check for nan
+ beq operr_nan ;source is a nan
+ cmpi.l #$80000000,FPTEMP_LO(a6) ;test if ls lword is special
+ bne.b chklerr ;if not equal, check for incorrect operr
+ bsr check_upper ;check if exp and ms mant are special
+ tst.l d0
+ bne.b chklerr ;if d0 is true, check for incorrect operr
+ move.l #$80000000,d0 ;store special case result
+ bsr operr_store ;write d0 (size in d1) to the destination
+ bra.w not_enabled ;clean and exit
+*
+* CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
+*
+chklerr:
+ move.w FPTEMP_EX(a6),d0
+ and.w #$7FFF,d0 ;ignore sign bit
+ cmp.w #$3FFE,d0 ;this is the only possible exponent value
+ bne.b chklerr2 ;other exponent: do the further checks
+fixlong:
+ move.l FPTEMP_LO(a6),d0 ;result is the ls lword of fptemp
+ bsr operr_store ;write d0 (size in d1) to the destination
+ bra.w not_enabled ;exit without reporting operr
+chklerr2:
+ move.w FPTEMP_EX(a6),d0
+ and.w #$7FFF,d0 ;ignore sign bit
+ cmp.w #$4000,d0 ;unsigned compare against $4000
+ bcc.w store_max ;exponent out of range
+
+ move.l FPTEMP_LO(a6),d0
+ and.l #$7FFF0000,d0 ;look for all 1's on bits 30-16
+ cmp.l #$7FFF0000,d0
+ beq.b fixlong ;bits 30-16 all 1's: store ls lword
+
+ tst.l FPTEMP_LO(a6)
+ bpl.b chklepos ;bit 31 of ls lword is clear
+ cmp.l #$FFFFFFFF,FPTEMP_HI(a6) ;bit 31 set: ms lword all 1's?
+ beq.b fixlong
+ bra.w store_max
+chklepos:
+ tst.l FPTEMP_HI(a6) ;bit 31 clear: ms lword zero?
+ beq.b fixlong
+ bra.w store_max
+
+operr_word:
+ moveq.l #2,d1 ;write size to d1
+ move.b STAG(a6),d0 ;test stag for nan
+ andi.b #$e0,d0 ;clr all but tag
+ cmpi.b #$60,d0 ;check for nan
+ beq.w operr_nan ;source is a nan
+ cmpi.l #$ffff8000,FPTEMP_LO(a6) ;test if ls lword is special
+ bne.b chkwerr ;if not equal, check for incorrect operr
+ bsr check_upper ;check if exp and ms mant are special
+ tst.l d0
+ bne.b chkwerr ;if d0 is true, check for incorrect operr
+ move.l #$80000000,d0 ;store special case result
+ bsr operr_store ;write d0 (size in d1) to the destination
+ bra.w not_enabled ;clean and exit
+*
+* CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
+*
+chkwerr:
+ move.w FPTEMP_EX(a6),d0
+ and.w #$7FFF,d0 ;ignore sign bit
+ cmp.w #$3FFE,d0 ;this is the only possible exponent value
+ bne.b store_max ;any other exponent: store max value
+ move.l FPTEMP_LO(a6),d0
+ swap d0 ;move ls word of fptemp_lo into upper word of d0
+ bsr operr_store ;write d0 (size in d1) to the destination
+ bra.w not_enabled ;exit without reporting operr
+
+operr_byte:
+ moveq.l #1,d1 ;write size to d1
+ move.b STAG(a6),d0 ;test stag for nan
+ andi.b #$e0,d0 ;clr all but tag
+ cmpi.b #$60,d0 ;check for nan
+ beq.b operr_nan ;source is a nan
+ cmpi.l #$ffffff80,FPTEMP_LO(a6) ;test if ls lword is special
+ bne.b chkberr ;if not equal, check for incorrect operr
+ bsr check_upper ;check if exp and ms mant are special
+ tst.l d0
+ bne.b chkberr ;if d0 is true, check for incorrect operr
+ move.l #$80000000,d0 ;store special case result
+ bsr operr_store ;write d0 (size in d1) to the destination
+ bra.w not_enabled ;clean and exit
+*
+* CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
+*
+chkberr:
+ move.w FPTEMP_EX(a6),d0
+ and.w #$7FFF,d0 ;ignore sign bit
+ cmp.w #$3FFE,d0 ;this is the only possible exponent value
+ bne.b store_max ;any other exponent: store max value
+ move.l FPTEMP_LO(a6),d0
+ asl.l #8,d0 ;shift ls byte up into bits 15-8
+ swap d0 ;then into the ms byte of d0
+ bsr operr_store ;write d0 (size in d1) to the destination
+ bra.w not_enabled ;exit without reporting operr
+
+*
+* This operr condition is not of the special case. Set operr
+* and aiop and write the portion of the nan to memory for the
+* given size.
+*
+operr_nan:
+ or.l #opaop_mask,USER_FPSR(a6) ;set operr & aiop
+
+ move.l ETEMP_HI(a6),d0 ;output will be from upper 32 bits
+ bsr operr_store ;write d0 (size in d1) to the destination
+ bra end_operr ;exit path depends on operr enable bit
+*
+* Store_max loads the max pos or negative for the size, sets
+* the operr and aiop bits, and clears inex and ainex, incorrectly
+* set by the 040.
+*
+store_max:
+ or.l #opaop_mask,USER_FPSR(a6) ;set operr & aiop
+ bclr.b #inex2_bit,FPSR_EXCEPT(a6) ;clear inex2 in the saved status
+ bclr.b #ainex_bit,FPSR_AEXCEPT(a6) ;clear ainex in the saved status
+ fmove.l #0,FPSR ;zero the live fpsr
+
+ tst.w FPTEMP_EX(a6) ;check sign
+ blt.b load_neg ;sign bit set: store max negative
+ move.l #$7fffffff,d0 ;max positive pattern
+ bsr operr_store ;write d0 (size in d1) to the destination
+ bra end_operr ;exit path depends on operr enable bit
+load_neg:
+ move.l #$80000000,d0 ;max negative pattern
+ bsr operr_store ;write d0 (size in d1) to the destination
+ bra end_operr ;exit path depends on operr enable bit
+
+*
+* This routine stores the data in d0, for the given size in d1,
+* to memory or data register as required. A read of the fline
+* is required to determine the destination.
+*
+operr_store:
+ move.l d0,L_SCR1(a6) ;move write data to L_SCR1
+ move.l d1,-(a7) ;save size across the get_fline call
+ bsr.l get_fline ;fline returned in d0
+ move.l (a7)+,d1 ;recover size
+ bftst d0{26:3} ;if mode is zero, dest is Dn
+ bne.b dest_mem ;nonzero mode: destination is memory
+*
+* Destination is Dn. Get register number from d0. Data is on
+* the stack at (a7). D1 has size: 1=byte,2=word,4=long/single
+*
+ andi.l #7,d0 ;isolate register number (write data is held in L_SCR1)
+ cmpi.l #4,d1
+ beq.b op_long ;the most frequent case
+ cmpi.l #2,d1
+ bne.b op_con ;byte: register number alone
+ or.l #8,d0 ;word: or $8 into register number
+ bra.b op_con
+op_long:
+ or.l #$10,d0 ;long: or $10 into register number
+op_con:
+ move.l d0,d1 ;format size:reg for reg_dest
+ bra.l reg_dest ;call to reg_dest returns to caller
+* ;of operr_store
+*
+* Destination is memory. Get <ea> from integer exception frame
+* and call mem_write.
+*
+dest_mem:
+ lea.l L_SCR1(a6),a0 ;put ptr to write data in a0
+ move.l EXC_EA(a6),a1 ;put user destination address in a1
+ move.l d1,d0 ;put size in d0
+ bsr.l mem_write ;a0=source, a1=destination, d0=byte count
+ rts ;return to caller of operr_store
+*
+* Check the exponent for $c000 and the upper 32 bits of the
+* mantissa for $ffffffff. If both are true, return d0 clr
+* and store the lower n bits of the least lword of FPTEMP
+* to d0 for write out. If not, it is a real operr, and set d0.
+*
+check_upper:
+ cmpi.l #$ffffffff,FPTEMP_HI(a6) ;check if upper 32 bits of mantissa are all 1's
+ bne.b true_operr ;if not all 1's then was true operr
+ cmpi.w #$c000,FPTEMP_EX(a6) ;check if incorrectly signalled
+ beq.b not_true_operr ;branch if not true operr
+ cmpi.w #$bfff,FPTEMP_EX(a6) ;check if incorrectly signalled
+ beq.b not_true_operr ;branch if not true operr
+true_operr:
+ move.l #1,d0 ;signal real operr
+ rts ;return with d0 = 1
+not_true_operr:
+ clr.l d0 ;signal no real operr
+ rts ;return with d0 = 0
+
+*
+* End_operr tests for operr enabled. If not, it cleans up the stack
+* and does an rte. If enabled, it cleans up the stack and branches
+* to the kernel operr handler with only the integer exception
+* frame on the stack and the fpu in the original exceptional state
+* with correct data written to the destination.
+*
+end_operr:
+ btst.b #operr_bit,FPCR_ENABLE(a6) ;is the operr trap enabled?
+ beq.b not_enabled
+enabled:
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore integer scratch registers
+ fmovem.x USER_FP0(a6),fp0-fp3 ;restore fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;restore fp control registers
+ frestore (a7)+ ;pop the fsave frame back into the fpu
+ unlk a6 ;release the local frame
+ bra.l real_operr ;continue in real_operr
+
+not_enabled:
+*
+* It is possible to have either inex2 or inex1 exceptions with the
+* operr. If the inex enable bit is set in the FPCR, and either
+* inex2 or inex1 occured, we must clean up and branch to the
+* real inex handler.
+*
+ck_inex:
+ move.b FPCR_ENABLE(a6),d0 ;enable byte
+ and.b FPSR_EXCEPT(a6),d0 ;keep exceptions that are both enabled and set
+ andi.b #$3,d0 ;isolate the two ls bits
+ beq.w operr_exit ;neither is enabled and set
+*
+* Inexact enabled and reported, and we must take an inexact exception.
+*
+take_inex:
+ move.b #INEX_VEC,EXC_VEC+1(a6) ;write INEX_VEC into the low byte of EXC_VEC
+ move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;copy saved fpsr into FPSR_SHADOW
+ or.l #sx_mask,E_BYTE(a6)
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore integer scratch registers
+ fmovem.x USER_FP0(a6),fp0-fp3 ;restore fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;restore fp control registers
+ frestore (a7)+ ;pop the fsave frame back into the fpu
+ unlk a6 ;release the local frame
+ bra.l real_inex ;continue in real_inex
+*
+* Since operr is only an E1 exception, there is no need to frestore
+* any state back to the fpu.
+*
+operr_exit:
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore integer scratch registers
+ fmovem.x USER_FP0(a6),fp0-fp3 ;restore fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;restore fp control registers
+ unlk a6 ;release locals; fsave frame is dropped without frestore
+ bra.l fpsp_done ;continue in fpsp_done
+
+ end
diff --git a/sys/arch/m68k/fpsp/x_ovfl.sa b/sys/arch/m68k/fpsp/x_ovfl.sa
new file mode 100644
index 00000000000..c161fcb5564
--- /dev/null
+++ b/sys/arch/m68k/fpsp/x_ovfl.sa
@@ -0,0 +1,210 @@
+* $NetBSD: x_ovfl.sa,v 1.2 1994/10/26 07:50:26 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* x_ovfl.sa 3.5 7/1/91
+*
+* fpsp_ovfl --- FPSP handler for overflow exception
+*
+* Overflow occurs when a floating-point intermediate result is
+* too large to be represented in a floating-point data register,
+* or when storing to memory, the contents of a floating-point
+* data register are too large to be represented in the
+* destination format.
+*
+* Trap disabled results
+*
+* If the instruction is move_out, then garbage is stored in the
+* destination. If the instruction is not move_out, then the
+* destination is not affected. For 68881 compatibility, the
+* following values should be stored at the destination, based
+* on the current rounding mode:
+*
+* RN Infinity with the sign of the intermediate result.
+* RZ Largest magnitude number, with the sign of the
+* intermediate result.
+* RM For pos overflow, the largest pos number. For neg overflow,
+* -infinity
+* RP For pos overflow, +infinity. For neg overflow, the largest
+* neg number
+*
+* Trap enabled results
+* All trap disabled code applies. In addition the exceptional
+* operand needs to be made available to the users exception handler
+* with a bias of $6000 subtracted from the exponent.
+*
+
+X_OVFL IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref ovf_r_x2
+ xref ovf_r_x3
+ xref store
+ xref real_ovfl
+ xref real_inex
+ xref fpsp_done
+ xref g_opcls
+ xref b1238_fix
+
+ xdef fpsp_ovfl
+fpsp_ovfl:
+ link a6,#-LOCAL_SIZE ;ovfl handler entry: allocate LOCAL_SIZE bytes of locals below a6
+ fsave -(a7) ;push the fpu state frame onto the stack
+ movem.l d0-d1/a0-a1,USER_DA(a6) ;save integer scratch registers
+ fmovem.x fp0-fp3,USER_FP0(a6) ;save fp0-fp3
+ fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) ;save fp control registers
+
+*
+* The 040 doesn't set the AINEX bit in the FPSR, the following
+* line temporarily rectifies this error.
+*
+ bset.b #ainex_bit,FPSR_AEXCEPT(a6)
+*
+ bsr.l ovf_adj ;denormalize, round & store interm op
+*
+* if overflow traps not enabled check for inexact exception
+*
+ btst.b #ovfl_bit,FPCR_ENABLE(a6)
+ beq.b ck_inex ;ovfl trap not enabled
+*
+ btst.b #E3,E_BYTE(a6) ;test E3 bit
+ beq.b no_e3_1 ;E3 clear: skip the E3 fixup
+ bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no
+ bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit
+ bsr.l b1238_fix ;test for bug1238 case
+ move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;copy saved fpsr into FPSR_SHADOW
+ or.l #sx_mask,E_BYTE(a6)
+no_e3_1:
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore integer scratch registers
+ fmovem.x USER_FP0(a6),fp0-fp3 ;restore fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;restore fp control registers
+ frestore (a7)+ ;pop the fsave frame back into the fpu
+ unlk a6 ;release the local frame
+ bra.l real_ovfl ;continue in real_ovfl
+*
+* It is possible to have either inex2 or inex1 exceptions with the
+* ovfl. If the inex enable bit is set in the FPCR, and either
+* inex2 or inex1 occured, we must clean up and branch to the
+* real inex handler.
+*
+ck_inex:
+* move.b FPCR_ENABLE(a6),d0
+* and.b FPSR_EXCEPT(a6),d0
+* andi.b #$3,d0
+ btst.b #inex2_bit,FPCR_ENABLE(a6) ;only the inex2 enable bit is tested here
+ beq.b ovfl_exit ;inex2 not enabled
+*
+* Inexact enabled and reported, and we must take an inexact exception.
+*
+take_inex:
+ btst.b #E3,E_BYTE(a6) ;test E3 bit
+ beq.b no_e3_2 ;E3 clear: skip the E3 fixup
+ bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no
+ bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit
+ bsr.l b1238_fix ;test for bug1238 case
+ move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;copy saved fpsr into FPSR_SHADOW
+ or.l #sx_mask,E_BYTE(a6)
+no_e3_2:
+ move.b #INEX_VEC,EXC_VEC+1(a6) ;write INEX_VEC into the low byte of EXC_VEC
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore integer scratch registers
+ fmovem.x USER_FP0(a6),fp0-fp3 ;restore fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;restore fp control registers
+ frestore (a7)+ ;pop the fsave frame back into the fpu
+ unlk a6 ;release the local frame
+ bra.l real_inex ;continue in real_inex
+
+ovfl_exit:
+ bclr.b #E3,E_BYTE(a6) ;test and clear E3 bit
+ beq.b e1_set ;E3 was already clear
+*
+* Clear dirty bit on dest resister in the frame before branching
+* to b1238_fix.
+*
+ bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no
+ bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit
+ bsr.l b1238_fix ;test for bug1238 case
+
+ move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;copy saved fpsr into FPSR_SHADOW
+ or.l #sx_mask,E_BYTE(a6)
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore integer scratch registers
+ fmovem.x USER_FP0(a6),fp0-fp3 ;restore fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;restore fp control registers
+ frestore (a7)+ ;pop the fsave frame back into the fpu
+ unlk a6 ;release the local frame
+ bra.l fpsp_done ;continue in fpsp_done
+e1_set:
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore integer scratch registers
+ fmovem.x USER_FP0(a6),fp0-fp3 ;restore fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;restore fp control registers
+ unlk a6 ;release locals; fsave frame is dropped without frestore
+ bra.l fpsp_done ;continue in fpsp_done
+
+*
+* ovf_adj
+*
+ovf_adj:
+*
+* Have a0 point to the correct operand.
+*
+ btst.b #E3,E_BYTE(a6) ;test E3 bit
+ beq.b ovf_e1 ;E3 clear: operand is ETEMP
+
+ lea WBTEMP(a6),a0 ;E3 set: operand is WBTEMP
+ bra.b ovf_com
+ovf_e1:
+ lea ETEMP(a6),a0
+
+ovf_com:
+ bclr.b #sign_bit,LOCAL_EX(a0) ;strip sign from exponent; Z reflects the old bit
+ sne LOCAL_SGN(a0) ;LOCAL_SGN = $ff if sign was set, else 0
+
+ bsr.l g_opcls ;returns opclass in d0
+ cmpi.w #3,d0 ;check for opclass3
+ bne.b not_opc011 ;not opclass 3
+
+*
+* FPSR_CC is saved and restored because ovf_r_x3 affects it. The
+* CCs are defined to be 'not affected' for the opclass3 instruction.
+*
+ move.b FPSR_CC(a6),L_SCR1(a6) ;save cc byte in L_SCR1
+ bsr.l ovf_r_x3 ;returns a0 pointing to result
+ move.b L_SCR1(a6),FPSR_CC(a6) ;put saved cc byte back
+ bra.l store ;stores to memory or register
+
+not_opc011:
+ bsr.l ovf_r_x2 ;returns a0 pointing to result
+ bra.l store ;stores to memory or register
+
+ end
diff --git a/sys/arch/m68k/fpsp/x_snan.sa b/sys/arch/m68k/fpsp/x_snan.sa
new file mode 100644
index 00000000000..0dba98298ab
--- /dev/null
+++ b/sys/arch/m68k/fpsp/x_snan.sa
@@ -0,0 +1,302 @@
+* $NetBSD: x_snan.sa,v 1.3 1994/10/26 07:50:28 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* x_snan.sa 3.3 7/1/91
+*
+* fpsp_snan --- FPSP handler for signalling NAN exception
+*
+* SNAN for float -> integer conversions (integer conversion of
+* an SNAN) is a non-maskable run-time exception.
+*
+* For trap disabled the 040 does the following:
+* If the dest data format is s, d, or x, then the SNAN bit in the NAN
+* is set to one and the resulting non-signaling NAN (truncated if
+* necessary) is transferred to the dest. If the dest format is b, w,
+* or l, then garbage is written to the dest (actually the upper 32 bits
+* of the mantissa are sent to the integer unit).
+*
+* For trap enabled the 040 does the following:
+* If the inst is move_out, then the results are the same as for trap
+* disabled with the exception posted. If the instruction is not move_
+* out, the dest. is not modified, and the exception is posted.
+*
+
+X_SNAN IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref get_fline
+ xref mem_write
+ xref real_snan
+ xref real_inex
+ xref fpsp_done
+ xref reg_dest
+
+ xdef fpsp_snan
+fpsp_snan:
+ link a6,#-LOCAL_SIZE ;snan handler entry: allocate LOCAL_SIZE bytes of locals below a6
+ fsave -(a7) ;push the fpu state frame onto the stack
+ movem.l d0-d1/a0-a1,USER_DA(a6) ;save integer scratch registers
+ fmovem.x fp0-fp3,USER_FP0(a6) ;save fp0-fp3
+ fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) ;save fp control registers
+
+*
+* Check if trap enabled
+*
+ btst.b #snan_bit,FPCR_ENABLE(a6)
+ bne.b ena ;If enabled, then branch
+
+ bsr.l move_out ;else SNAN disabled
+*
+* It is possible to have an inex1 exception with the
+* snan. If the inex enable bit is set in the FPCR, and either
+* inex2 or inex1 occured, we must clean up and branch to the
+* real inex handler.
+*
+ck_inex:
+ move.b FPCR_ENABLE(a6),d0 ;enable byte
+ and.b FPSR_EXCEPT(a6),d0 ;keep exceptions that are both enabled and set
+ andi.b #$3,d0 ;isolate the two ls bits
+ beq.w end_snan ;neither is enabled and set
+*
+* Inexact enabled and reported, and we must take an inexact exception.
+*
+take_inex:
+ move.b #INEX_VEC,EXC_VEC+1(a6) ;write INEX_VEC into the low byte of EXC_VEC
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore integer scratch registers
+ fmovem.x USER_FP0(a6),fp0-fp3 ;restore fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;restore fp control registers
+ frestore (a7)+ ;pop the fsave frame back into the fpu
+ unlk a6 ;release the local frame
+ bra.l real_inex ;continue in real_inex
+*
+* SNAN is enabled. Check if inst is move_out.
+* Make any corrections to the 040 output as necessary.
+*
+ena:
+ btst.b #5,CMDREG1B(a6) ;if set, inst is move out
+ beq.w not_out ;not a move out
+
+ bsr.l move_out
+
+report_snan:
+ move.b (a7),VER_TMP(a6) ;save first byte of the fsave frame
+ cmpi.b #VER_40,(a7) ;test for orig unimp frame
+ bne.b ck_rev
+ moveq.l #13,d0 ;need to zero 14 lwords
+ bra.b rep_con
+ck_rev:
+ moveq.l #11,d0 ;need to zero 12 lwords
+rep_con:
+ clr.l (a7) ;zero the first lword of the existing frame
+loop1:
+ clr.l -(a7) ;clear and dec a7
+ dbra.w d0,loop1 ;loop body runs d0+1 times
+ move.b VER_TMP(a6),(a7) ;format a busy frame
+ move.b #BUSY_SIZE-4,1(a7) ;write busy size
+ move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;copy saved fpsr into FPSR_SHADOW
+ or.l #sx_mask,E_BYTE(a6)
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore integer scratch registers
+ fmovem.x USER_FP0(a6),fp0-fp3 ;restore fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;restore fp control registers
+ frestore (a7)+ ;pop the busy frame into the fpu
+ unlk a6 ;release the local frame
+ bra.l real_snan ;continue in real_snan
+*
+* Exit snan handler by expanding the unimp frame into a busy frame
+*
+end_snan:
+ bclr.b #E1,E_BYTE(a6) ;clear the E1 bit
+
+ move.b (a7),VER_TMP(a6) ;save first byte of the fsave frame
+ cmpi.b #VER_40,(a7) ;test for orig unimp frame
+ bne.b ck_rev2
+ moveq.l #13,d0 ;need to zero 14 lwords
+ bra.b rep_con2
+ck_rev2:
+ moveq.l #11,d0 ;need to zero 12 lwords
+rep_con2:
+ clr.l (a7) ;zero the first lword of the existing frame
+loop2:
+ clr.l -(a7) ;clear and dec a7
+ dbra.w d0,loop2 ;loop body runs d0+1 times
+ move.b VER_TMP(a6),(a7) ;format a busy frame
+ move.b #BUSY_SIZE-4,1(a7) ;write busy size
+ move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;copy saved fpsr into FPSR_SHADOW
+ or.l #sx_mask,E_BYTE(a6)
+ movem.l USER_DA(a6),d0-d1/a0-a1 ;restore integer scratch registers
+ fmovem.x USER_FP0(a6),fp0-fp3 ;restore fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar ;restore fp control registers
+ frestore (a7)+ ;pop the busy frame into the fpu
+ unlk a6 ;release the local frame
+ bra.l fpsp_done ;continue in fpsp_done
+
+*
+* Move_out
+*
+move_out:
+ move.l EXC_EA(a6),a0 ;get <ea> from exc frame
+
+ bfextu CMDREG1B(a6){3:3},d0 ;move rx field to d0{2:0}
+ tst.l d0 ;check for long
+ beq.b sto_long ;branch if move_out long
+
+ cmpi.l #4,d0 ;check for word
+ beq.b sto_word ;branch if move_out word
+
+ cmpi.l #6,d0 ;check for byte
+ beq.b sto_byte ;branch if move_out byte
+
+*
+* Not byte, word or long
+*
+ rts ;nothing is written for other formats
+*
+* Get the 32 most significant bits of etemp mantissa
+*
+sto_long:
+ move.l ETEMP_HI(a6),d1
+ move.l #4,d0 ;load byte count
+*
+* Set signalling nan bit
+*
+ bset.l #30,d1 ;set bit 30 of the ms mantissa lword
+*
+* Store to the users destination address
+*
+ tst.l a0 ;check if <ea> is 0
+ beq.b wrt_dn ;destination is a data register
+
+ move.l d1,-(a7) ;move the snan onto the stack
+ move.l a0,a1 ;load dest addr into a1
+ move.l a7,a0 ;load src addr of snan into a0
+ bsr.l mem_write ;write snan to user memory
+ move.l (a7)+,d1 ;clear off stack
+ rts
+*
+* Get the 16 most significant bits of etemp mantissa
+*
+sto_word:
+ move.l ETEMP_HI(a6),d1
+ move.l #2,d0 ;load byte count
+*
+* Set signalling nan bit
+*
+ bset.l #30,d1 ;set bit 30 of the ms mantissa lword
+*
+* Store to the users destination address
+*
+ tst.l a0 ;check if <ea> is 0
+ beq.b wrt_dn ;destination is a data register
+
+ move.l d1,-(a7) ;move the snan onto the stack
+ move.l a0,a1 ;load dest addr into a1
+ move.l a7,a0 ;a0 -> pushed lword; its ms word is at this address
+ bsr.l mem_write ;write snan to user memory
+ move.l (a7)+,d1 ;clear off stack
+ rts
+*
+* Get the 8 most significant bits of etemp mantissa
+*
+sto_byte:
+ move.l ETEMP_HI(a6),d1
+ move.l #1,d0 ;load byte count
+*
+* Set signalling nan bit
+*
+ bset.l #30,d1 ;set bit 30 of the ms mantissa lword
+*
+* Store to the users destination address
+*
+ tst.l a0 ;check if <ea> is 0
+ beq.b wrt_dn ;destination is a data register
+ move.l d1,-(a7) ;move the snan onto the stack
+ move.l a0,a1 ;load dest addr into a1
+ move.l a7,a0 ;a0 -> pushed lword; its ms byte is at this address
+ bsr.l mem_write ;write snan to user memory
+ move.l (a7)+,d1 ;clear off stack
+ rts
+
+*
+* wrt_dn --- write to a data register
+*
+* We get here with D1 containing the data to write and D0 the
+* number of bytes to write: 1=byte,2=word,4=long.
+*
+wrt_dn:
+ move.l d1,L_SCR1(a6) ;data
+ move.l d0,-(a7) ;size
+ bsr.l get_fline ;returns fline word in d0
+ move.l d0,d1
+ andi.l #$7,d1 ;d1 now holds register number
+ move.l (sp)+,d0 ;get original size
+ cmpi.l #4,d0
+ beq.b wrt_long ;size 4: long
+ cmpi.l #2,d0
+ bne.b wrt_byte ;neither 4 nor 2: byte
+wrt_word:
+ or.l #$8,d1 ;word: or $8 into register number
+ bra.l reg_dest ;tail-branch; no return to this routine
+wrt_long:
+ or.l #$10,d1 ;long: or $10 into register number
+ bra.l reg_dest ;tail-branch; no return to this routine
+wrt_byte:
+ bra.l reg_dest ;byte: register number alone in d1
+*
+* Check if it is a src nan or dst nan
+*
+not_out:
+ move.l DTAG(a6),d0 ;lword load starting at DTAG
+ bfextu d0{0:3},d0 ;isolate dtag in lsbs
+
+ cmpi.b #3,d0 ;check for nan in destination
+ bne.b issrc ;destination nan has priority
+dst_nan:
+ btst.b #6,FPTEMP_HI(a6) ;check if dest nan is an snan
+ bne.b issrc ;no, so check source for snan
+ move.w FPTEMP_EX(a6),d0 ;use exponent word of the destination
+ bra.b cont
+issrc:
+ move.w ETEMP_EX(a6),d0 ;use exponent word of the source
+cont:
+ btst.l #15,d0 ;test for sign of snan
+ beq.b clr_neg ;sign clear
+ bset.b #neg_bit,FPSR_CC(a6) ;sign set: set neg_bit in FPSR_CC
+ bra.w report_snan
+clr_neg:
+ bclr.b #neg_bit,FPSR_CC(a6) ;sign clear: clear neg_bit in FPSR_CC
+ bra.w report_snan
+
+ end
diff --git a/sys/arch/m68k/fpsp/x_store.sa b/sys/arch/m68k/fpsp/x_store.sa
new file mode 100644
index 00000000000..4139d87b862
--- /dev/null
+++ b/sys/arch/m68k/fpsp/x_store.sa
@@ -0,0 +1,281 @@
+* $NetBSD: x_store.sa,v 1.3 1994/10/26 07:50:29 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* x_store.sa 3.2 1/24/91
+*
+* store --- store operand to memory or register
+*
+* Used by underflow and overflow handlers.
+*
+* a0 = points to fp value to be stored.
+*
+
+X_STORE IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+fpreg_mask:
+ dc.b $80,$40,$20,$10,$08,$04,$02,$01 ;one mask byte per fp reg number 0-7, used by sto_fp
+
+ include fpsp.h
+
+ xref mem_write
+ xref get_fline
+ xref g_opcls
+ xref g_dfmtou
+ xref reg_dest
+
+ xdef dest_ext
+ xdef dest_dbl
+ xdef dest_sgl
+
+ xdef store
+store:
+ btst.b #E3,E_BYTE(a6) ;test E3 bit
+ beq.b E1_sto ;E3 clear: take the E1 path
+E3_sto:
+ move.l CMDREG3B(a6),d0 ;lword load starting at CMDREG3B
+ bfextu d0{6:3},d0 ;isolate dest. reg from cmdreg3b
+sto_fp:
+ lea fpreg_mask,a1 ;a1 -> reg number to mask table
+ move.b (a1,d0.w),d0 ;convert reg# to dynamic register mask
+ tst.b LOCAL_SGN(a0) ;sign byte zero means positive
+ beq.b is_pos
+ bset.b #sign_bit,LOCAL_EX(a0) ;negative: put sign back into exponent word
+is_pos:
+ fmovem.x (a0),d0 ;move to correct register
+*
+* if fp0-fp3 is being modified, we must put a copy
+* in the USER_FPn variable on the stack because all exception
+* handlers restore fp0-fp3 from there.
+*
+ cmp.b #$80,d0 ;mask $80 selects fp0
+ bne.b not_fp0
+ fmovem.x fp0,USER_FP0(a6)
+ rts
+not_fp0:
+ cmp.b #$40,d0 ;mask $40 selects fp1
+ bne.b not_fp1
+ fmovem.x fp1,USER_FP1(a6)
+ rts
+not_fp1:
+ cmp.b #$20,d0 ;mask $20 selects fp2
+ bne.b not_fp2
+ fmovem.x fp2,USER_FP2(a6)
+ rts
+not_fp2:
+ cmp.b #$10,d0 ;mask $10 selects fp3
+ bne.b not_fp3
+ fmovem.x fp3,USER_FP3(a6)
+ rts
+not_fp3:
+ rts ;fp4-fp7: no USER_FPn copy is made
+
+E1_sto:
+ bsr.l g_opcls ;returns opclass in d0
+ cmpi.b #3,d0
+ beq opc011 ;branch if opclass 3
+ move.l CMDREG1B(a6),d0 ;lword load starting at CMDREG1B
+ bfextu d0{6:3},d0 ;extract destination register
+ bra.b sto_fp
+
+opc011:
+ bsr.l g_dfmtou ;returns dest format in d0
+* ;ext=00, sgl=01, dbl=10
+ move.l a0,a1 ;save source addr in a1
+ move.l EXC_EA(a6),a0 ;get the address
+ tst.l d0 ;if dest format is extended
+ beq.w dest_ext ;then branch
+ cmpi.l #1,d0 ;if dest format is single
+ beq.b dest_sgl ;then branch
+*
+* fall through to dest_dbl
+*
+
+*
+* dest_dbl --- write double precision value to user space
+*
+*Input
+* a0 -> destination address
+* a1 -> source in extended precision
+*Output
+* a0 -> destroyed
+* a1 -> destroyed
+* d0 -> 0
+*
+*Changes extended precision to double precision.
+* Note: no attempt is made to round the extended value to double.
+* dbl_sign = ext_sign
+* dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)
+* get rid of ext integer bit
+* dbl_mant = ext_mant{62:11}
+*
+* --------------- --------------- ---------------
+* extended -> |s| exp | |1| ms mant | | ls mant |
+* --------------- --------------- ---------------
+* 95 64 63 62 32 31 11 0
+* | |
+* | |
+* | |
+* v v
+* --------------- ---------------
+* double -> |s|exp| mant | | mant |
+* --------------- ---------------
+* 63 51 32 31 0
+*
+dest_dbl:
+ clr.l d0 ;clear d0
+ move.w LOCAL_EX(a1),d0 ;get exponent
+ sub.w #$3fff,d0 ;subtract extended precision bias
+ cmp.w #$4000,d0 ;check if inf
+ beq.b inf ;if so, special case
+ add.w #$3ff,d0 ;add double precision bias
+ swap d0 ;d0 now in upper word
+ lsl.l #4,d0 ;d0 now in proper place for dbl prec exp
+ tst.b LOCAL_SGN(a1) ;sign byte zero means positive
+ beq.b get_mant ;if positive, go process mantissa
+ bset.l #31,d0 ;if negative, put in sign information
+* ; before continuing
+ bra.b get_mant ;go process mantissa
+inf:
+ move.l #$7ff00000,d0 ;load dbl inf exponent
+ clr.l LOCAL_HI(a1) ;clear ms mantissa lword (becomes ls lword of the double)
+ tst.b LOCAL_SGN(a1) ;sign byte zero means positive
+ beq.b dbl_inf ;if positive, go ahead and write it
+ bset.l #31,d0 ;if negative put in sign information
+dbl_inf:
+ move.l d0,LOCAL_EX(a1) ;store ms lword of the double over LOCAL_EX
+ bra.b dbl_wrt
+get_mant:
+ move.l LOCAL_HI(a1),d1 ;get ms mantissa
+ bfextu d1{1:20},d1 ;get upper 20 bits of ms
+ or.l d1,d0 ;put these bits in ms word of double
+ move.l d0,LOCAL_EX(a1) ;store ms lword of the double over LOCAL_EX
+ move.l LOCAL_HI(a1),d1 ;get ms mantissa
+ move.l #21,d0 ;load shift count
+ lsl.l d0,d1 ;put lower 11 bits in upper bits
+ move.l d1,LOCAL_HI(a1) ;build lower lword in memory
+ move.l LOCAL_LO(a1),d1 ;get ls mantissa
+ bfextu d1{0:21},d0 ;get ls 21 bits of double
+ or.l d0,LOCAL_HI(a1) ;put them in double result
+dbl_wrt:
+ move.l #$8,d0 ;byte count for double precision number
+ exg a0,a1 ;a0=supervisor source, a1=user dest
+ bsr.l mem_write ;move the number to the user's memory
+ rts
+*
+* dest_sgl --- write single precision value to user space
+*
+*Input
+* a0 -> destination address
+* a1 -> source in extended precision
+*
+*Output
+* a0 -> destroyed
+* a1 -> destroyed
+* d0 -> 0
+*
+*Changes extended precision to single precision.
+* sgl_sign = ext_sign
+* sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)
+* get rid of ext integer bit
+* sgl_mant = ext_mant{62:40}
+*
+* --------------- --------------- ---------------
+* extended -> |s| exp | |1| ms mant | | ls mant |
+* --------------- --------------- ---------------
+* 95 64 63 62 40 32 31 12 0
+* | |
+* | |
+* | |
+* v v
+* ---------------
+* single -> |s|exp| mant |
+* ---------------
+* 31 22 0
+*
+dest_sgl:
+ clr.l d0 ;clear d0
+ move.w LOCAL_EX(a1),d0 ;get exponent
+ sub.w #$3fff,d0 ;subtract extended precision bias
+ cmp.w #$4000,d0 ;check if inf
+ beq.b sinf ;if so, special case
+ add.w #$7f,d0 ;add single precision bias
+ swap d0 ;put exp in upper word of d0
+ lsl.l #7,d0 ;shift it into single exp bits
+ tst.b LOCAL_SGN(a1) ;sign byte zero means positive
+ beq.b get_sman ;if positive, continue
+ bset.l #31,d0 ;if negative, put in sign first
+ bra.b get_sman ;get mantissa
+sinf:
+ move.l #$7f800000,d0 ;load single inf exp to d0
+ tst.b LOCAL_SGN(a1) ;sign byte zero means positive
+ beq.b sgl_wrt ;if positive, continue
+ bset.l #31,d0 ;if negative, put in sign info
+ bra.b sgl_wrt
+
+get_sman:
+ move.l LOCAL_HI(a1),d1 ;get ms mantissa
+ bfextu d1{1:23},d1 ;get upper 23 bits of ms
+ or.l d1,d0 ;put these bits in ms word of single
+
+sgl_wrt:
+ move.l d0,L_SCR1(a6) ;save the assembled single value in L_SCR1
+ move.l #$4,d0 ;byte count for single precision number
+ tst.l a0 ;users destination address
+ beq.b sgl_Dn ;destination is a data register
+ exg a0,a1 ;a0=supervisor source, a1=user dest
+ lea.l L_SCR1(a6),a0 ;point a0 to data (replaces the a0 from exg)
+ bsr.l mem_write ;move the number to the user's memory
+ rts
+sgl_Dn:
+ bsr.l get_fline ;returns fline word in d0
+ and.w #$7,d0 ;isolate register number
+ move.l d0,d1 ;d1 has size:reg formatted for reg_dest
+ or.l #$10,d1 ;reg_dest wants size added to reg#
+ bra.l reg_dest ;size code is $10, rts in reg_dest will
+* ;return to caller of dest_sgl
+
+dest_ext:
+ tst.b LOCAL_SGN(a1) ;put back sign into exponent word
+ beq.b dstx_cont ;sign byte zero: nothing to set
+ bset.b #sign_bit,LOCAL_EX(a1)
+dstx_cont:
+ clr.b LOCAL_SGN(a1) ;clear out the sign byte
+
+ move.l #$0c,d0 ;byte count for extended number
+ exg a0,a1 ;a0=supervisor source, a1=user dest
+ bsr.l mem_write ;move the number to the user's memory
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/x_unfl.sa b/sys/arch/m68k/fpsp/x_unfl.sa
new file mode 100644
index 00000000000..9987455bc88
--- /dev/null
+++ b/sys/arch/m68k/fpsp/x_unfl.sa
@@ -0,0 +1,294 @@
+* $NetBSD: x_unfl.sa,v 1.3 1994/10/26 07:50:30 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* x_unfl.sa 3.4 7/1/91
+*
+* fpsp_unfl --- FPSP handler for underflow exception
+*
+* Trap disabled results
+* For 881/2 compatibility, sw must denormalize the intermediate
+* result, then store the result. Denormalization is accomplished
+* by taking the intermediate result (which is always normalized) and
+* shifting the mantissa right while incrementing the exponent until
+* it is equal to the denormalized exponent for the destination
+* format. After denormalization, the result is rounded to the
+* destination format.
+*
+* Trap enabled results
+* All trap disabled code applies. In addition the exceptional
+* operand needs to be made available to the user with a bias of $6000
+* added to the exponent.
+*
+
+X_UNFL IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref denorm
+ xref round
+ xref store
+ xref g_rndpr
+ xref g_opcls
+ xref g_dfmtou
+ xref real_unfl
+ xref real_inex
+ xref fpsp_done
+ xref b1238_fix
+
+ xdef fpsp_unfl
+*
+* fpsp_unfl --- entry point of the underflow handler.
+*
+* Saves the FPU state and the scratch registers in a local frame,
+* calls unf_res to produce and store the result, then leaves through
+* one of three exits:
+* real_unfl if the underflow exception is enabled in the FPCR
+* real_inex if an enabled inex1/inex2 exception was reported
+* fpsp_done otherwise
+* Each exit restores d0-d1/a0-a1, fp0-fp3 and fpcr/fpsr/fpiar from
+* the frame before branching.
+*
+fpsp_unfl:
+ link a6,#-LOCAL_SIZE ;allocate the local frame
+ fsave -(a7) ;push the FPU state frame
+ movem.l d0-d1/a0-a1,USER_DA(a6)
+ fmovem.x fp0-fp3,USER_FP0(a6)
+ fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6)
+
+*
+ bsr.l unf_res ;denormalize, round & store interm op
+*
+* If underflow exceptions are not enabled, check for inexact
+* exception
+*
+ btst.b #unfl_bit,FPCR_ENABLE(a6)
+ beq.b ck_inex
+
+ btst.b #E3,E_BYTE(a6)
+ beq.b no_e3_1 ;E3 clear: skip the E3 fix-up
+*
+* Clear dirty bit on dest register in the frame before branching
+* to b1238_fix.
+*
+ bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no
+ bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit
+ bsr.l b1238_fix ;test for bug1238 case
+ move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;copy saved FPSR to shadow
+ or.l #sx_mask,E_BYTE(a6) ;set the sx_mask bits in the frame
+no_e3_1:
+ movem.l USER_DA(a6),d0-d1/a0-a1
+ fmovem.x USER_FP0(a6),fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
+ frestore (a7)+
+ unlk a6
+ bra.l real_unfl
+*
+* It is possible to have either inex2 or inex1 exceptions with the
+* unfl. If the inex enable bit is set in the FPCR, and either
+* inex2 or inex1 occurred, we must clean up and branch to the
+* real inex handler.
+*
+ck_inex:
+ move.b FPCR_ENABLE(a6),d0
+ and.b FPSR_EXCEPT(a6),d0 ;enabled AND reported
+ andi.b #$3,d0 ;keep only the two inexact bits
+ beq.b unfl_done
+
+*
+* Inexact enabled and reported, and we must take an inexact exception
+*
+take_inex:
+ btst.b #E3,E_BYTE(a6)
+ beq.b no_e3_2 ;E3 clear: skip the E3 fix-up
+*
+* Clear dirty bit on dest register in the frame before branching
+* to b1238_fix.
+*
+ bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no
+ bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit
+ bsr.l b1238_fix ;test for bug1238 case
+ move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;copy saved FPSR to shadow
+ or.l #sx_mask,E_BYTE(a6) ;set the sx_mask bits in the frame
+no_e3_2:
+ move.b #INEX_VEC,EXC_VEC+1(a6) ;retag the frame with INEX_VEC
+ movem.l USER_DA(a6),d0-d1/a0-a1
+ fmovem.x USER_FP0(a6),fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
+ frestore (a7)+
+ unlk a6
+ bra.l real_inex
+
+*
+* No exception is taken. bclr tests E3 before clearing it, so the
+* beq below is taken when E3 was already clear.
+*
+unfl_done:
+ bclr.b #E3,E_BYTE(a6)
+ beq.b e1_set ;E3 was clear (presumably E1): branch
+*
+* Clear dirty bit on dest register in the frame before branching
+* to b1238_fix.
+*
+ bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no
+ bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit
+ bsr.l b1238_fix ;test for bug1238 case
+ move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;copy saved FPSR to shadow
+ or.l #sx_mask,E_BYTE(a6) ;set the sx_mask bits in the frame
+ movem.l USER_DA(a6),d0-d1/a0-a1
+ fmovem.x USER_FP0(a6),fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
+ frestore (a7)+
+ unlk a6
+ bra.l fpsp_done
+*
+* This path has no frestore: the state frame pushed by fsave is
+* dropped from the stack by unlk without being reloaded.
+*
+e1_set:
+ movem.l USER_DA(a6),d0-d1/a0-a1
+ fmovem.x USER_FP0(a6),fp0-fp3
+ fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
+ unlk a6
+ bra.l fpsp_done
+*
+* unf_res --- underflow result calculation
+*
+* Picks the exceptional operand out of the frame (WBTEMP for E3,
+* FPTEMP otherwise), denormalizes it, rounds it and stores it.
+* On a store to a float register the z and neg condition codes are
+* set from the result. If inex2 is reported in FPSR_EXCEPT, aunfl
+* is set in FPSR_AEXCEPT.
+*
+* Input: a6 frame pointer set up by fpsp_unfl
+* Uses: d0, d1, a0 and one longword of stack that is pushed
+* here and popped again before round is called
+*
+unf_res:
+ bsr.l g_rndpr ;returns RND_PREC in d0 0=ext,
+* ;1=sgl, 2=dbl
+* ;we need the RND_PREC in the
+* ;upper word for round
+ clr.w -(a7) ;low word of the PREC long = 0
+ move.w d0,-(a7) ;copy RND_PREC to stack
+*
+*
+* If the exception bit set is E3, the exceptional operand from the
+* fpu is in WBTEMP; else it is in FPTEMP.
+*
+ btst.b #E3,E_BYTE(a6)
+ beq.b unf_E1
+unf_E3:
+ lea WBTEMP(a6),a0 ;a0 now points to operand
+*
+* Test for fsgldiv and fsglmul. If the inst was one of these, then
+* force the precision to extended for the denorm routine. Use
+* the user's precision for the round routine.
+*
+* NOTE(review): the sentence above says round uses the user's
+* precision, but unf_sgl below overwrites the stacked precision
+* with 1 (sgl) -- confirm which is intended.
+*
+ move.w CMDREG3B(a6),d1 ;check for fsgldiv or fsglmul
+ andi.w #$7f,d1 ;keep the low 7 bits of the command word
+ cmpi.w #$30,d1 ;check for sgldiv
+ beq.b unf_sgl
+ cmpi.w #$33,d1 ;check for sglmul
+ bne.b unf_cont ;if not, use fpcr prec in round
+unf_sgl:
+ clr.l d0 ;d0=0 (ext) is what denorm will see
+ move.w #$1,(a7) ;override g_rndpr precision
+* ;force single
+ bra.b unf_cont
+unf_E1:
+ lea FPTEMP(a6),a0 ;a0 now points to operand
+unf_cont:
+ bclr.b #sign_bit,LOCAL_EX(a0) ;clear sign bit
+ sne LOCAL_SGN(a0) ;store sign
+
+ bsr.l denorm ;returns denorm, a0 points to it
+*
+* WARNING:
+* ;d0 has guard,round sticky bit
+* ;make sure that it is not corrupted
+* ;before it reaches the round subroutine
+* ;also ensure that a0 isn't corrupted
+
+*
+* Set up d1 for the round subroutine; d1 contains the PREC/MODE
+* information respectively on upper/lower register halves.
+*
+ bfextu FPCR_MODE(a6){2:2},d1 ;get mode from FPCR
+* ;mode in lower d1
+ add.l (a7)+,d1 ;merge PREC/MODE
+*
+* WARNING: a0 and d0 are assumed to be intact between the denorm and
+* round subroutines. All code between these two subroutines
+* must not corrupt a0 and d0.
+*
+*
+* Perform Round
+* Input: a0 points to input operand
+* d0{31:29} has guard, round, sticky
+* d1{01:00} has rounding mode
+* d1{17:16} has rounding precision
+* Output: a0 points to rounded operand
+*
+
+ bsr.l round ;returns rounded denorm at (a0)
+*
+* Differentiate between store to memory vs. store to register
+*
+unf_store:
+ bsr.l g_opcls ;returns opclass in d0{2:0}
+ cmpi.b #$3,d0
+ bne.b not_opc011
+*
+* At this point, a store to memory is pending
+*
+opc011:
+ bsr.l g_dfmtou ;d0 is tested below: zero means extended
+ tst.b d0
+ beq.b ext_opc011 ;If extended, do not subtract
+* ;If destination format is sgl/dbl,
+ tst.b LOCAL_HI(a0) ;If rounded result is normal,don't
+* ;subtract
+ bmi.b ext_opc011 ;top mantissa bit set: normal
+ subq.w #1,LOCAL_EX(a0) ;account for denorm bias vs.
+* ;normalized bias
+* ; normalized denormalized
+* ;single $7f $7e
+* ;double $3ff $3fe
+*
+ext_opc011:
+ bsr.l store ;stores to memory
+ bra.b unf_done ;finish up
+
+*
+* At this point, a store to a float register is pending
+*
+not_opc011:
+ bsr.l store ;stores to float register
+* ;a0 is not corrupted on a store to a
+* ;float register.
+*
+* Set the condition codes according to result
+*
+ tst.l LOCAL_HI(a0) ;check upper mantissa
+ bne.b ck_sgn
+ tst.l LOCAL_LO(a0) ;check lower mantissa
+ bne.b ck_sgn
+ bset.b #z_bit,FPSR_CC(a6) ;set condition codes if zero
+*
+* NOTE(review): unf_cont cleared this sign bit and kept the sign in
+* LOCAL_SGN; the test below presumably relies on round or store
+* having put it back into LOCAL_EX -- confirm.
+*
+ck_sgn:
+ btst.b #sign_bit,LOCAL_EX(a0) ;check the sign bit
+ beq.b unf_done
+ bset.b #neg_bit,FPSR_CC(a6)
+
+*
+* Finish.
+*
+unf_done:
+ btst.b #inex2_bit,FPSR_EXCEPT(a6)
+ beq.b no_aunfl ;no inex2: leave aunfl alone
+ bset.b #aunfl_bit,FPSR_AEXCEPT(a6)
+no_aunfl:
+ rts
+
+ end
diff --git a/sys/arch/m68k/fpsp/x_unimp.sa b/sys/arch/m68k/fpsp/x_unimp.sa
new file mode 100644
index 00000000000..3abf3f57eb9
--- /dev/null
+++ b/sys/arch/m68k/fpsp/x_unimp.sa
@@ -0,0 +1,102 @@
+* $NetBSD: x_unimp.sa,v 1.2 1994/10/26 07:50:32 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* x_unimp.sa 3.3 7/1/91
+*
+* fpsp_unimp --- FPSP handler for unimplemented instruction
+* exception.
+*
+* Invoked when the user program encounters a floating-point
+* op-code that hardware does not support. Trap vector# 11
+* (See table 8-1 MC68030 User's Manual).
+*
+*
+* Note: An fsave for an unimplemented inst. will create a short
+* fsave stack.
+*
+* Input: 1. Six word stack frame for unimplemented inst, four word
+* for illegal
+* (See table 8-7 MC68030 User's Manual).
+* 2. Unimp (short) fsave state frame created here by fsave
+* instruction.
+*
+
+X_UNIMP IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref get_op
+ xref do_func
+ xref sto_res
+ xref gen_except
+ xref fpsp_fmt_error
+
+ xdef fpsp_unimp
+ xdef uni_2
+*
+* fpsp_unimp --- entry point of the unimplemented instruction
+* handler. uni_2 is a second exported entry that skips the link
+* and fsave.
+*
+* Saves the scratch registers and FPU control registers, checks the
+* version byte at the top of the fsave frame, clears the FPSR/FPCR,
+* then runs get_op and do_func. The result is stored with sto_res
+* unless STORE_FLG is nonzero after do_func, and the handler exits
+* through gen_except.
+*
+fpsp_unimp:
+ link a6,#-LOCAL_SIZE ;allocate the local frame
+ fsave -(a7) ;push the FPU state frame
+uni_2:
+ movem.l d0-d1/a0-a1,USER_DA(a6)
+ fmovem.x fp0-fp3,USER_FP0(a6)
+ fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6)
+ move.b (a7),d0 ;test for valid version num
+ andi.b #$f0,d0 ;test for $4x
+ cmpi.b #VER_4,d0 ;must be $4x or exit
+ bne.l fpsp_fmt_error
+*
+* Temporary D25B Fix
+* The following lines are used to ensure that the FPSR
+* exception byte and condition codes are clear before proceeding
+*
+ move.l USER_FPSR(a6),d0
+ and.l #$FF00FF,d0 ;clear all but aexcs and qbyte
+ move.l d0,USER_FPSR(a6)
+ fmove.l #0,FPSR ;clear all user bits
+ fmove.l #0,FPCR ;clear all user exceptions for FPSP
+
+ clr.b UFLG_TMP(a6) ;clr flag for unsupp data
+
+ bsr.l get_op ;go get operand(s)
+ clr.b STORE_FLG(a6) ;start with "store the result"
+ bsr.l do_func ;do the function
+ fsave -(a7) ;capture possible exc state
+ tst.b STORE_FLG(a6)
+ bne.b no_store ;if STORE_FLG is set, no store
+ bsr.l sto_res ;store the result in user space
+no_store:
+ bra.l gen_except ;post any exceptions and return
+
+ end
diff --git a/sys/arch/m68k/fpsp/x_unsupp.sa b/sys/arch/m68k/fpsp/x_unsupp.sa
new file mode 100644
index 00000000000..cf44cfa780b
--- /dev/null
+++ b/sys/arch/m68k/fpsp/x_unsupp.sa
@@ -0,0 +1,107 @@
+* $NetBSD: x_unsupp.sa,v 1.2 1994/10/26 07:50:33 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* x_unsupp.sa 3.3 7/1/91
+*
+* fpsp_unsupp --- FPSP handler for unsupported data type exception
+*
+* Trap vector #55 (See table 8-1 MC68030 User's Manual).
+* Invoked when the user program encounters a data format (packed) that
+* hardware does not support or a data type (denormalized numbers or un-
+* normalized numbers).
+* Normalizes denorms and unnorms, unpacks packed numbers then stores
+* them back into the machine to let the 040 finish the operation.
+*
+* Unsupp calls two routines:
+* 1. get_op - gets the operand(s)
+* 2. res_func - restore the function back into the 040 or
+* if fmove.p fpm,<ea> then pack source (fpm)
+* and store in users memory <ea>.
+*
+* Input: Long fsave stack frame
+*
+
+X_UNSUPP IDNT 2,1 Motorola 040 Floating Point Software Package
+
+ section 8
+
+ include fpsp.h
+
+ xref get_op
+ xref res_func
+ xref gen_except
+ xref fpsp_fmt_error
+
+ xdef fpsp_unsupp
+*
+* fpsp_unsupp --- entry point of the unsupported data type handler.
+*
+* Saves the scratch registers and FPU control registers, keeps the
+* version byte of the fsave frame in VER_TMP, clears the FPSR/FPCR
+* and trims the saved user FPSR, then runs get_op and res_func.
+* Before leaving through gen_except it pushes a zero longword and
+* writes the saved version byte into its first byte.
+*
+fpsp_unsupp:
+*
+ link a6,#-LOCAL_SIZE ;allocate the local frame
+ fsave -(a7) ;push the FPU state frame
+ movem.l d0-d1/a0-a1,USER_DA(a6)
+ fmovem.x fp0-fp3,USER_FP0(a6)
+ fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6)
+
+
+ move.b (a7),VER_TMP(a6) ;save version number
+ move.b (a7),d0 ;test for valid version num
+ andi.b #$f0,d0 ;test for $4x
+ cmpi.b #VER_4,d0 ;must be $4x or exit
+ bne.l fpsp_fmt_error
+
+ fmove.l #0,FPSR ;clear all user status bits
+ fmove.l #0,FPCR ;clear all user control bits
+*
+* The following lines are used to ensure that the FPSR
+* exception byte and condition codes are clear before proceeding,
+* except in the case of fmove, which leaves the cc's intact.
+*
+unsupp_con:
+ move.l USER_FPSR(a6),d1
+ btst #5,CMDREG1B(a6) ;looking for fmove out
+ bne fmove_con ;bit set: keep cc's and snan too
+ and.l #$FF00FF,d1 ;clear all but aexcs and qbyte
+ bra.b end_fix
+fmove_con:
+ and.l #$0FFF40FF,d1 ;clear all but cc's, snan bit, aexcs, and qbyte
+end_fix:
+ move.l d1,USER_FPSR(a6) ;write the trimmed FPSR back
+
+ st UFLG_TMP(a6) ;set flag for unsupp data
+
+ bsr.l get_op ;everything okay, go get operand(s)
+ bsr.l res_func ;fix up stack frame so can restore it
+ clr.l -(a7) ;push a zero longword
+ move.b VER_TMP(a6),(a7) ;move idle fmt word to top of stack
+ bra.l gen_except
+*
+ end