diff options
Diffstat (limited to 'sys/arch/m68k/fpsp')
60 files changed, 20732 insertions, 0 deletions
diff --git a/sys/arch/m68k/fpsp/DYADIC.CI5 b/sys/arch/m68k/fpsp/DYADIC.CI5 new file mode 100644 index 00000000000..43cd547c30b --- /dev/null +++ b/sys/arch/m68k/fpsp/DYADIC.CI5 @@ -0,0 +1,77 @@ +* $NetBSD: DYADIC.CI5,v 1.2 1994/10/26 07:48:26 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. 
*
*	DYADIC.CI5 1.2 4/30/91
*
*	DYADIC.CI5 --- DYADIC template for CI5 compiler
*
*	_OPd_ and _OPa_ are placeholders; L_ENTRY.AWK emits sed commands
*	that replace them with the library entry-point name and the
*	FPSP routine label taken from L_LIST.
*
*	Modified from the Motorola original: FPSR and FPCR are saved
*	with two single-register moves (see note at the save below).
*

	xref	_OPa_
	xref	tag

*
*	_OPd_ --- double-precision dyadic entry point
*
*	In:	8(a6)	first double argument  (converted into FPTEMP)
*		16(a6)	second double argument (converted into ETEMP)
*	Out:	fp0	result
*	Saved and restored: fp2-fp3.
*	Not preserved: fp0, fp1, d0, a0.
*
	xdef	_OPd_
_OPd_:
	link	a6,#-LOCAL_SIZE
	fmovem.x fp2-fp3,USER_FP2(a6)
*
*	A multi-register fmovem.l of control registers always transfers
*	FPCR before FPSR, whatever order the list is written in, so a
*	single "fmovem.l fpsr/fpcr,USER_FPSR(a6)" leaves FPCR in the
*	USER_FPSR slot.  Save each register to its own slot instead.
*
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,USER_FPCR(a6)	; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
*
*	copy, convert and tag input arguments
*
	fmove.d	8(a6),fp0
	fmove.x	fp0,FPTEMP(a6)
	lea	FPTEMP(a6),a0
	bsr	tag
	move.b	d0,DTAG(a6)		; tag of first operand (taken from d0)

	fmove.d	16(a6),fp0
	fmove.x	fp0,ETEMP(a6)
	lea	ETEMP(a6),a0
	bsr	tag
	move.b	d0,STAG(a6)		; tag of second operand

	bsr	_OPa_

	fmove.l	fpsr,d0			; update status register
	or.b	FPSR_AEXCEPT(a6),d0	; add previously accrued exceptions
	swap.w	d0			; bring the upper word into reach of or.b
	or.b	FPSR_QBYTE(a6),d0	; pickup sign of quotient byte
	swap.w	d0
	fmove.l	d0,fpsr
*
*	Result is now in FP0
*
	fmovem.x USER_FP2(a6),fp2-fp3	; note: FP0/FP1 not restored
	unlk	a6
	rts

diff --git a/sys/arch/m68k/fpsp/DYADIC.GCC b/sys/arch/m68k/fpsp/DYADIC.GCC new file mode 100644 index 00000000000..eacfa477ce3 --- /dev/null +++ b/sys/arch/m68k/fpsp/DYADIC.GCC @@ -0,0 +1,160 @@ +* $NetBSD: DYADIC.GCC,v 1.2 1994/10/26 07:48:27 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. 
+* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* DYADIC.GCC --- DYADIC template for GCC compiler +* +* This is based on the generic template. The only difference is that +* GCC does not need the d0-d1/a0-a1 registers saved. +* +* Customizations: +* 2. Likewise, don't save FP0/FP1 if they are scratch +* registers. +* 3. Delete updating of the fpsr if you only care about +* the result. +* 5. Move the result to d0/d1 if the compiler is that old. 
*
*	Restyled from the Motorola original: the three entry points
*	differ only in how the two operands are fetched, so the common
*	second-operand handling and epilogue live once, at _TMP_1
*	inside _OPd_.  _OPs_ and _OPx_ branch there.  _TMP_ is replaced
*	with a unique prefix by the sed commands L_ENTRY.AWK emits.
*

	xref	_OPa_
	xref	tag

*
*	_OPs_ --- single-precision operands at 8(a6) and 12(a6)
*
	xdef	_OPs_
_OPs_:
	link	a6,#-LOCAL_SIZE
	fmovem.x fp0-fp3,USER_FP0(a6)
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,USER_FPCR(a6)	; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
	fmove.s	8(a6),fp0		; first operand -> FPTEMP, tag -> DTAG
	fmove.x	fp0,FPTEMP(a6)
	lea	FPTEMP(a6),a0
	bsr	tag
	move.b	d0,DTAG(a6)
	fmove.s	12(a6),fp0		; second operand, finished at _TMP_1
	bra	_TMP_1

*
*	_OPd_ --- double-precision operands at 8(a6) and 16(a6)
*
	xdef	_OPd_
_OPd_:
	link	a6,#-LOCAL_SIZE
	fmovem.x fp0-fp3,USER_FP0(a6)
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,USER_FPCR(a6)	; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
	fmove.d	8(a6),fp0		; first operand -> FPTEMP, tag -> DTAG
	fmove.x	fp0,FPTEMP(a6)
	lea	FPTEMP(a6),a0
	bsr	tag
	move.b	d0,DTAG(a6)
	fmove.d	16(a6),fp0		; second operand, falls into _TMP_1
*
*	Common tail.  On entry fp0 holds the second operand.
*
_TMP_1:
	fmove.x	fp0,ETEMP(a6)
	lea	ETEMP(a6),a0
	bsr	tag
	move.b	d0,STAG(a6)

	bsr	_OPa_

	fmove.l	fpsr,d0			; update status register
	or.b	FPSR_AEXCEPT(a6),d0	; add previously accrued exceptions
	swap.w	d0
	or.b	FPSR_QBYTE(a6),d0	; pickup sign of quotient byte
	swap.w	d0
	fmove.l	d0,fpsr
*
*	Result is now in FP0
*
	fmovem.x USER_FP1(a6),fp1-fp3	; note: FP0 not restored
	unlk	a6
	rts

*
*	_OPx_ --- extended-precision operands at 8(a6) and 20(a6)
*
	xdef	_OPx_
_OPx_:
	link	a6,#-LOCAL_SIZE
	fmovem.x fp0-fp3,USER_FP0(a6)
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,USER_FPCR(a6)	; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
	fmove.x	8(a6),fp0		; first operand -> FPTEMP, tag -> DTAG
	fmove.x	fp0,FPTEMP(a6)
	lea	FPTEMP(a6),a0
	bsr	tag
	move.b	d0,DTAG(a6)
	fmove.x	20(a6),fp0		; second operand, finished at _TMP_1
	bra	_TMP_1

diff --git a/sys/arch/m68k/fpsp/DYADIC.GEN b/sys/arch/m68k/fpsp/DYADIC.GEN new file mode 100644 index 00000000000..fa3797fa6d1 --- /dev/null +++ b/sys/arch/m68k/fpsp/DYADIC.GEN @@ -0,0 +1,179 @@ +* $NetBSD: DYADIC.GEN,v 1.2 1994/10/26 07:48:29 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. 
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* DYADIC.GEN 1.2 4/30/91 +* +* DYADIC.GEN --- generic DYADIC template +* +* This version saves all registers that will be used by the emulation +* routines and restores all but FP0 on exit. The FPSR is +* updated to reflect the result of the operation. Return value +* is placed in FP0 for single, double and extended results. +* +* The package subroutines expect the incoming FPCR to be zeroed +* since they need extended precision to work properly. The +* 'final' FPCR is expected in USER_FPCR(a6) so that the calculated result +* can be properly sized and rounded. Also, if the incoming FPCR +* has enabled any exceptions, the exception will be taken on the +* final fmovem in this template. +* +* Customizations: +* 1. Remove the movem.l at the entry and exit of +* each routine if your compiler treats those +* registers as scratch. +* 2. Likewise, don't save FP0/FP1 if they are scratch +* registers. +* 3. Delete updating of the fpsr if you only care about +* the result. +* 4. Remove the _OPs_ and _OPx_ entry points if your compiler +* treats everything as doubles. +* 5. Move the result to d0/d1 if the compiler is that old. 
*
*	Modified from the Motorola original: each entry point saves
*	FPSR and FPCR with two single-register moves.  A multi-register
*	fmovem.l of control registers always transfers FPCR before FPSR,
*	whatever order the list is written in, so the former
*	"fmovem.l fpsr/fpcr,USER_FPSR(a6)" left FPCR in the USER_FPSR
*	slot, which FPSR_AEXCEPT(a6)/FPSR_QBYTE(a6) then read as status.
*
*	Every entry point: operands are converted to extended and stored
*	in FPTEMP (first) and ETEMP (second); the byte tag leaves in d0
*	is stored in DTAG/STAG; _OPa_ does the operation; the result is
*	in fp0.  d0-d1/a0-a1 and fp1-fp3 are restored, fp0 is not.
*

	xref	_OPa_
	xref	tag

*
*	_OPs_ --- single-precision operands at 8(a6) and 12(a6)
*
	xdef	_OPs_
_OPs_:
	link	a6,#-LOCAL_SIZE
	movem.l	d0-d1/a0-a1,USER_DA(a6)
	fmovem.x fp0-fp3,USER_FP0(a6)
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,USER_FPCR(a6)	; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
*
*	copy, convert and tag input arguments
*
	fmove.s	8(a6),fp0
	fmove.x	fp0,FPTEMP(a6)
	lea	FPTEMP(a6),a0
	bsr	tag
	move.b	d0,DTAG(a6)

	fmove.s	12(a6),fp0
	fmove.x	fp0,ETEMP(a6)
	lea	ETEMP(a6),a0
	bsr	tag
	move.b	d0,STAG(a6)

	bsr	_OPa_

	fmove.l	fpsr,d0			; update status register
	or.b	FPSR_AEXCEPT(a6),d0	; add previously accrued exceptions
	swap.w	d0
	or.b	FPSR_QBYTE(a6),d0	; pickup sign of quotient byte
	swap.w	d0
	fmove.l	d0,fpsr
*
*	Result is now in FP0
*
	movem.l	USER_DA(a6),d0-d1/a0-a1
	fmovem.x USER_FP1(a6),fp1-fp3	; note: FP0 not restored
	unlk	a6
	rts

*
*	_OPd_ --- double-precision operands at 8(a6) and 16(a6)
*
	xdef	_OPd_
_OPd_:
	link	a6,#-LOCAL_SIZE
	movem.l	d0-d1/a0-a1,USER_DA(a6)
	fmovem.x fp0-fp3,USER_FP0(a6)
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,USER_FPCR(a6)	; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
*
*	copy, convert and tag input arguments
*
	fmove.d	8(a6),fp0
	fmove.x	fp0,FPTEMP(a6)
	lea	FPTEMP(a6),a0
	bsr	tag
	move.b	d0,DTAG(a6)

	fmove.d	16(a6),fp0
	fmove.x	fp0,ETEMP(a6)
	lea	ETEMP(a6),a0
	bsr	tag
	move.b	d0,STAG(a6)

	bsr	_OPa_

	fmove.l	fpsr,d0			; update status register
	or.b	FPSR_AEXCEPT(a6),d0	; add previously accrued exceptions
	swap.w	d0
	or.b	FPSR_QBYTE(a6),d0	; pickup sign of quotient byte
	swap.w	d0
	fmove.l	d0,fpsr
*
*	Result is now in FP0
*
	movem.l	USER_DA(a6),d0-d1/a0-a1
	fmovem.x USER_FP1(a6),fp1-fp3	; note: FP0 not restored
	unlk	a6
	rts

*
*	_OPx_ --- extended-precision operands at 8(a6) and 20(a6)
*
	xdef	_OPx_
_OPx_:
	link	a6,#-LOCAL_SIZE
	movem.l	d0-d1/a0-a1,USER_DA(a6)
	fmovem.x fp0-fp3,USER_FP0(a6)
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,USER_FPCR(a6)	; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
*
*	copy, convert and tag input arguments
*
	fmove.x	8(a6),fp0
	fmove.x	fp0,FPTEMP(a6)
	lea	FPTEMP(a6),a0
	bsr	tag
	move.b	d0,DTAG(a6)

	fmove.x	20(a6),fp0
	fmove.x	fp0,ETEMP(a6)
	lea	ETEMP(a6),a0
	bsr	tag
	move.b	d0,STAG(a6)

	bsr	_OPa_

	fmove.l	fpsr,d0			; update status register
	or.b	FPSR_AEXCEPT(a6),d0	; add previously accrued exceptions
	swap.w	d0
	or.b	FPSR_QBYTE(a6),d0	; pickup sign of quotient byte
	swap.w	d0
	fmove.l	d0,fpsr
*
*	Result is now in FP0
*
	movem.l	USER_DA(a6),d0-d1/a0-a1
	fmovem.x USER_FP1(a6),fp1-fp3	; note: FP0 not restored
	unlk	a6
	rts

diff --git a/sys/arch/m68k/fpsp/DYADIC.R3V6 b/sys/arch/m68k/fpsp/DYADIC.R3V6 new file mode 100644 index 00000000000..843a44a2386 --- /dev/null +++ b/sys/arch/m68k/fpsp/DYADIC.R3V6 @@ -0,0 +1,72 @@ +* $NetBSD: DYADIC.R3V6,v 1.2 1994/10/26 07:48:31 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. 
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc.

*
*	DYADIC.R3V6 1.2 4/30/91
*
*	DYADIC.R3V6 --- DYADIC template for MCD R3V6 native C compiler
*
*	The MCD compiler is old.  It returns float and double values
*	as a double stored in d0/d1.  There is no support for single or extended
*	precision operations.  It's not clear whether the float registers
*	should be preserved, so for speed they're not.
*
*	Modified from the Motorola original: FPSR and FPCR are saved
*	with two single-register moves (see note at the save below).
*

	xref	_OPa_
	xref	tag

*
*	_OPd_ --- double-precision dyadic entry point
*
*	In:	8(a6)	first double argument  (converted into FPTEMP)
*		16(a6)	second double argument (converted into ETEMP)
*	Out:	d0/d1	result as a double, copied out of fp0 via USER_D0
*	No floating-point data register is saved or restored here.
*
	xdef	_OPd_
_OPd_:
	link	a6,#-LOCAL_SIZE
*
*	A multi-register fmovem.l of control registers always transfers
*	FPCR before FPSR, whatever order the list is written in, so a
*	single "fmovem.l fpsr/fpcr,USER_FPSR(a6)" leaves FPCR in the
*	USER_FPSR slot.  Save each register to its own slot instead.
*
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,USER_FPCR(a6)	; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
*
*	copy and convert arguments to ETEMP, FPTEMP.
*
	fmove.d	8(a6),fp0
	fmove.x	fp0,FPTEMP(a6)
	lea	FPTEMP(a6),a0
	bsr	tag
	move.b	d0,DTAG(a6)

	fmove.d	16(a6),fp0
	fmove.x	fp0,ETEMP(a6)
	lea	ETEMP(a6),a0
	bsr	tag
	move.b	d0,STAG(a6)

	bsr	_OPa_

	fmove.d	fp0,USER_D0(a6)		; result goes into d0/d1 pair
	movem.l	USER_D0(a6),d0-d1
	unlk	a6
	rts
diff --git a/sys/arch/m68k/fpsp/FPSP.sa b/sys/arch/m68k/fpsp/FPSP.sa new file mode 100644 index 00000000000..1a3692d4667 --- /dev/null +++ b/sys/arch/m68k/fpsp/FPSP.sa @@ -0,0 +1,79 @@ +* $NetBSD: FPSP.sa,v 1.2 1994/10/26 07:48:33 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. 
+* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc.

*
*	FPSP.sa 3.1 12/10/90
*
*	Init file for testing FPSP software package.
*
*	Takes over the exception vectors that the FPSP handles.
*
*	Restyled from the Motorola original: declarations and vector
*	stores are regrouped; the stored values are unchanged.
*

FPSP	IDNT	2,1 Motorola 040 Floating Point Software Package

CODE_ST		equ	$10000		;address of test code start

*
*	Byte offsets of the vector-table slots taken over, relative to VBR
*
BSUN_VEC	equ	$c0
INEX2_VEC	equ	$c4
DZ_VEC		equ	$c8
UNFL_VEC	equ	$cc
OPERR_VEC	equ	$d0
OVFL_VEC	equ	$d4
SNAN_VEC	equ	$d8
UNSUP_VEC	equ	$dc
FLINE_VEC	equ	$2c

*
*	FPSP handlers installed below (defined in other modules)
*
	xref	bsun
	xref	inex
	xref	dz
	xref	unfl
	xref	operr
	xref	ovfl
	xref	snan
	xref	unsupp
	xref	fline

	section	7

*
*	start --- point every slot above at its FPSP handler, then jump
*	to the test code at CODE_ST.  a0 is left holding the VBR value.
*
	xdef	start
start:
	movec.l	VBR,a0			; a0 = base of the vector table
	move.l	#bsun,BSUN_VEC(a0)
	move.l	#inex,INEX2_VEC(a0)
	move.l	#dz,DZ_VEC(a0)
	move.l	#unfl,UNFL_VEC(a0)
	move.l	#operr,OPERR_VEC(a0)
	move.l	#ovfl,OVFL_VEC(a0)
	move.l	#snan,SNAN_VEC(a0)
	move.l	#unsupp,UNSUP_VEC(a0)
	move.l	#fline,FLINE_VEC(a0)

	jmp	CODE_ST

	end
diff --git a/sys/arch/m68k/fpsp/L_ENTRY.AWK b/sys/arch/m68k/fpsp/L_ENTRY.AWK new file mode 100644 index 00000000000..44cf26a5682 --- /dev/null +++ b/sys/arch/m68k/fpsp/L_ENTRY.AWK @@ -0,0 +1,84 @@ +# $NetBSD: L_ENTRY.AWK,v 1.2 1994/10/26 07:48:34 cgd Exp $ + +# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +# M68000 Hi-Performance Microprocessor Division +# M68040 Software Package +# +# M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +# All rights reserved. +# +# THE SOFTWARE is provided on an "AS IS" basis and without warranty. +# To the maximum extent permitted by applicable law, +# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +# PARTICULAR PURPOSE and any warranty against infringement with +# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +# and any accompanying written materials. +# +# To the maximum extent permitted by applicable law, +# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +# PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +# OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +# SOFTWARE. Motorola assumes no responsibility for the maintenance +# and support of the SOFTWARE. +# +# You are hereby granted a copyright license to use, modify, and +# distribute the SOFTWARE so long as this entire notice is retained +# without alteration in any modified and/or redistributed versions, +# and that such modified versions are clearly identified as such. 
+# No licenses are granted by implication, estoppel or otherwise +# under any patents or trademarks of Motorola, Inc.

# L_ENTRY.AWK 1.1 3/27/91
#
# Reads an entry-point list (fields as laid out in L_LIST) and writes
# shell commands on standard output:
#   - echo commands for a fixed header (section, include, xref lines)
#     and for a closing "end";
#   - for each line whose 4th field is MONADIC or DYADIC, one sed
#     pipeline that instantiates the template MONADIC.<SYS> or
#     DYADIC.<SYS> by replacing its _OPx_/_TMP_ placeholders.
#
# Variables expected from the command line (not set in this script):
#   SYS     suffix of the template file to use
#   PREFIX  text placed before the per-entry counter that replaces _TMP_
#
# Modified from the Motorola original: list fields are passed to
# printf as %s/%d arguments instead of being concatenated into the
# format string (a '%' in a field would have been taken as a
# conversion), and the stray space after the newline that ended each
# MONADIC pipeline is gone, matching the DYADIC rule.

BEGIN {
	print "echo \" section 8\""
	print "echo \" include l_fpsp.h\""
	print "echo \"\""
	print "echo \" xref tag\""
	print "echo \" xref szero\""
	print "echo \" xref sinf\""
	print "echo \" xref sopr_inf\""
	print "echo \" xref sone\""
	print "echo \" xref spi_2\""
	print "echo \" xref szr_inf\""
	print "echo \" xref src_nan\""
	print "echo \" xref t_operr\""
	print "echo \" xref t_dz2\""
	print "echo \" xref snzrinx\""
	print "echo \" xref ld_pone\""
	print "echo \" xref ld_pinf\""
	print "echo \" xref ld_ppi2\""
	print "echo \" xref ssincosz\""
	print "echo \" xref ssincosi\""
	print "echo \" xref ssincosnan\""
	print "echo \" xref setoxm1i\""
	utmp = 100		# counter that makes each entry's _TMP_ labels unique
}

# Monadic entry: fields 1-3 name the entry points, fields 5-9 are the
# handler labels substituted for _OPr_, _OPz_, _OPi_, _OPn_, _OPm_.
$4 == "MONADIC" {
	utmp += 1
	printf "sed 's/_OPs_/%s/g' MONADIC.%s | ", $1, SYS
	printf "sed 's/_OPd_/%s/g' | ", $2
	printf "sed 's/_OPx_/%s/g' | ", $3
	printf "sed 's/_OPr_/%s/g' | ", $5
	printf "sed 's/_OPz_/%s/g' | ", $6
	printf "sed 's/_OPi_/%s/g' | ", $7
	printf "sed 's/_OPn_/%s/g' | ", $8
	printf "sed 's/_OPm_/%s/g' | ", $9
	printf "sed 's/_TMP_/%s%d/g'\n", PREFIX, utmp
}

# Dyadic entry: fields 1-3 name the entry points, field 5 is the single
# routine label substituted for _OPa_.
$4 == "DYADIC" {
	utmp += 1
	printf "sed 's/_OPs_/%s/g' DYADIC.%s | ", $1, SYS
	printf "sed 's/_OPd_/%s/g' | ", $2
	printf "sed 's/_OPx_/%s/g' | ", $3
	printf "sed 's/_OPa_/%s/g' | ", $5
	printf "sed 's/_TMP_/%s%d/g'\n", PREFIX, utmp
}

END {
	print "echo \" end\""
}
diff --git a/sys/arch/m68k/fpsp/L_LIST b/sys/arch/m68k/fpsp/L_LIST new file mode 100644 index 00000000000..7eb9b0feb5a --- /dev/null +++ b/sys/arch/m68k/fpsp/L_LIST @@ -0,0 +1,81 @@ +# $NetBSD: L_LIST,v 1.2 1994/10/26 07:48:38 cgd Exp $ +# +# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +# M68000 Hi-Performance Microprocessor Division +# M68040 Software Package +# +# M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +# All rights reserved. 
+# +# THE SOFTWARE is provided on an "AS IS" basis and without warranty. +# To the maximum extent permitted by applicable law, +# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +# PARTICULAR PURPOSE and any warranty against infringement with +# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +# and any accompanying written materials. +# +# To the maximum extent permitted by applicable law, +# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +# PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +# OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +# SOFTWARE. Motorola assumes no responsibility for the maintenance +# and support of the SOFTWARE. +# +# You are hereby granted a copyright license to use, modify, and +# distribute the SOFTWARE so long as this entire notice is retained +# without alteration in any modified and/or redistributed versions, +# and that such modified versions are clearly identified as such. +# No licenses are granted by implication, estoppel or otherwise +# under any patents or trademarks of Motorola, Inc. +# +# L_LIST 1.2 4/30/91 +# +# Each line specifies the entry points for one function. The first +# 3 items are the library entry point names for the single, double and +# extended precision versions of the function. Change them to +# suit your system. The next item is +# either MONADIC or DYADIC. The remaining 5 items are the labels +# in the FPSP code that correspond to subroutines to handle Regular, +# Zero, Infinity, Nan and Denorm input values. +# +# The first 3 +# +#Sgl Dbl. Ext. 
Type Reg Zero Inf Nan Denorm +#---------------------- ---- --- ---- --- --- ------ +facoss facosd facosx MONADIC sacos ld_ppi2 t_operr mon_nan sacosd +fasins fasind fasinx MONADIC sasin szero t_operr mon_nan sasind +fatans fatand fatanx MONADIC satan szero spi_2 mon_nan satand +fatanhs fatanhd fatanhx MONADIC satanh szero t_operr mon_nan satanhd +fcoss fcosd fcosx MONADIC scos ld_pone t_operr mon_nan scosd +fcoshs fcoshd fcoshx MONADIC scosh ld_pone ld_pinf mon_nan scoshd +fetoxs fetoxd fetoxx MONADIC setox ld_pone szr_inf mon_nan setoxd +fetoxm1s fetoxm1d fetoxm1x MONADIC setoxm1 szero setoxm1i mon_nan setoxm1d +fgetexps fgetexpd fgetexpx MONADIC sgetexp szero t_operr mon_nan sgetexpd +fsins fsind fsinx MONADIC ssin szero t_operr mon_nan ssind +fsinhs fsinhd fsinhx MONADIC ssinh szero sinf mon_nan ssinhd +ftans ftand ftanx MONADIC stan szero t_operr mon_nan stand +ftanhs ftanhd ftanhx MONADIC stanh szero sone mon_nan stanhd +ftentoxs ftentoxd ftentoxx MONADIC stentox ld_pone szr_inf mon_nan stentoxd +ftwotoxs ftwotoxd ftwotoxx MONADIC stwotox ld_pone szr_inf mon_nan stwotoxd +fgetmans fgetmand fgetmanx MONADIC sgetman szero t_operr mon_nan sgetmand +flogns flognd flognx MONADIC sslogn t_dz2 sopr_inf mon_nan sslognd +flog2s flog2d flog2x MONADIC sslog2 t_dz2 sopr_inf mon_nan sslog2d +flog10s flog10d flog10x MONADIC sslog10 t_dz2 sopr_inf mon_nan sslog10d +flognp1s flognp1d flognp1x MONADIC sslognp1 szero sopr_inf mon_nan slognp1d +fints fintd fintx MONADIC l_sint szero sinf mon_nan l_sintd +fintrzs fintrzd fintrzx MONADIC l_sintrz szero sinf mon_nan snzrinx +frems fremd fremx DYADIC prem +fmods fmodd fmodx DYADIC pmod +fscales fscaled fscalex DYADIC pscale +# +# 68040 native instructions added for completeness +# +fabss fabsd fabsx MONADIC sabs sabs sabs sabs sabs +fnegs fnegd fnegx MONADIC sneg sneg sneg sneg sneg +fsqrts fsqrtd fsqrtx MONADIC ssqrt ssqrt ssqrt ssqrt ssqrt +fadds faddd faddx DYADIC sadd +fsubs fsubd fsubx DYADIC ssub +fmuls fmuld fmulx DYADIC smul 
+fdivs fdivd fdivx DYADIC sdiv diff --git a/sys/arch/m68k/fpsp/MONADIC.CI5 b/sys/arch/m68k/fpsp/MONADIC.CI5 new file mode 100644 index 00000000000..56cc6586347 --- /dev/null +++ b/sys/arch/m68k/fpsp/MONADIC.CI5 @@ -0,0 +1,93 @@ +* $NetBSD: MONADIC.CI5,v 1.2 1994/10/26 07:48:39 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. 
*
*	MONADIC.CI5 1.3 4/30/91
*
*	MONADIC.CI5 --- MONADIC template for CI5 compiler
*
*	_OPd_, _OPr_, _OPz_, _OPi_, _OPn_, _OPm_ and _TMP_ are
*	placeholders; L_ENTRY.AWK emits sed commands that replace them
*	with names taken from L_LIST and a unique label prefix.
*
*	Modified from the Motorola original: the merged status word is
*	written to the FPSR itself (see note at _TMP_6 below).
*

	xref	tag
	xref	_OPr_
	xref	_OPz_
	xref	_OPi_
	xref	_OPn_
	xref	_OPm_

*
*	_OPd_ --- double-precision monadic entry point
*
*	In:	8(a6)	double argument (converted into ETEMP)
*	Out:	fp0	result
*	While the handler runs, d1 holds the caller's FPCR and the FPCR
*	itself is zero.
*	Saved and restored: fp2-fp3.
*	Not preserved: fp0, fp1, d0, d1, a0.
*
	xdef	_OPd_
_OPd_:
	link	a6,#-LOCAL_SIZE
	fmovem.x fp2-fp3,USER_FP2(a6)
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,d1			; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
*
*	copy, convert and tag input argument
*
	fmove.d	8(a6),fp0
	fmove.x	fp0,ETEMP(a6)
	lea	ETEMP(a6),a0
	bsr	tag
	move.b	d0,STAG(a6)
*
*	dispatch on the tag byte in d0
*
	tst.b	d0
	bne.b	_TMP_2
	bsr	_OPr_			; normalized (regular) number
	bra.b	_TMP_6
_TMP_2:
	cmp.b	#$20,d0			; zero?
	bne.b	_TMP_3
	bsr	_OPz_
	bra.b	_TMP_6
_TMP_3:
	cmp.b	#$40,d0			; infinity?
	bne.b	_TMP_4
	bsr	_OPi_
	bra.b	_TMP_6
_TMP_4:
	cmp.b	#$60,d0			; NaN?
	bne.b	_TMP_5
	bsr	_OPn_
	bra.b	_TMP_6
_TMP_5:
	bsr	_OPm_			; assuming a denorm...

*
*	The original stored d0 into USER_FPSR(a6), a frame slot that
*	unlk discards, so the accrued exceptions merged here never
*	reached the FPSR.  Write the FPSR directly instead.
*
_TMP_6:
	fmove.l	fpsr,d0			; update status register
	or.b	USER_FPSR+3(a6),d0	; add previously accrued exceptions
	fmove.l	d0,fpsr
*
*	Result is now in FP0
*
	fmovem.x USER_FP2(a6),fp2-fp3	; note: FP0/FP1 not restored
	unlk	a6
	rts
diff --git a/sys/arch/m68k/fpsp/MONADIC.GCC b/sys/arch/m68k/fpsp/MONADIC.GCC new file mode 100644 index 00000000000..a8b7ce142ae --- /dev/null +++ b/sys/arch/m68k/fpsp/MONADIC.GCC @@ -0,0 +1,203 @@ +* $NetBSD: MONADIC.GCC,v 1.2 1994/10/26 07:48:40 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. 
+* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* MONADIC.GCC --- MONADIC template for GCC compiler +* +* This is based on the generic template. The only difference is that +* GCC does not need the d0-d1/a0-a1 registers saved. +* +* Customizations: +* 2. Likewise, don't save FP0/FP1 if they are scratch +* registers. +* 3. Delete handling of the fpsr if you only care about +* the result. +* 5. Move the result to d0/d1 if the compiler is that old. 
*
*	Restyled from the Motorola original: the three entry points
*	differ only in the size of the operand fetched from 8(a6), so
*	the tagging, dispatch and epilogue live once, at _TMP_1 inside
*	_OPd_.  _OPs_ and _OPx_ branch there.  The dispatch is written
*	as "branch to the matching handler call" rather than a chain of
*	compare-and-skip tests.  _TMP_ is replaced with a unique prefix
*	by the sed commands L_ENTRY.AWK emits.
*

	xref	tag
	xref	_OPr_
	xref	_OPz_
	xref	_OPi_
	xref	_OPn_
	xref	_OPm_

*
*	_OPs_ --- single-precision operand at 8(a6)
*
	xdef	_OPs_
_OPs_:
	link	a6,#-LOCAL_SIZE
	fmovem.x fp0-fp3,USER_FP0(a6)
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,USER_FPCR(a6)
	fmove.l	fpcr,d1			; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
	fmove.s	8(a6),fp0
	bra	_TMP_1

*
*	_OPd_ --- double-precision operand at 8(a6)
*
	xdef	_OPd_
_OPd_:
	link	a6,#-LOCAL_SIZE
	fmovem.x fp0-fp3,USER_FP0(a6)
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,USER_FPCR(a6)
	fmove.l	fpcr,d1			; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
	fmove.d	8(a6),fp0		; falls into _TMP_1
*
*	Common tail.  On entry fp0 holds the operand and d1 the
*	caller's FPCR.  Store the operand in ETEMP, tag it, and call
*	the handler selected by the tag byte in d0.
*
_TMP_1:
	fmove.x	fp0,ETEMP(a6)
	lea	ETEMP(a6),a0
	bsr	tag
	move.b	d0,STAG(a6)
	tst.b	d0
	beq.b	_TMP_2			; tag 0: normalized (regular) number
	cmp.b	#$20,d0
	beq.b	_TMP_3			; zero
	cmp.b	#$40,d0
	beq.b	_TMP_4			; infinity
	cmp.b	#$60,d0
	beq.b	_TMP_5			; NaN
	bsr	_OPm_			; anything else: assuming a denorm...
	bra.b	_TMP_6
_TMP_2:
	bsr	_OPr_
	bra.b	_TMP_6
_TMP_3:
	bsr	_OPz_
	bra.b	_TMP_6
_TMP_4:
	bsr	_OPi_
	bra.b	_TMP_6
_TMP_5:
	bsr	_OPn_

_TMP_6:
	fmove.l	fpsr,d0			; update status register
	or.b	FPSR_AEXCEPT(a6),d0	; add previously accrued exceptions
	fmove.l	d0,fpsr
*
*	Result is now in FP0
*
	fmovem.x USER_FP1(a6),fp1-fp3	; note: FP0 not restored
	fmove.l	USER_FPCR(a6),fpcr	; fpcr restored
	unlk	a6
	rts

*
*	_OPx_ --- extended-precision operand at 8(a6)
*
	xdef	_OPx_
_OPx_:
	link	a6,#-LOCAL_SIZE
	fmovem.x fp0-fp3,USER_FP0(a6)
	fmove.l	fpsr,USER_FPSR(a6)
	fmove.l	fpcr,USER_FPCR(a6)
	fmove.l	fpcr,d1			; user's rounding mode/precision
	fmove.l	#0,fpcr			; force rounding mode/prec to extended,rn
	fmove.x	8(a6),fp0
	bra	_TMP_1

diff --git a/sys/arch/m68k/fpsp/MONADIC.GEN b/sys/arch/m68k/fpsp/MONADIC.GEN new file mode 100644 index 00000000000..5e6581b9d99 --- /dev/null +++ b/sys/arch/m68k/fpsp/MONADIC.GEN @@ -0,0 +1,230 @@ +* $NetBSD: MONADIC.GEN,v 1.3 1994/10/26 07:48:42 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. 
+* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* MONADIC.GEN 1.5 5/18/92 +* +* MONADIC.GEN 1.4 1/16/92 +* +* MONADIC.GEN 1.3 4/30/91 +* +* MONADIC.GEN --- generic MONADIC template +* +* This version saves all registers that will be used by the emulation +* routines and restores all but FP0 on exit. The FPSR is +* updated to reflect the result of the operation. Return value +* is placed in FP0 for single, double and extended results. +* +* The package subroutines expect the incoming FPCR to be zeroed +* since they need extended precision to work properly. The +* 'final' FPCR is expected in d1 so that the calculated result +* can be properly sized and rounded. Also, if the incoming FPCR +* has enabled any exceptions, the exception will be taken on the +* final fmovem in this template. +* +* Customizations: +* 1. 
Remove the movem.l at the entry and exit of +* each routine if your compiler treats those +* registers as scratch. +* 2. Likewise, don't save FP0/FP1 if they are scratch +* registers. +* 3. Delete handling of the fpsr if you only care about +* the result. +* 4. Some (most?) C compilers convert all float arguments +* to double, and provide no support at all for extended +* precision so remove the _OPs_ and _OPx_ entry points. +* 5. Move the result to d0/d1 if the compiler is that old. +* + + xref tag + xref _OPr_ + xref _OPz_ + xref _OPi_ + xref _OPn_ + xref _OPm_ + + xdef _OPs_ +_OPs_: + link a6,#-LOCAL_SIZE ; build the a6 frame with LOCAL_SIZE bytes of locals + movem.l d0-d1/a0-a1,USER_DA(a6) ; save d0-d1/a0-a1 in the frame + fmovem.x fp0-fp3,USER_FP0(a6) ; save fp0-fp3 in the frame + fmove.l fpsr,USER_FPSR(a6) ; save the incoming fpsr + fmove.l fpcr,USER_FPCR(a6) ; save the incoming fpcr (reloaded on exit) + fmove.l fpcr,d1 ; user's rounding mode/precision + fmove.l #0,fpcr ; force rounding mode/prec to extended,rn +* +* copy, convert and tag input argument +* + fmove.s 8(a6),fp0 ; fetch the single precision argument + fmove.x fp0,ETEMP(a6) ; store it in extended format at ETEMP + lea ETEMP(a6),a0 ; a0 = address of ETEMP for the tag call + bsr tag ; d0 is used as the operand tag on return + move.b d0,STAG(a6) ; record the tag in STAG + tst.b d0 ; a tag of 0 selects the normalized handler + bne.b _TMP_2 + bsr _OPr_ ; normalized (regular) number + bra.b _TMP_6 +_TMP_2: + cmp.b #$20,d0 ; zero? + bne.b _TMP_3 + bsr _OPz_ + bra.b _TMP_6 +_TMP_3: + cmp.b #$40,d0 ; infinity? + bne.b _TMP_4 + bsr _OPi_ + bra.b _TMP_6 +_TMP_4: + cmp.b #$60,d0 ; NaN? + bne.b _TMP_5 + bsr _OPn_ + bra.b _TMP_6 +_TMP_5: + bsr _OPm_ ; assuming a denorm...
+ +_TMP_6: + fmove.l fpsr,d0 ; update status register + or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions + fmove.l d0,fpsr ; write the merged status back to fpsr +* +* Result is now in FP0 +* + movem.l USER_DA(a6),d0-d1/a0-a1 ; reload d0-d1/a0-a1 saved on entry + fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored + fmove.l USER_FPCR(a6),fpcr ; fpcr restored + unlk a6 ; release the frame + rts + + xdef _OPd_ +_OPd_: + link a6,#-LOCAL_SIZE ; build the a6 frame with LOCAL_SIZE bytes of locals + movem.l d0-d1/a0-a1,USER_DA(a6) ; save d0-d1/a0-a1 in the frame + fmovem.x fp0-fp3,USER_FP0(a6) ; save fp0-fp3 in the frame + fmove.l fpsr,USER_FPSR(a6) ; save the incoming fpsr + fmove.l fpcr,USER_FPCR(a6) ; save the incoming fpcr (reloaded on exit) + fmove.l fpcr,d1 ; user's rounding mode/precision + fmove.l #0,fpcr ; force rounding mode/prec to extended,rn +* +* copy, convert and tag input argument +* + fmove.d 8(a6),fp0 ; fetch the double precision argument + fmove.x fp0,ETEMP(a6) ; store it in extended format at ETEMP + lea ETEMP(a6),a0 ; a0 = address of ETEMP for the tag call + bsr tag ; d0 is used as the operand tag on return + move.b d0,STAG(a6) ; record the tag in STAG + tst.b d0 ; a tag of 0 selects the normalized handler + bne.b _TMP_7 + bsr _OPr_ ; normalized (regular) number + bra.b _TMP_B +_TMP_7: + cmp.b #$20,d0 ; zero? + bne.b _TMP_8 + bsr _OPz_ + bra.b _TMP_B +_TMP_8: + cmp.b #$40,d0 ; infinity? + bne.b _TMP_9 + bsr _OPi_ + bra.b _TMP_B +_TMP_9: + cmp.b #$60,d0 ; NaN? + bne.b _TMP_A + bsr _OPn_ + bra.b _TMP_B +_TMP_A: + bsr _OPm_ ; assuming a denorm... + +_TMP_B: + fmove.l fpsr,d0 ; update status register + or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions + fmove.l d0,fpsr ; write the merged status back to fpsr +* +* Result is now in FP0 +* + movem.l USER_DA(a6),d0-d1/a0-a1 ; reload d0-d1/a0-a1 saved on entry + fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored + fmove.l USER_FPCR(a6),fpcr ; fpcr restored + unlk a6 ; release the frame + rts + + xdef _OPx_ +_OPx_: + link a6,#-LOCAL_SIZE ; build the a6 frame with LOCAL_SIZE bytes of locals + movem.l d0-d1/a0-a1,USER_DA(a6) ; save d0-d1/a0-a1 in the frame + fmovem.x fp0-fp3,USER_FP0(a6) ; save fp0-fp3 in the frame + fmove.l fpsr,USER_FPSR(a6) ; save the incoming fpsr + fmove.l fpcr,USER_FPCR(a6) ; save the incoming fpcr (reloaded on exit) + fmove.l fpcr,d1 ; user's rounding mode/precision + fmove.l #0,fpcr ; force rounding mode/prec to extended,rn +* +* copy, convert and tag input argument +* + fmove.x 8(a6),fp0 ; fetch the extended precision argument + fmove.x fp0,ETEMP(a6) ; store it in extended format at ETEMP + lea ETEMP(a6),a0 ; a0 = address of ETEMP for the tag call + bsr tag ; d0 is used as the operand tag on return + move.b d0,STAG(a6) ; record the tag in STAG + tst.b d0 ; a tag of 0 selects the normalized handler + bne.b _TMP_C + bsr _OPr_ ; normalized (regular) number + bra.b _TMP_G +_TMP_C: + cmp.b #$20,d0 ; zero?
+ bne.b _TMP_D + bsr _OPz_ + bra.b _TMP_G +_TMP_D: + cmp.b #$40,d0 ; infinity? + bne.b _TMP_E + bsr _OPi_ + bra.b _TMP_G +_TMP_E: + cmp.b #$60,d0 ; NaN? + bne.b _TMP_F + bsr _OPn_ + bra.b _TMP_G +_TMP_F: + bsr _OPm_ ; assuming a denorm... + +_TMP_G: + fmove.l fpsr,d0 ; update status register + or.b FPSR_AEXCEPT(a6),d0 ;add previously accrued exceptions + fmove.l d0,fpsr ; write the merged status back to fpsr +* +* Result is now in FP0 +* + movem.l USER_DA(a6),d0-d1/a0-a1 ; reload d0-d1/a0-a1 saved on entry + fmovem.x USER_FP1(a6),fp1-fp3 ; note: FP0 not restored + fmove.l USER_FPCR(a6),fpcr ; fpcr restored + unlk a6 ; release the frame + rts + diff --git a/sys/arch/m68k/fpsp/MONADIC.R3V6 b/sys/arch/m68k/fpsp/MONADIC.R3V6 new file mode 100644 index 00000000000..a4e494acc19 --- /dev/null +++ b/sys/arch/m68k/fpsp/MONADIC.R3V6 @@ -0,0 +1,91 @@ +* $NetBSD: MONADIC.R3V6,v 1.2 1994/10/26 07:48:44 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE.
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* MONADIC.R3V6 1.3 4/30/91 +* +* MONADIC.R3V6 --- MONADIC template for MCD R3V6 native C compiler +* +* The MCD compiler is old. It returns float and double values +* as a double stored in d0/d1. There is no support for single or extended +* precision operations. It's not clear whether the float registers +* should be preserved, so for speed, they're not. +* + + xref tag + xref _OPr_ + xref _OPz_ + xref _OPi_ + xref _OPn_ + xref _OPm_ + + xdef _OPd_ +_OPd_: + link a6,#-LOCAL_SIZE ; build the a6 frame with LOCAL_SIZE bytes of locals + fmove.l fpcr,d1 ; user's rounding mode/precision + fmove.l #0,fpcr ; force rounding mode/prec to extended,rn +* +* copy, convert and tag input argument +* + fmove.d 8(a6),fp0 ; fetch the double precision argument + fmove.x fp0,ETEMP(a6) ; store it in extended format at ETEMP + lea ETEMP(a6),a0 ; a0 = address of ETEMP for the tag call + bsr tag ; d0 is used as the operand tag on return + move.b d0,STAG(a6) ; record the tag in STAG + tst.b d0 ; a tag of 0 selects the normalized handler + bne.b _TMP_2 + bsr _OPr_ ; normalized (regular) number + bra.b _TMP_6 +_TMP_2: + cmp.b #$20,d0 ; zero? + bne.b _TMP_3 + bsr _OPz_ + bra.b _TMP_6 +_TMP_3: + cmp.b #$40,d0 ; infinity? + bne.b _TMP_4 + bsr _OPi_ + bra.b _TMP_6 +_TMP_4: + cmp.b #$60,d0 ; NaN? + bne.b _TMP_5 + bsr _OPn_ + bra.b _TMP_6 +_TMP_5: + bsr _OPm_ ; assuming a denorm...
+ +_TMP_6: + fmove.d fp0,USER_D0(a6) ; result goes into d0/d1 pair + movem.l USER_D0(a6),d0-d1 ; d0/d1 = the two longwords of that double + unlk a6 ; release the frame - this template does not reload fpcr itself + rts + diff --git a/sys/arch/m68k/fpsp/Makefile b/sys/arch/m68k/fpsp/Makefile new file mode 100644 index 00000000000..ce9dcca6411 --- /dev/null +++ b/sys/arch/m68k/fpsp/Makefile @@ -0,0 +1,338 @@ +# $NetBSD: Makefile,v 1.4 1994/10/26 07:48:46 cgd Exp $ + +# MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +# M68000 Hi-Performance Microprocessor Division +# M68040 Software Package +# +# M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +# All rights reserved. +# +# THE SOFTWARE is provided on an "AS IS" basis and without warranty. +# To the maximum extent permitted by applicable law, +# MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +# INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +# PARTICULAR PURPOSE and any warranty against infringement with +# regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +# and any accompanying written materials. +# +# To the maximum extent permitted by applicable law, +# IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +# (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +# PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +# OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +# SOFTWARE. Motorola assumes no responsibility for the maintenance +# and support of the SOFTWARE. +# +# You are hereby granted a copyright license to use, modify, and +# distribute the SOFTWARE so long as this entire notice is retained +# without alteration in any modified and/or redistributed versions, +# and that such modified versions are clearly identified as such. +# No licenses are granted by implication, estoppel or otherwise +# under any patents or trademarks of Motorola, Inc.
+ +# +# Makefile 3.3 3/27/91 +# +# Makefile for 68040 Floating Point Software Package +# + +TARGET = fpsp + +AS = as -m68040 +LD = ld + +# +# For the Library Version: +# +AR = ar +LIB_FILTER = sed 's/fpsp.defs/l_fpsp.defs/' +LIB_TARGET = lib$(TARGET).a +# +# SYS selects the template set to use +# templates are supplied for R3V6, CI5, GEN(generic) and GCC +# PREFIX is a string that begins a temporary label in the assembler +# R3V6 uses 'L%', CI5 likes '.L' +# +#SYS = R3V6 +#PREFIX = L%% +# +#SYS = CI5 +#PREFIX = .L +# +#SYS = GEN +#PREFIX = L_ +# +SYS = GCC +PREFIX = L_ + +.SUFFIXES: .o .s .sa .defs .h + +.sa.s: + sh ${.CURDIR}/asm2gas ${.IMPSRC} >${.TARGET} +.h.defs: + sh ${.CURDIR}/asm2gas ${.IMPSRC} >${.TARGET} +.s.o: + $(AS) -o ${.TARGET} ${.IMPSRC} + +H_FILES = \ + fpsp.defs \ + l_fpsp.defs + +O_FILES = \ + copyright.o \ + netbsd.o \ + bindec.o \ + binstr.o \ + decbin.o \ + do_func.o \ + gen_except.o \ + get_op.o \ + kernel_ex.o \ + res_func.o \ + round.o \ + sacos.o \ + sasin.o \ + satan.o \ + satanh.o \ + scosh.o \ + setox.o \ + sgetem.o \ + sint.o \ + slogn.o \ + slog2.o \ + smovecr.o \ + srem_mod.o \ + scale.o \ + ssin.o \ + ssinh.o \ + stan.o \ + stanh.o \ + sto_res.o \ + stwotox.o \ + tbldo.o \ + util.o \ + x_bsun.o \ + x_fline.o \ + x_operr.o \ + x_ovfl.o \ + x_snan.o \ + x_store.o \ + x_unfl.o \ + x_unimp.o \ + x_unsupp.o \ + bugfix.o + +LIB_O_FILES = \ + l_copyright.o \ + l_entry.o \ + l_do_func.o \ + l_round.o \ + l_sacos.o \ + l_sasin.o \ + l_satan.o \ + l_satanh.o \ + l_scale.o \ + l_scosh.o \ + l_setox.o \ + l_sgetem.o \ + l_sint.o \ + l_slog2.o \ + l_slogn.o \ + l_srem_mod.o \ + l_ssin.o \ + l_ssinh.o \ + l_stan.o \ + l_stanh.o \ + l_stwotox.o \ + l_support.o + +S_FILES = \ + netbsd.s \ + bindec.s \ + binstr.s \ + decbin.s \ + do_func.s \ + get_op.s \ + gen_except.s \ + kernel_ex.s \ + res_func.s \ + round.s \ + sacos.s \ + sasin.s \ + satan.s \ + satanh.s \ + scosh.s \ + setox.s \ + sgetem.s \ + sint.s \ + slogn.s \ + slog2.s \ + smovecr.s \ +
srem_mod.s \ + scale.s \ + ssin.s \ + ssinh.s \ + stan.s \ + stanh.s \ + sto_res.s \ + stwotox.s \ + tbldo.s \ + util.s \ + x_bsun.s \ + x_fline.s \ + x_operr.s \ + x_ovfl.s \ + x_snan.s \ + x_store.s \ + x_unfl.s \ + x_unimp.s \ + x_unsupp.s \ + bugfix.s + +LIB_S_FILES = \ + l_entry.sa l_entry.s l_copyright.s \ + l_do_func.s \ + l_round.s \ + l_sacos.s \ + l_sasin.s \ + l_satan.s \ + l_satanh.s \ + l_scale.s \ + l_scosh.s \ + l_setox.s \ + l_sgetem.s \ + l_sint.s \ + l_slog2.s \ + l_slogn.s \ + l_srem_mod.s \ + l_ssin.s \ + l_ssinh.s \ + l_stan.s \ + l_stanh.s \ + l_stwotox.s \ + l_support.s + +# +# Build the target object: $(O_FILES) combined into one relocatable +# object by ld -r. +# +$(TARGET).o: $(O_FILES) + $(LD) -r -o $(TARGET).o $(O_FILES) + +# +# Just about every file needs fpsp.h so: +# +$(O_FILES): fpsp.defs + +# +#----------------------------------------------------------------------- +# +# For making a library version of the FPSP: +# +library: $(LIB_TARGET) + +$(LIB_TARGET): $(LIB_O_FILES) + rm -f $(LIB_TARGET) + $(AR) crv $(LIB_TARGET) $(LIB_O_FILES) + +$(LIB_O_FILES): l_fpsp.defs + +# +# The entry points to the library version are created here +# by using two template files, an awk script and a list of +# the entry routines for each function. +# +l_entry.sa: L_ENTRY.AWK L_LIST MONADIC.$(SYS) DYADIC.$(SYS) l_fpsp.h + awk -f L_ENTRY.AWK SYS=$(SYS) PREFIX=$(PREFIX) - <L_LIST|sh>l_entry.sa + +# +# Do_func.sa and round.sa need special editing to remove references that +# aren't needed in the library version. Beware that changes in +# the source code may cause this editing to break....
+# +l_do_func.s: do_func.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + echo '/global.*do_func/,/^ rts/d' >.SCRIPT + echo 'g/smovcr/d' >>.SCRIPT + echo 'g/tblpre/d' >>.SCRIPT + echo 'w' >>.SCRIPT + echo 'q' >>.SCRIPT + ed - ${.TARGET} <.SCRIPT + rm .SCRIPT + +l_round.s: round.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + echo '/^not_E3:/-6,/^not_E3:/d' >.SCRIPT + echo 'w' >>.SCRIPT + echo 'q' >>.SCRIPT + ed - ${.TARGET} <.SCRIPT + rm .SCRIPT + +l_copyright.s: copyright.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_sacos.s: sacos.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_sasin.s: sasin.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_satan.s: satan.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_satanh.s: satanh.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_scale.s: scale.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_scosh.s: scosh.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_setox.s: setox.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_sgetem.s: sgetem.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_sint.s: sint.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_slog2.s: slog2.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_slogn.s: slogn.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_srem_mod.s: srem_mod.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_ssin.s: ssin.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_ssinh.s: ssinh.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_stan.s: stan.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_stanh.s: stanh.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +l_stwotox.s: stwotox.s + $(LIB_FILTER) ${.ALLSRC} >${.TARGET} + +# +# Remove every generated file +# +clean: + rm -f $(H_FILES) + rm -f $(S_FILES) + rm -f $(O_FILES) + rm -f $(TARGET).o + rm -f $(LIB_S_FILES) + rm -f $(LIB_O_FILES) + rm -f $(LIB_TARGET) + +clobber: clean + diff --git a/sys/arch/m68k/fpsp/Makefile.inc b/sys/arch/m68k/fpsp/Makefile.inc new file mode 100644 index 00000000000..4a76434aae3 --- /dev/null +++ b/sys/arch/m68k/fpsp/Makefile.inc @@ -0,0 +1,18 @@ +# $NetBSD:
Makefile.inc,v 1.2 1994/10/26 07:48:47 cgd Exp $ +# +# NOTE: $S must correspond to the top of the `sys' tree + +FPSPSRCDIR= $S/arch/m68k/fpsp + +FPSPOBJDIR!= cd $(FPSPSRCDIR); \ + printf "xxx:\n\techo \$${.OBJDIR}\n" | $(MAKE) -r -s -f - xxx + +FPSPOBJ= $(FPSPOBJDIR)/fpsp.o + +$(FPSPOBJ): .NOTMAIN __always_make_fpsp + @echo making sure the fpsp is up to date... + @(cd $(FPSPSRCDIR) ; $(MAKE)) + +FPSP!= printf "\#ifdef FPSP\n${FPSPOBJ}\n\#endif\n" | cpp -P -undef ${COPTS:M-DFPSP} + +__always_make_fpsp: .NOTMAIN diff --git a/sys/arch/m68k/fpsp/asm2gas b/sys/arch/m68k/fpsp/asm2gas new file mode 100644 index 00000000000..af3f7702cfe --- /dev/null +++ b/sys/arch/m68k/fpsp/asm2gas @@ -0,0 +1,163 @@ +#!/bin/sh +# $NetBSD: asm2gas,v 1.3 1994/10/26 07:48:49 cgd Exp $ + +# +# Copyright (c) 1994 Charles Hannum. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. All advertising materials mentioning features or use of this software +# must display the following acknowledgement: +# This product includes software developed by Charles Hannum. +# 4. The name of the author may not be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +# This ugly script converts the assembler source named by $1 from +# Motorola's format to a form that gas (MIT syntax) can digest, on stdout. + +cat $1 | sed -e ' + # format canonicalization: comment markers, equ, labels, lower-case mnemonics + + /[ ]IDNT[ ]/{s/^/|/;p;d;} + /^\*/{s//|/;p;d;} + s/;/|/ + /[ ]equ[ ]/{ + s/\([A-Za-z_][A-Za-z0-9_]*\)[ ]*equ[ ]*/\1,/ + s/[ ][ ]*\(.*\)$/ |\1/ + s/ ||/ |/ + s/^/ .set / + p;d + } + s/^\([A-Za-z_][A-Za-z0-9_]*\)[ ][ ]*/\1: / + s/^\([A-Za-z_][A-Za-z0-9_]*\)$/\1:/ + /^[A-Za-z_][A-Za-z0-9_]*:/{ + h + s/:.*$/:/ + p + g + s/^.*:[ ]*/ / + /^ $/d + } + /^[ ][ ]*\([.a-zA-Z][.a-zA-Z0-9]*\)/{ + h + s/// + s/^[ ][ ]*// + s/[ ][ ]*\(.*\)$/ |\1/ + s/ ||/ |/ + x + s/^[ ][ ]*// + s/[ ][ ]*.*$/ / + y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ + s/^/ / + G + s/\n// + } +' | sed -e ' + # operator conversion: directives, and size suffixes folded into the mnemonic + + s/^ section 7/ .text/ + s/^ section 8/ .text/ + s/^ section 15/ .data/ + /^ include/{s/include[ ]/.include "/;s/\.h[ ]*$/.defs"/;p;d;} + s/^ xref/| xref/ + s/^ end/| end/ + s/^ xdef/ .global/ + + s/^ dc\.l/ .long/ + s/^ dc\.w/ .short/ + s/^ dc\.b/ .byte/ + + /^ [aceg-z]/{ + /^ add[aiqx]*\.[bwl] /{s/\.//;p;d;} + /^ andi*\.[bwl] /{s/\.//;p;d;} + /^ as[lr]\.[bwl] /{s/\.//;p;d;} + /^ clr\.[bwl] /{s/\.//;p;d;} + /^ cmp[i2]*\.[bwl] /{s/\.//;p;d;} + /^ eori*\.[bwl] /{s/\.//;p;d;} + /^ lea\.l /{s/\..//;p;d;} + /^ ls[lr]\.[bwl] /{s/\.//;p;d;} + /^ move[acmqs]*\.[bwl] /{s/\.//;p;d;} + /^ mul[su]\.[wl] /{s/\.//;p;d;} + /^ neg\.[bwl] /{s/\.//;p;d;} + /^ ori*\.[bwl] /{s/\.//;p;d;} + /^ ro[lrx]*\.[bwl]
/{s/\.//;p;d;} + /^ sub[aiqx]*\.[bwl] /{s/\.//;p;d;} + /^ swap\.w /{s/\..//;p;d;} + /^ s\([a-tv-z][a-z]*\)\.b /{s/\..//;p;d;} + /^ tst\.[bwl] /{s/\.//;p;d;} + p;d + } + + /^ bchg\.[bl] /{s/\..//;p;d;} + /^ bclr\.[bl] /{s/\..//;p;d;} + /^ bset\.[bl] /{s/\..//;p;d;} + /^ btst\.[bl] /{s/\..//;p;d;} + /^ div[sul]*\.[wl] /{s/\.//;p;d;} + /^ fabs\.[sdx] /{s/\.//;p;d;} + /^ fadd\.[sdxbwl] /{s/\.//;p;d;} + /^ fcmp\.[sdxbwl] /{s/\.//;p;d;} + /^ fdiv\.[sdx] /{s/\.//;p;d;} + /^ fmove[mx]*\.[sdxbwl] /{s/\.//;p;d;} + /^ fmul\.[sdx] /{s/\.//;p;d;} + /^ fneg\.[sdx] /{s/\.//;p;d;} + /^ fsqrt\.[sdx] /{s/\.//;p;d;} + /^ fsub\.[sdxbwl] /{s/\.//;p;d;} + /^ ftst\.[sdx] /{s/\.//;p;d;} + + /^ b[a-eg-z][a-z]*\.b /{s/\.b/s/;p;d;} + /^ b[a-eg-z][a-z]*\.w /{s/\.w//;p;d;} + /^ b[a-eg-z][a-z]*\.l /{s/\.l/l/;p;d;} + /^ db[a-z][a-z]*\.w /{s/\.w//;p;d;} + /^ fb[a-eg-z][a-z]*\.w /{s/\.w//;p;d;} + /^ fb[a-eg-z][a-z]*\.l /{s/\.l/l/;p;d;} +' | sed -e ' + # operand conversion: fpiar name, hex prefix, addressing modes + + s/\([^_a-zA-Z0-9]\)FPIAR\([^_a-zA-Z0-9]\)/\1FPI\2/g + s/\([^_a-zA-Z0-9]\)FPIAR\([^_a-zA-Z0-9]\)/\1FPI\2/g + s/\([^_a-zA-Z0-9]\)FPIAR$/\1FPI/g + s/\([^_a-zA-Z0-9]\)fpiar\([^_a-zA-Z0-9]\)/\1fpi\2/g + s/\([^_a-zA-Z0-9]\)fpiar\([^_a-zA-Z0-9]\)/\1fpi\2/g + s/\([^_a-zA-Z0-9]\)fpiar$/\1fpi/g + + s/\$/0x/g + s/#:/#:0x/g + + s/-(\([sSpPaA][pPcC0-7]\))/\1@-/g + s/(\([sSpPaA][pPcC0-7]\))+/\1@+/g + s/\([-+A-Za-z0-9_]*\)(\([sSpPaA][pPcC0-7]\)\([),]\)/\2@(\1\3/g + + s/\.\([bBwWlL])\)/:\1/g + s/\.\([bBwWlL]\)\*\([0-9][0-9]*)\)/:\1:\2/g + s/\*\([0-9][0-9]*\))/:l:\1)/g + s/{\([0-9][0-9]*\):\([0-9][0-9]*\)}/{#\1:#\2}/g + s/{\([dD][0-7]\):\([0-9][0-9]*\)}/{\1:#\2}/g + + s/@(0*)/@/g + s/(,/(/g;s/:)/)/g + + # make up for a gas bug: rewrite a single fpN operand of fmovemx as fpN-fpN + /^ fmovemx /{ + s/ \([fF][pP][0-7]\),/ \1-\1,/ + s/,\([fF][pP][0-7]\) /,\1-\1 / + s/,\([fF][pP][0-7]\)$/,\1-\1/ + } +' diff --git a/sys/arch/m68k/fpsp/bindec.sa b/sys/arch/m68k/fpsp/bindec.sa new file mode 100644 index 00000000000..4e68ade209f --- /dev/null +++ b/sys/arch/m68k/fpsp/bindec.sa @@ -0,0 +1,946 @@ +*
$NetBSD: bindec.sa,v 1.3 1994/10/26 07:48:51 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* bindec.sa 3.4 1/3/91 +* +* bindec +* +* Description: +* Converts an input in extended precision format +* to bcd format. +* +* Input: +* a0 points to the input extended precision value +* in memory; d0 contains the k-factor sign-extended +* to 32-bits. The input may be either normalized, +* unnormalized, or denormalized. +* +* Output: result in the FP_SCR1 space on the stack. +* +* Saves and Modifies: D2-D7,A2,FP2 +* +* Algorithm: +* +* A1.
Set RM and size ext; Set SIGMA = sign of input. +* The k-factor is saved for use in d7. Clear the +* BINDEC_FLG for separating normalized/denormalized +* input. If input is unnormalized or denormalized, +* normalize it. +* +* A2. Set X = abs(input). +* +* A3. Compute ILOG. +* ILOG is the log base 10 of the input value. It is +* approximated by adding e + 0.f when the original +* value is viewed as 2^^e * 1.f in extended precision. +* This value is stored in d6. +* +* A4. Clr INEX bit. +* The operation in A3 above may have set INEX2. +* +* A5. Set ICTR = 0; +* ICTR is a flag used in A13. It must be set before the +* loop entry A6. +* +* A6. Calculate LEN. +* LEN is the number of digits to be displayed. The +* k-factor can dictate either the total number of digits, +* if it is a positive number, or the number of digits +* after the decimal point which are to be included as +* significant. See the 68882 manual for examples. +* If LEN is computed to be greater than 17, set OPERR in +* USER_FPSR. LEN is stored in d4. +* +* A7. Calculate SCALE. +* SCALE is equal to 10^ISCALE, where ISCALE is the number +* of decimal places needed to ensure LEN integer digits +* in the output before conversion to bcd. LAMBDA is the +* sign of ISCALE, used in A9. Fp1 contains +* 10^^(abs(ISCALE)) using a rounding mode which is a +* function of the original rounding mode and the signs +* of ISCALE and X. A table is given in the code. +* +* A8. Clr INEX; Force RZ. +* The operation in A3 above may have set INEX2. +* RZ mode is forced for the scaling operation to ensure +* only one rounding error. The grs bits are collected in +* the INEX flag for use in A10. +* +* A9. Scale X -> Y. +* The mantissa is scaled to the desired number of +* significant digits. The excess digits are collected +* in INEX2. +* +* A10. Or in INEX. +* If INEX is set, a rounding error occurred. This is +* compensated for by 'or-ing' in the INEX2 flag to +* the lsb of Y. +* +* A11. Restore original FPCR; set size ext.
+* Perform FINT operation in the user's rounding mode. +* Keep the size to extended. +* +* A12. Calculate YINT = FINT(Y) according to user's rounding +* mode. The FPSP routine sintdo is used. The output +* is in fp0. +* +* A13. Check for LEN digits. +* If the int operation results in more than LEN digits, +* or fewer than LEN - 1 digits, adjust ILOG and repeat from +* A6. This test occurs only on the first pass. If the +* result is exactly 10^LEN, decrement ILOG and divide +* the mantissa by 10. +* +* A14. Convert the mantissa to bcd. +* The binstr routine is used to convert the LEN digit +* mantissa to bcd in memory. The input to binstr is +* to be a fraction; i.e. (mantissa)/10^LEN and adjusted +* such that the decimal point is to the left of bit 63. +* The bcd digits are stored in the correct position in +* the final string area in memory. +* +* A15. Convert the exponent to bcd. +* As in A14 above, the exp is converted to bcd and the +* digits are stored in the final string. +* Test the length of the final exponent string. If the +* length is 4, set operr. +* +* A16. Write sign bits to final string.
+* +* Implementation Notes: +* +* The registers are used as follows: +* +* d0: scratch; LEN input to binstr +* d1: scratch +* d2: upper 32-bits of mantissa for binstr +* d3: scratch;lower 32-bits of mantissa for binstr +* d4: LEN +* d5: LAMBDA/ICTR +* d6: ILOG +* d7: k-factor +* a0: ptr for original operand/final result +* a1: scratch pointer +* a2: pointer to FP_X; abs(original value) in ext +* fp0: scratch +* fp1: scratch +* fp2: scratch +* F_SCR1: +* F_SCR2: +* L_SCR1: +* L_SCR2: +* + +BINDEC IDNT 2,1 Motorola 040 Floating Point Software Package + + include fpsp.h + + section 8 + +* Constants in extended precision +LOG2 dc.l $3FFD0000,$9A209A84,$FBCFF798,$00000000 +LOG2UP1 dc.l $3FFD0000,$9A209A84,$FBCFF799,$00000000 + +* Constants in single precision +FONE dc.l $3F800000,$00000000,$00000000,$00000000 +FTWO dc.l $40000000,$00000000,$00000000,$00000000 +FTEN dc.l $41200000,$00000000,$00000000,$00000000 +F4933 dc.l $459A2800,$00000000,$00000000,$00000000 + +RBDTBL dc.b 0,0,0,0 + dc.b 3,3,2,2 + dc.b 3,2,2,3 + dc.b 2,3,3,2 + + xref binstr + xref sintdo + xref ptenrn,ptenrm,ptenrp + + xdef bindec + xdef sc_mul +bindec: + movem.l d2-d7/a2,-(a7) + fmovem.x fp0-fp2,-(a7) + +* A1. Set RM and size ext. Set SIGMA = sign input; +* The k-factor is saved for use in d7. Clear BINDEC_FLG for +* separating normalized/denormalized input. If the input +* is a denormalized number, set the BINDEC_FLG memory word +* to signal denorm. If the input is unnormalized, normalize +* the input and test for denormalized result. 
+* + fmove.l #rm_mode,FPCR ;set RM and ext + move.l (a0),L_SCR2(a6) ;save exponent for sign check + move.l d0,d7 ;move k-factor to d7 + clr.b BINDEC_FLG(a6) ;clr norm/denorm flag + move.w STAG(a6),d0 ;get stag + andi.w #$e000,d0 ;isolate stag bits + beq A2_str ;if zero, input is norm +* +* Normalize the denorm +* +un_de_norm: + move.w (a0),d0 + andi.w #$7fff,d0 ;strip sign of normalized exp + move.l 4(a0),d1 + move.l 8(a0),d2 +norm_loop: + sub.w #1,d0 + add.l d2,d2 + addx.l d1,d1 + tst.l d1 + bge.b norm_loop +* +* Test if the normalized input is denormalized +* + tst.w d0 + bgt.b pos_exp ;if greater than zero, it is a norm + st BINDEC_FLG(a6) ;set flag for denorm +pos_exp: + andi.w #$7fff,d0 ;strip sign of normalized exp + move.w d0,(a0) + move.l d1,4(a0) + move.l d2,8(a0) + +* A2. Set X = abs(input). +* +A2_str: + move.l (a0),FP_SCR2(a6) ; move input to work space + move.l 4(a0),FP_SCR2+4(a6) ; move input to work space + move.l 8(a0),FP_SCR2+8(a6) ; move input to work space + andi.l #$7fffffff,FP_SCR2(a6) ;create abs(X) + +* A3. Compute ILOG. +* ILOG is the log base 10 of the input value. It is approx- +* imated by adding e + 0.f when the original value is viewed +* as 2^^e * 1.f in extended precision. This value is stored +* in d6. 
+* +* Register usage: +* Input/Output +* d0: k-factor/exponent +* d2: x/x +* d3: x/x +* d4: x/x +* d5: x/x +* d6: x/ILOG +* d7: k-factor/Unchanged +* a0: ptr for original operand/final result +* a1: x/x +* a2: x/x +* fp0: x/float(ILOG) +* fp1: x/x +* fp2: x/x +* F_SCR1:x/x +* F_SCR2:Abs(X)/Abs(X) with $3fff exponent +* L_SCR1:x/x +* L_SCR2:first word of X packed/Unchanged + + tst.b BINDEC_FLG(a6) ;check for denorm + beq.b A3_cont ;if clr, continue with norm + move.l #-4933,d6 ;force ILOG = -4933 + bra.b A4_str +A3_cont: + move.w FP_SCR2(a6),d0 ;move exp to d0 + move.w #$3fff,FP_SCR2(a6) ;replace exponent with 0x3fff + fmove.x FP_SCR2(a6),fp0 ;now fp0 has 1.f + sub.w #$3fff,d0 ;strip off bias + fadd.w d0,fp0 ;add in exp + fsub.s FONE,fp0 ;subtract off 1.0 + fbge.w pos_res ;if pos, branch + fmul.x LOG2UP1,fp0 ;if neg, mul by LOG2UP1 + fmove.l fp0,d6 ;put ILOG in d6 as a lword + bra.b A4_str ;go move out ILOG +pos_res: + fmul.x LOG2,fp0 ;if pos, mul by LOG2 + fmove.l fp0,d6 ;put ILOG in d6 as a lword + + +* A4. Clr INEX bit. +* The operation in A3 above may have set INEX2. + +A4_str: + fmove.l #0,FPSR ;zero all of fpsr - nothing needed + + +* A5. Set ICTR = 0; +* ICTR is a flag used in A13. It must be set before the +* loop entry A6. The lower word of d5 is used for ICTR. + + clr.w d5 ;clear ICTR + + +* A6. Calculate LEN. +* LEN is the number of digits to be displayed. The k-factor +* can dictate either the total number of digits, if it is +* a positive number, or the number of digits after the +* original decimal point which are to be included as +* significant. See the 68882 manual for examples. +* If LEN is computed to be greater than 17, set OPERR in +* USER_FPSR. LEN is stored in d4. 
+* +* Register usage: +* Input/Output +* d0: exponent/Unchanged +* d2: x/x/scratch +* d3: x/x +* d4: exc picture/LEN +* d5: ICTR/Unchanged +* d6: ILOG/Unchanged +* d7: k-factor/Unchanged +* a0: ptr for original operand/final result +* a1: x/x +* a2: x/x +* fp0: float(ILOG)/Unchanged +* fp1: x/x +* fp2: x/x +* F_SCR1:x/x +* F_SCR2:Abs(X) with $3fff exponent/Unchanged +* L_SCR1:x/x +* L_SCR2:first word of X packed/Unchanged + +A6_str: + tst.l d7 ;branch on sign of k + ble.b k_neg ;if k <= 0, LEN = ILOG + 1 - k + move.l d7,d4 ;if k > 0, LEN = k + bra.b len_ck ;skip to LEN check +k_neg: + move.l d6,d4 ;first load ILOG to d4 + sub.l d7,d4 ;subtract off k + addq.l #1,d4 ;add in the 1 +len_ck: + tst.l d4 ;LEN check: branch on sign of LEN + ble.b LEN_ng ;if neg, set LEN = 1 + cmp.l #17,d4 ;test if LEN > 17 + ble.b A7_str ;if not, forget it + move.l #17,d4 ;set max LEN = 17 + tst.l d7 ;if negative, never set OPERR + ble.b A7_str ;if positive, continue + or.l #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR + bra.b A7_str ;finished here +LEN_ng: + moveq.l #1,d4 ;min LEN is 1 + + +* A7. Calculate SCALE. +* SCALE is equal to 10^ISCALE, where ISCALE is the number +* of decimal places needed to insure LEN integer digits +* in the output before conversion to bcd. LAMBDA is the sign +* of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using +* the rounding mode as given in the following table (see +* Coonen, p. 7.23 as ref.; however, the SCALE variable is +* of opposite sign in bindec.sa from Coonen). 
+* +* Initial USE +* FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5] +* ---------------------------------------------- +* RN 00 0 0 00/0 RN +* RN 00 0 1 00/0 RN +* RN 00 1 0 00/0 RN +* RN 00 1 1 00/0 RN +* RZ 01 0 0 11/3 RP +* RZ 01 0 1 11/3 RP +* RZ 01 1 0 10/2 RM +* RZ 01 1 1 10/2 RM +* RM 10 0 0 11/3 RP +* RM 10 0 1 10/2 RM +* RM 10 1 0 10/2 RM +* RM 10 1 1 11/3 RP +* RP 11 0 0 10/2 RM +* RP 11 0 1 11/3 RP +* RP 11 1 0 11/3 RP +* RP 11 1 1 10/2 RM +* +* Register usage: +* Input/Output +* d0: exponent/scratch - final is 0 +* d2: x/0 or 24 for A9 +* d3: x/scratch - offset ptr into PTENRM array +* d4: LEN/Unchanged +* d5: 0/ICTR:LAMBDA +* d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k)) +* d7: k-factor/Unchanged +* a0: ptr for original operand/final result +* a1: x/ptr to PTENRM array +* a2: x/x +* fp0: float(ILOG)/Unchanged +* fp1: x/10^ISCALE +* fp2: x/x +* F_SCR1:x/x +* F_SCR2:Abs(X) with $3fff exponent/Unchanged +* L_SCR1:x/x +* L_SCR2:first word of X packed/Unchanged + +A7_str: + tst.l d7 ;test sign of k + bgt.b k_pos ;if pos and > 0, skip this + cmp.l d6,d7 ;test k - ILOG + blt.b k_pos ;if ILOG >= k, skip this + move.l d7,d6 ;if ((k<0) & (ILOG < k)) ILOG = k +k_pos: + move.l d6,d0 ;calc ILOG + 1 - LEN in d0 + addq.l #1,d0 ;add the 1 + sub.l d4,d0 ;sub off LEN + swap d5 ;use upper word of d5 for LAMBDA + clr.w d5 ;set it zero initially + clr.w d2 ;set up d2 for very small case + tst.l d0 ;test sign of ISCALE + bge.b iscale ;if pos, skip next inst + addq.w #1,d5 ;if neg, set LAMBDA true + cmp.l #$ffffecd4,d0 ;test iscale <= -4908 + bgt.b no_inf ;if false, skip rest + addi.l #24,d0 ;add in 24 to iscale + move.l #24,d2 ;put 24 in d2 for A9 +no_inf: + neg.l d0 ;and take abs of ISCALE +iscale: + fmove.s FONE,fp1 ;init fp1 to 1 + bfextu USER_FPCR(a6){26:2},d1 ;get initial rmode bits + add.w d1,d1 ;put them in bits 2:1 + add.w d5,d1 ;add in LAMBDA + add.w d1,d1 ;put them in bits 3:1 + tst.l L_SCR2(a6) ;test sign of original x + bge.b x_pos ;if pos, don't set bit 0 + addq.l #1,d1 ;if neg, 
set bit 0 +x_pos: + lea.l RBDTBL,a2 ;load rbdtbl base + move.b (a2,d1),d3 ;load d3 with new rmode + lsl.l #4,d3 ;put bits in proper position + fmove.l d3,fpcr ;load bits into fpu + lsr.l #4,d3 ;put bits in proper position + tst.b d3 ;decode new rmode for pten table + bne.b not_rn ;if zero, it is RN + lea.l PTENRN,a1 ;load a1 with RN table base + bra.b rmode ;exit decode +not_rn: + lsr.b #1,d3 ;get lsb in carry + bcc.b not_rp ;if carry clear, it is RM + lea.l PTENRP,a1 ;load a1 with RP table base + bra.b rmode ;exit decode +not_rp: + lea.l PTENRM,a1 ;load a1 with RM table base +rmode: + clr.l d3 ;clr table index +e_loop: + lsr.l #1,d0 ;shift next bit into carry + bcc.b e_next ;if zero, skip the mul + fmul.x (a1,d3),fp1 ;mul by 10**(d3_bit_no) +e_next: + add.l #12,d3 ;inc d3 to next pwrten table entry + tst.l d0 ;test if ISCALE is zero + bne.b e_loop ;if not, loop + + +* A8. Clr INEX; Force RZ. +* The operation in A3 above may have set INEX2. +* RZ mode is forced for the scaling operation to insure +* only one rounding error. The grs bits are collected in +* the INEX flag for use in A10. +* +* Register usage: +* Input/Output + + fmove.l #0,FPSR ;clr INEX + fmove.l #rz_mode,FPCR ;set RZ rounding mode + + +* A9. Scale X -> Y. +* The mantissa is scaled to the desired number of significant +* digits. The excess digits are collected in INEX2. If mul, +* Check d2 for excess 10 exponential value. If not zero, +* the iscale value would have caused the pwrten calculation +* to overflow. Only a negative iscale can cause this, so +* multiply by 10^(d2), which is now only allowed to be 24, +* with a multiply by 10^8 and 10^16, which is exact since +* 10^24 is exact. If the input was denormalized, we must +* create a busy stack frame with the mul command and the +* two operands, and allow the fpu to complete the multiply. 
+* +* Register usage: +* Input/Output +* d0: FPCR with RZ mode/Unchanged +* d2: 0 or 24/unchanged +* d3: x/x +* d4: LEN/Unchanged +* d5: ICTR:LAMBDA +* d6: ILOG/Unchanged +* d7: k-factor/Unchanged +* a0: ptr for original operand/final result +* a1: ptr to PTENRM array/Unchanged +* a2: x/x +* fp0: float(ILOG)/X adjusted for SCALE (Y) +* fp1: 10^ISCALE/Unchanged +* fp2: x/x +* F_SCR1:x/x +* F_SCR2:Abs(X) with $3fff exponent/Unchanged +* L_SCR1:x/x +* L_SCR2:first word of X packed/Unchanged + +A9_str: + fmove.x (a0),fp0 ;load X from memory + fabs.x fp0 ;use abs(X) + tst.w d5 ;LAMBDA is in lower word of d5 + bne.b sc_mul ;if neg (LAMBDA = 1), scale by mul + fdiv.x fp1,fp0 ;calculate X / SCALE -> Y to fp0 + bra.b A10_st ;branch to A10 + +sc_mul: + tst.b BINDEC_FLG(a6) ;check for denorm + beq.b A9_norm ;if norm, continue with mul + fmovem.x fp1,-(a7) ;load ETEMP with 10^ISCALE + move.l 8(a0),-(a7) ;load FPTEMP with input arg + move.l 4(a0),-(a7) + move.l (a0),-(a7) + move.l #18,d3 ;load count for busy stack +A9_loop: + clr.l -(a7) ;clear lword on stack + dbf.w d3,A9_loop + move.b VER_TMP(a6),(a7) ;write current version number + move.b #BUSY_SIZE-4,1(a7) ;write current busy size + move.b #$10,$44(a7) ;set fcefpte[15] bit + move.w #$0023,$40(a7) ;load cmdreg1b with mul command + move.b #$fe,$8(a7) ;load all 1s to cu savepc + frestore (a7)+ ;restore frame to fpu for completion + fmul.x 36(a1),fp0 ;multiply fp0 by 10^8 + fmul.x 48(a1),fp0 ;multiply fp0 by 10^16 + bra.b A10_st +A9_norm: + tst.w d2 ;test for small exp case + beq.b A9_con ;if zero, continue as normal + fmul.x 36(a1),fp0 ;multiply fp0 by 10^8 + fmul.x 48(a1),fp0 ;multiply fp0 by 10^16 +A9_con: + fmul.x fp1,fp0 ;calculate X * SCALE -> Y to fp0 + + +* A10. Or in INEX. +* If INEX is set, round error occured. This is compensated +* for by 'or-ing' in the INEX2 flag to the lsb of Y. 
+* +* Register usage: +* Input/Output +* d0: FPCR with RZ mode/FPSR with INEX2 isolated +* d2: x/x +* d3: x/x +* d4: LEN/Unchanged +* d5: ICTR:LAMBDA +* d6: ILOG/Unchanged +* d7: k-factor/Unchanged +* a0: ptr for original operand/final result +* a1: ptr to PTENxx array/Unchanged +* a2: x/ptr to FP_SCR2(a6) +* fp0: Y/Y with lsb adjusted +* fp1: 10^ISCALE/Unchanged +* fp2: x/x + +A10_st: + fmove.l FPSR,d0 ;get FPSR + fmove.x fp0,FP_SCR2(a6) ;move Y to memory + lea.l FP_SCR2(a6),a2 ;load a2 with ptr to FP_SCR2 + btst.l #9,d0 ;check if INEX2 set + beq.b A11_st ;if clear, skip rest + ori.l #1,8(a2) ;or in 1 to lsb of mantissa + fmove.x FP_SCR2(a6),fp0 ;write adjusted Y back to fpu + + +* A11. Restore original FPCR; set size ext. +* Perform FINT operation in the user's rounding mode. Keep +* the size to extended. The sintdo entry point in the sint +* routine expects the FPCR value to be in USER_FPCR for +* mode and precision. The original FPCR is saved in L_SCR1. + +A11_st: + move.l USER_FPCR(a6),L_SCR1(a6) ;save it for later + andi.l #$00000030,USER_FPCR(a6) ;set size to ext, +* ;block exceptions + + +* A12. Calculate YINT = FINT(Y) according to user's rounding mode. +* The FPSP routine sintd0 is used. The output is in fp0. 
+* +* Register usage: +* Input/Output +* d0: FPSR with AINEX cleared/FPCR with size set to ext +* d2: x/x/scratch +* d3: x/x +* d4: LEN/Unchanged +* d5: ICTR:LAMBDA/Unchanged +* d6: ILOG/Unchanged +* d7: k-factor/Unchanged +* a0: ptr for original operand/src ptr for sintdo +* a1: ptr to PTENxx array/Unchanged +* a2: ptr to FP_SCR2(a6)/Unchanged +* a6: temp pointer to FP_SCR2(a6) - orig value saved and restored +* fp0: Y/YINT +* fp1: 10^ISCALE/Unchanged +* fp2: x/x +* F_SCR1:x/x +* F_SCR2:Y adjusted for inex/Y with original exponent +* L_SCR1:x/original USER_FPCR +* L_SCR2:first word of X packed/Unchanged + +A12_st: + movem.l d0-d1/a0-a1,-(a7) ;save regs used by sintd0 + move.l L_SCR1(a6),-(a7) + move.l L_SCR2(a6),-(a7) + lea.l FP_SCR2(a6),a0 ;a0 is ptr to F_SCR2(a6) + fmove.x fp0,(a0) ;move Y to memory at FP_SCR2(a6) + tst.l L_SCR2(a6) ;test sign of original operand + bge.b do_fint ;if pos, use Y + or.l #$80000000,(a0) ;if neg, use -Y +do_fint: + move.l USER_FPSR(a6),-(a7) + bsr sintdo ;sint routine returns int in fp0 + move.b (a7),USER_FPSR(a6) + add.l #4,a7 + move.l (a7)+,L_SCR2(a6) + move.l (a7)+,L_SCR1(a6) + movem.l (a7)+,d0-d1/a0-a1 ;restore regs used by sint + move.l L_SCR2(a6),FP_SCR2(a6) ;restore original exponent + move.l L_SCR1(a6),USER_FPCR(a6) ;restore user's FPCR + + +* A13. Check for LEN digits. +* If the int operation results in more than LEN digits, +* or less than LEN -1 digits, adjust ILOG and repeat from +* A6. This test occurs only on the first pass. If the +* result is exactly 10^LEN, decrement ILOG and divide +* the mantissa by 10. The calculation of 10^LEN cannot +* be inexact, since all powers of ten upto 10^27 are exact +* in extended precision, so the use of a previous power-of-ten +* table will introduce no error. 
+* +* +* Register usage: +* Input/Output +* d0: FPCR with size set to ext/scratch final = 0 +* d2: x/x +* d3: x/scratch final = x +* d4: LEN/LEN adjusted +* d5: ICTR:LAMBDA/LAMBDA:ICTR +* d6: ILOG/ILOG adjusted +* d7: k-factor/Unchanged +* a0: pointer into memory for packed bcd string formation +* a1: ptr to PTENxx array/Unchanged +* a2: ptr to FP_SCR2(a6)/Unchanged +* fp0: int portion of Y/abs(YINT) adjusted +* fp1: 10^ISCALE/Unchanged +* fp2: x/10^LEN +* F_SCR1:x/x +* F_SCR2:Y with original exponent/Unchanged +* L_SCR1:original USER_FPCR/Unchanged +* L_SCR2:first word of X packed/Unchanged + +A13_st: + swap d5 ;put ICTR in lower word of d5 + tst.w d5 ;check if ICTR = 0 + bne not_zr ;if non-zero, go to second test +* +* Compute 10^(LEN-1) +* + fmove.s FONE,fp2 ;init fp2 to 1.0 + move.l d4,d0 ;put LEN in d0 + subq.l #1,d0 ;d0 = LEN -1 + clr.l d3 ;clr table index +l_loop: + lsr.l #1,d0 ;shift next bit into carry + bcc.b l_next ;if zero, skip the mul + fmul.x (a1,d3),fp2 ;mul by 10**(d3_bit_no) +l_next: + add.l #12,d3 ;inc d3 to next pwrten table entry + tst.l d0 ;test if LEN is zero + bne.b l_loop ;if not, loop +* +* 10^LEN-1 is computed for this test and A14. If the input was +* denormalized, check only the case in which YINT > 10^LEN. 
+* + tst.b BINDEC_FLG(a6) ;check if input was norm + beq.b A13_con ;if norm, continue with checking + fabs.x fp0 ;take abs of YINT + bra test_2 +* +* Compare abs(YINT) to 10^(LEN-1) and 10^LEN +* +A13_con: + fabs.x fp0 ;take abs of YINT + fcmp.x fp2,fp0 ;compare abs(YINT) with 10^(LEN-1) + fbge.w test_2 ;if greater, do next test + subq.l #1,d6 ;subtract 1 from ILOG + move.w #1,d5 ;set ICTR + fmove.l #rm_mode,FPCR ;set rmode to RM + fmul.s FTEN,fp2 ;compute 10^LEN + bra.w A6_str ;return to A6 and recompute YINT +test_2: + fmul.s FTEN,fp2 ;compute 10^LEN + fcmp.x fp2,fp0 ;compare abs(YINT) with 10^LEN + fblt.w A14_st ;if less, all is ok, go to A14 + fbgt.w fix_ex ;if greater, fix and redo + fdiv.s FTEN,fp0 ;if equal, divide by 10 + addq.l #1,d6 ; and inc ILOG + bra.b A14_st ; and continue elsewhere +fix_ex: + addq.l #1,d6 ;increment ILOG by 1 + move.w #1,d5 ;set ICTR + fmove.l #rm_mode,FPCR ;set rmode to RM + bra.w A6_str ;return to A6 and recompute YINT +* +* Since ICTR <> 0, we have already been through one adjustment, +* and shouldn't have another; this is to check if abs(YINT) = 10^LEN +* 10^LEN is again computed using whatever table is in a1 since the +* value calculated cannot be inexact. +* +not_zr: + fmove.s FONE,fp2 ;init fp2 to 1.0 + move.l d4,d0 ;put LEN in d0 + clr.l d3 ;clr table index +z_loop: + lsr.l #1,d0 ;shift next bit into carry + bcc.b z_next ;if zero, skip the mul + fmul.x (a1,d3),fp2 ;mul by 10**(d3_bit_no) +z_next: + add.l #12,d3 ;inc d3 to next pwrten table entry + tst.l d0 ;test if LEN is zero + bne.b z_loop ;if not, loop + fabs.x fp0 ;get abs(YINT) + fcmp.x fp2,fp0 ;check if abs(YINT) = 10^LEN + fbne.w A14_st ;if not, skip this + fdiv.s FTEN,fp0 ;divide abs(YINT) by 10 + addq.l #1,d6 ;and inc ILOG by 1 + addq.l #1,d4 ; and inc LEN + fmul.s FTEN,fp2 ; if LEN++, the get 10^^LEN + + +* A14. Convert the mantissa to bcd. +* The binstr routine is used to convert the LEN digit +* mantissa to bcd in memory. 
The input to binstr is +* to be a fraction; i.e. (mantissa)/10^LEN and adjusted +* such that the decimal point is to the left of bit 63. +* The bcd digits are stored in the correct position in +* the final string area in memory. +* +* +* Register usage: +* Input/Output +* d0: x/LEN call to binstr - final is 0 +* d1: x/0 +* d2: x/ms 32-bits of mant of abs(YINT) +* d3: x/ls 32-bits of mant of abs(YINT) +* d4: LEN/Unchanged +* d5: ICTR:LAMBDA/LAMBDA:ICTR +* d6: ILOG +* d7: k-factor/Unchanged +* a0: pointer into memory for packed bcd string formation +* /ptr to first mantissa byte in result string +* a1: ptr to PTENxx array/Unchanged +* a2: ptr to FP_SCR2(a6)/Unchanged +* fp0: int portion of Y/abs(YINT) adjusted +* fp1: 10^ISCALE/Unchanged +* fp2: 10^LEN/Unchanged +* F_SCR1:x/Work area for final result +* F_SCR2:Y with original exponent/Unchanged +* L_SCR1:original USER_FPCR/Unchanged +* L_SCR2:first word of X packed/Unchanged + +A14_st: + fmove.l #rz_mode,FPCR ;force rz for conversion + fdiv.x fp2,fp0 ;divide abs(YINT) by 10^LEN + lea.l FP_SCR1(a6),a0 + fmove.x fp0,(a0) ;move abs(YINT)/10^LEN to memory + move.l 4(a0),d2 ;move 2nd word of FP_RES to d2 + move.l 8(a0),d3 ;move 3rd word of FP_RES to d3 + clr.l 4(a0) ;zero word 2 of FP_RES + clr.l 8(a0) ;zero word 3 of FP_RES + move.l (a0),d0 ;move exponent to d0 + swap d0 ;put exponent in lower word + beq.b no_sft ;if zero, don't shift + subi.l #$3ffd,d0 ;sub bias less 2 to make fract + tst.l d0 ;check if > 1 + bgt.b no_sft ;if so, don't shift + neg.l d0 ;make exp positive +m_loop: + lsr.l #1,d2 ;shift d2:d3 right, add 0s + roxr.l #1,d3 ;the number of places + dbf.w d0,m_loop ;given in d0 +no_sft: + tst.l d2 ;check for mantissa of zero + bne.b no_zr ;if not, go on + tst.l d3 ;continue zero check + beq.b zer_m ;if zero, go directly to binstr +no_zr: + clr.l d1 ;put zero in d1 for addx + addi.l #$00000080,d3 ;inc at bit 7 + addx.l d1,d2 ;continue inc + andi.l #$ffffff80,d3 ;strip off lsb not used by 882 +zer_m: + move.l 
d4,d0 ;put LEN in d0 for binstr call + addq.l #3,a0 ;a0 points to M16 byte in result + bsr binstr ;call binstr to convert mant + + +* A15. Convert the exponent to bcd. +* As in A14 above, the exp is converted to bcd and the +* digits are stored in the final string. +* +* Digits are stored in L_SCR1(a6) on return from BINDEC as: +* +* 32 16 15 0 +* ----------------------------------------- +* | 0 | e3 | e2 | e1 | e4 | X | X | X | +* ----------------------------------------- +* +* And are moved into their proper places in FP_SCR1. If digit e4 +* is non-zero, OPERR is signaled. In all cases, all 4 digits are +* written as specified in the 881/882 manual for packed decimal. +* +* Register usage: +* Input/Output +* d0: x/LEN call to binstr - final is 0 +* d1: x/scratch (0);shift count for final exponent packing +* d2: x/ms 32-bits of exp fraction/scratch +* d3: x/ls 32-bits of exp fraction +* d4: LEN/Unchanged +* d5: ICTR:LAMBDA/LAMBDA:ICTR +* d6: ILOG +* d7: k-factor/Unchanged +* a0: ptr to result string/ptr to L_SCR1(a6) +* a1: ptr to PTENxx array/Unchanged +* a2: ptr to FP_SCR2(a6)/Unchanged +* fp0: abs(YINT) adjusted/float(ILOG) +* fp1: 10^ISCALE/Unchanged +* fp2: 10^LEN/Unchanged +* F_SCR1:Work area for final result/BCD result +* F_SCR2:Y with original exponent/ILOG/10^4 +* L_SCR1:original USER_FPCR/Exponent digits on return from binstr +* L_SCR2:first word of X packed/Unchanged + +A15_st: + tst.b BINDEC_FLG(a6) ;check for denorm + beq.b not_denorm + ftst.x fp0 ;test for zero + fbeq.w den_zero ;if zero, use k-factor or 4933 + fmove.l d6,fp0 ;float ILOG + fabs.x fp0 ;get abs of ILOG + bra.b convrt +den_zero: + tst.l d7 ;check sign of the k-factor + blt.b use_ilog ;if negative, use ILOG + fmove.s F4933,fp0 ;force exponent to 4933 + bra.b convrt ;do it +use_ilog: + fmove.l d6,fp0 ;float ILOG + fabs.x fp0 ;get abs of ILOG + bra.b convrt +not_denorm: + ftst.x fp0 ;test for zero + fbne.w not_zero ;if zero, force exponent + fmove.s FONE,fp0 ;force exponent to 1 + bra.b 
convrt ;do it +not_zero: + fmove.l d6,fp0 ;float ILOG + fabs.x fp0 ;get abs of ILOG +convrt: + fdiv.x 24(a1),fp0 ;compute ILOG/10^4 + fmove.x fp0,FP_SCR2(a6) ;store fp0 in memory + move.l 4(a2),d2 ;move word 2 to d2 + move.l 8(a2),d3 ;move word 3 to d3 + move.w (a2),d0 ;move exp to d0 + beq.b x_loop_fin ;if zero, skip the shift + subi.w #$3ffd,d0 ;subtract off bias + neg.w d0 ;make exp positive +x_loop: + lsr.l #1,d2 ;shift d2:d3 right + roxr.l #1,d3 ;the number of places + dbf.w d0,x_loop ;given in d0 +x_loop_fin: + clr.l d1 ;put zero in d1 for addx + addi.l #$00000080,d3 ;inc at bit 6 + addx.l d1,d2 ;continue inc + andi.l #$ffffff80,d3 ;strip off lsb not used by 882 + move.l #4,d0 ;put 4 in d0 for binstr call + lea.l L_SCR1(a6),a0 ;a0 is ptr to L_SCR1 for exp digits + bsr binstr ;call binstr to convert exp + move.l L_SCR1(a6),d0 ;load L_SCR1 lword to d0 + move.l #12,d1 ;use d1 for shift count + lsr.l d1,d0 ;shift d0 right by 12 + bfins d0,FP_SCR1(a6){4:12} ;put e3:e2:e1 in FP_SCR1 + lsr.l d1,d0 ;shift d0 right by 12 + bfins d0,FP_SCR1(a6){16:4} ;put e4 in FP_SCR1 + tst.b d0 ;check if e4 is zero + beq.b A16_st ;if zero, skip rest + or.l #opaop_mask,USER_FPSR(a6) ;set OPERR & AIOP in USER_FPSR + + +* A16. Write sign bits to final string. +* Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG). 
+* +* Register usage: +* Input/Output +* d0: x/scratch - final is x +* d2: x/x +* d3: x/x +* d4: LEN/Unchanged +* d5: ICTR:LAMBDA/LAMBDA:ICTR +* d6: ILOG/ILOG adjusted +* d7: k-factor/Unchanged +* a0: ptr to L_SCR1(a6)/Unchanged +* a1: ptr to PTENxx array/Unchanged +* a2: ptr to FP_SCR2(a6)/Unchanged +* fp0: float(ILOG)/Unchanged +* fp1: 10^ISCALE/Unchanged +* fp2: 10^LEN/Unchanged +* F_SCR1:BCD result with correct signs +* F_SCR2:ILOG/10^4 +* L_SCR1:Exponent digits on return from binstr +* L_SCR2:first word of X packed/Unchanged + +A16_st: + clr.l d0 ;clr d0 for collection of signs + andi.b #$0f,FP_SCR1(a6) ;clear first nibble of FP_SCR1 + tst.l L_SCR2(a6) ;check sign of original mantissa + bge.b mant_p ;if pos, don't set SM + moveq.l #2,d0 ;move 2 in to d0 for SM +mant_p: + tst.l d6 ;check sign of ILOG + bge.b wr_sgn ;if pos, don't set SE + addq.l #1,d0 ;set bit 0 in d0 for SE +wr_sgn: + bfins d0,FP_SCR1(a6){0:2} ;insert SM and SE into FP_SCR1 + +* Clean up and restore all registers used. + + fmove.l #0,FPSR ;clear possible inex2/ainex bits + fmovem.x (a7)+,fp0-fp2 + movem.l (a7)+,d2-d7/a2 + rts + + end diff --git a/sys/arch/m68k/fpsp/binstr.sa b/sys/arch/m68k/fpsp/binstr.sa new file mode 100644 index 00000000000..eeecf07f120 --- /dev/null +++ b/sys/arch/m68k/fpsp/binstr.sa @@ -0,0 +1,165 @@ +* $NetBSD: binstr.sa,v 1.3 1994/10/26 07:48:53 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. 
+* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* binstr.sa 3.3 12/19/90 +* +* +* Description: Converts a 64-bit binary integer to bcd. +* +* Input: 64-bit binary integer in d2:d3, desired length (LEN) in +* d0, and a pointer to start in memory for bcd characters +* in a0. (This pointer must point to byte 4 of the first +* lword of the packed decimal memory string.) +* +* Output: LEN bcd digits representing the 64-bit integer. +* +* Algorithm: +* The 64-bit binary is assumed to have a decimal point before +* bit 63. The fraction is multiplied by 10 using a mul by 2 +* shift and a mul by 8 shift. The bits shifted out of the +* msb form a decimal digit. This process is iterated until +* LEN digits are formed. +* +* A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the +* digit formed will be assumed the least significant. 
This is +* to force the first byte formed to have a 0 in the upper 4 bits. +* +* A2. Beginning of the loop: +* Copy the fraction in d2:d3 to d4:d5. +* +* A3. Multiply the fraction in d2:d3 by 8 using bit-field +* extracts and shifts. The three msbs from d2 will go into +* d1. +* +* A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb +* will be collected by the carry. +* +* A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 +* into d2:d3. D1 will contain the bcd digit formed. +* +* A6. Test d7. If zero, the digit formed is the ms digit. If non- +* zero, it is the ls digit. Put the digit in its place in the +* upper word of d7. If it is the ls digit, write the byte +* from d7 to memory. +* +* A7. Decrement d0 (LEN counter) and repeat the loop until zero. +* +* Implementation Notes: +* +* The registers are used as follows: +* +* d0: LEN counter +* d1: temp used to form the digit +* d2: upper 32-bits of fraction for mul by 8 +* d3: lower 32-bits of fraction for mul by 8 +* d4: upper 32-bits of fraction for mul by 2 +* d5: lower 32-bits of fraction for mul by 2 +* d6: temp for bit-field extracts +* d7: byte digit formation word;digit count {0,1} +* a0: pointer into memory for packed bcd string formation +* + +BINSTR IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xdef binstr +binstr: + movem.l d0-d7,-(a7) +* +* A1: Init d7 +* + moveq.l #1,d7 ;init d7 for second digit + subq.l #1,d0 ;for dbf d0 would have LEN+1 passes +* +* A2. Copy d2:d3 to d4:d5. Start loop. +* +loop: + move.l d2,d4 ;copy the fraction before muls + move.l d3,d5 ;to d4:d5 +* +* A3. Multiply d2:d3 by 8; extract msbs into d1. +* + bfextu d2{0:3},d1 ;copy 3 msbs of d2 into d1 + asl.l #3,d2 ;shift d2 left by 3 places + bfextu d3{0:3},d6 ;copy 3 msbs of d3 into d6 + asl.l #3,d3 ;shift d3 left by 3 places + or.l d6,d2 ;or in msbs from d3 into d2 +* +* A4. Multiply d4:d5 by 2; add carry out to d1. 
+* + add.l d5,d5 ;mul d5 by 2 + addx.l d4,d4 ;mul d4 by 2 + swap d6 ;put 0 in d6 lower word + addx.w d6,d1 ;add in extend from mul by 2 +* +* A5. Add mul by 8 to mul by 2. D1 contains the digit formed. +* + add.l d5,d3 ;add lower 32 bits + nop ;ERRATA FIX #13 (Rev. 1.2 6/6/90) + addx.l d4,d2 ;add with extend upper 32 bits + nop ;ERRATA FIX #13 (Rev. 1.2 6/6/90) + addx.w d6,d1 ;add in extend from add to d1 + swap d6 ;with d6 = 0; put 0 in upper word +* +* A6. Test d7 and branch. +* + tst.w d7 ;if zero, store digit & to loop + beq.b first_d ;if non-zero, form byte & write +sec_d: + swap d7 ;bring first digit to word d7b + asl.w #4,d7 ;first digit in upper 4 bits d7b + add.w d1,d7 ;add in ls digit to d7b + move.b d7,(a0)+ ;store d7b byte in memory + swap d7 ;put digit count flag back in word d7a + clr.w d7 ;set d7a to signal no digits done + dbf.w d0,loop ;do loop some more! + bra.b end_bstr ;finished, so exit +first_d: + swap d7 ;put digit word in d7b + move.w d1,d7 ;put new digit in d7b + swap d7 ;put digit count flag back in word d7a + addq.w #1,d7 ;set d7a to signal first digit done + dbf.w d0,loop ;do loop some more! + swap d7 ;loop ended with a digit pending: get it + lsl.w #4,d7 ;move it to upper 4 bits + move.b d7,(a0)+ ;store it in memory string +* +* Clean up and return; the bcd digits were stored through a0. +* +end_bstr: + movem.l (a7)+,d0-d7 + rts + end diff --git a/sys/arch/m68k/fpsp/bugfix.sa b/sys/arch/m68k/fpsp/bugfix.sa new file mode 100644 index 00000000000..d38f81656b0 --- /dev/null +++ b/sys/arch/m68k/fpsp/bugfix.sa @@ -0,0 +1,520 @@ +* $NetBSD: bugfix.sa,v 1.3 1994/10/26 07:48:55 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. 
+* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* bugfix.sa 3.2 1/31/91 +* +* +* This file contains workarounds for bugs in the 040 +* relating to the Floating-Point Software Package (FPSP) +* +* Fixes for bugs: 1238 +* +* Bug: 1238 +* +* +* /* The following dirty_bit clear should be left in +* * the handler permanently to improve throughput. +* * The dirty_bits are located at bits [23:16] in +* * longword $08 in the busy frame $4x60. Bit 16 +* * corresponds to FP0, bit 17 corresponds to FP1, +* * and so on. 
+* */ +* if (E3_exception_just_serviced) { +* dirty_bit[cmdreg3b[9:7]] = 0; +* } +* +* if (fsave_format_version != $40) {goto NOFIX} +* +* if !(E3_exception_just_serviced) {goto NOFIX} +* if (cupc == 0000000) {goto NOFIX} +* if ((cmdreg1b[15:13] != 000) && +* (cmdreg1b[15:10] != 010001)) {goto NOFIX} +* if (((cmdreg1b[15:13] != 000) || ((cmdreg1b[12:10] != cmdreg2b[9:7]) && +* (cmdreg1b[12:10] != cmdreg3b[9:7])) ) && +* ((cmdreg1b[ 9: 7] != cmdreg2b[9:7]) && +* (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) ) {goto NOFIX} +* +* /* Note: for 6d43b or 8d43b, you may want to add the following code +* * to get better coverage. (If you do not insert this code, the part +* * won't lock up; it will simply get the wrong answer.) +* * Do NOT insert this code for 10d43b or later parts. +* * +* * if (fpiarcu == integer stack return address) { +* * cupc = 0000000; +* * goto NOFIX; +* * } +* */ +* +* if (cmdreg1b[15:13] != 000) {goto FIX_OPCLASS2} +* FIX_OPCLASS0: +* if (((cmdreg1b[12:10] == cmdreg2b[9:7]) || +* (cmdreg1b[ 9: 7] == cmdreg2b[9:7])) && +* (cmdreg1b[12:10] != cmdreg3b[9:7]) && +* (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) { /* xu conflict only */ +* /* We execute the following code if there is an +* xu conflict and NOT an nu conflict */ +* +* /* first save some values on the fsave frame */ +* stag_temp = STAG[fsave_frame]; +* cmdreg1b_temp = CMDREG1B[fsave_frame]; +* dtag_temp = DTAG[fsave_frame]; +* ete15_temp = ETE15[fsave_frame]; +* +* CUPC[fsave_frame] = 0000000; +* FRESTORE +* FSAVE +* +* /* If the xu instruction is exceptional, we punt. +* * Otherwise, we would have to include OVFL/UNFL handler +* * code here to get the correct answer. 
+* */ +* if (fsave_frame_format == $4060) {goto KILL_PROCESS} +* +* fsave_frame = /* build a long frame of all zeros */ +* fsave_frame_format = $4060; /* label it as long frame */ +* +* /* load it with the temps we saved */ +* STAG[fsave_frame] = stag_temp; +* CMDREG1B[fsave_frame] = cmdreg1b_temp; +* DTAG[fsave_frame] = dtag_temp; +* ETE15[fsave_frame] = ete15_temp; +* +* /* Make sure that the cmdreg3b dest reg is not going to +* * be destroyed by a FMOVEM at the end of all this code. +* * If it is, you should move the current value of the reg +* * onto the stack so that the reg will be loaded with that value. +* */ +* +* /* All done. Proceed with the code below */ +* } +* +* etemp = FP_reg_[cmdreg1b[12:10]]; +* ete15 = ~ete14; +* cmdreg1b[15:10] = 010010; +* clear(bug_flag_procIDxxxx); +* FRESTORE and return; +* +* +* FIX_OPCLASS2: +* if ((cmdreg1b[9:7] == cmdreg2b[9:7]) && +* (cmdreg1b[9:7] != cmdreg3b[9:7])) { /* xu conflict only */ +* /* We execute the following code if there is an +* xu conflict and NOT an nu conflict */ +* +* /* first save some values on the fsave frame */ +* stag_temp = STAG[fsave_frame]; +* cmdreg1b_temp = CMDREG1B[fsave_frame]; +* dtag_temp = DTAG[fsave_frame]; +* ete15_temp = ETE15[fsave_frame]; +* etemp_temp = ETEMP[fsave_frame]; +* +* CUPC[fsave_frame] = 0000000; +* FRESTORE +* FSAVE +* +* +* /* If the xu instruction is exceptional, we punt. +* * Otherwise, we would have to include OVFL/UNFL handler +* * code here to get the correct answer. 
+* */ +* if (fsave_frame_format == $4060) {goto KILL_PROCESS} +* +* fsave_frame = /* build a long frame of all zeros */ +* fsave_frame_format = $4060; /* label it as long frame */ +* +* /* load it with the temps we saved */ +* STAG[fsave_frame] = stag_temp; +* CMDREG1B[fsave_frame] = cmdreg1b_temp; +* DTAG[fsave_frame] = dtag_temp; +* ETE15[fsave_frame] = ete15_temp; +* ETEMP[fsave_frame] = etemp_temp; +* +* /* Make sure that the cmdreg3b dest reg is not going to +* * be destroyed by a FMOVEM at the end of all this code. +* * If it is, you should move the current value of the reg +* * onto the stack so that the reg will loaded with that value. +* */ +* +* /* All done. Proceed with the code below */ +* } +* +* if (etemp_exponent == min_sgl) etemp_exponent = min_dbl; +* if (etemp_exponent == max_sgl) etemp_exponent = max_dbl; +* cmdreg1b[15:10] = 010101; +* clear(bug_flag_procIDxxxx); +* FRESTORE and return; +* +* +* NOFIX: +* clear(bug_flag_procIDxxxx); +* FRESTORE and return; +* + +BUGFIX IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref fpsp_fmt_error + + xdef b1238_fix +b1238_fix: +* +* This code is entered only on completion of the handling of an +* nu-generated ovfl, unfl, or inex exception. If the version +* number of the fsave is not $40, this handler is not necessary. +* Simply branch to fix_done and exit normally. +* + cmpi.b #VER_40,4(a7) + bne.w fix_done +* +* Test for cu_savepc equal to zero. If not, this is not a bug +* #1238 case. +* + move.b CU_SAVEPC(a6),d0 + andi.b #$FE,d0 + beq fix_done ;if zero, this is not bug #1238 + +* +* Test the register conflict aspect. If opclass0, check for +* cu src equal to xu dest or equal to nu dest. If so, go to +* op0. Else, or if opclass2, check for cu dest equal to +* xu dest or equal to nu dest. If so, go to tst_opcl. Else, +* exit, it is not the bug case. +* +* Check for opclass 0. If not, go and check for opclass 2 and sgl. 
+* + move.w CMDREG1B(a6),d0 + andi.w #$E000,d0 ;strip all but opclass + bne op2sgl ;not opclass 0, check op2 +* +* Check for cu and nu register conflict. If one exists, this takes +* priority over a cu and xu conflict. +* + bfextu CMDREG1B(a6){3:3},d0 ;get 1st src + bfextu CMDREG3B(a6){6:3},d1 ;get 3rd dest + cmp.b d0,d1 + beq.b op0 ;if equal, continue bugfix +* +* Check for cu dest equal to nu dest. If so, go and fix the +* bug condition. Otherwise, exit. +* + bfextu CMDREG1B(a6){6:3},d0 ;get 1st dest + cmp.b d0,d1 ;cmp 1st dest with 3rd dest + beq.b op0 ;if equal, continue bugfix +* +* Check for cu and xu register conflict. +* + bfextu CMDREG2B(a6){6:3},d1 ;get 2nd dest + cmp.b d0,d1 ;cmp 1st dest with 2nd dest + beq.b op0_xu ;if equal, continue bugfix + bfextu CMDREG1B(a6){3:3},d0 ;get 1st src + cmp.b d0,d1 ;cmp 1st src with 2nd dest + beq op0_xu + bne fix_done ;if the reg checks fail, exit +* +* We have the opclass 0 situation. +* +op0: + bfextu CMDREG1B(a6){3:3},d0 ;get source register no + move.l #7,d1 + sub.l d0,d1 + clr.l d0 + bset.l d1,d0 + fmovem.x d0,ETEMP(a6) ;load source to ETEMP + + move.b #$12,d0 + bfins d0,CMDREG1B(a6){0:6} ;opclass 2, extended +* +* Set ETEMP exponent bit 15 as the opposite of ete14 +* + btst #6,ETEMP_EX(a6) ;check etemp exponent bit 14 + beq setete15 + bclr #etemp15_bit,STAG(a6) + bra finish +setete15: + bset #etemp15_bit,STAG(a6) + bra finish + +* +* We have the case in which a conflict exists between the cu src or +* dest and the dest of the xu. We must clear the instruction in +* the cu and restore the state, allowing the instruction in the +* xu to complete. Remember, the instruction in the nu +* was exceptional, and was completed by the appropriate handler. +* If the result of the xu instruction is not exceptional, we can +* restore the instruction from the cu to the frame and continue +* processing the original exception. If the result is also +* exceptional, we choose to kill the process. 
+* +* Items saved from the stack: +* +* $3c stag - L_SCR1 +* $40 cmdreg1b - L_SCR2 +* $44 dtag - L_SCR3 +* +* The cu savepc is set to zero, and the frame is restored to the +* fpu. +* +op0_xu: + move.l STAG(a6),L_SCR1(a6) + move.l CMDREG1B(a6),L_SCR2(a6) + move.l DTAG(a6),L_SCR3(a6) + andi.l #$e0000000,L_SCR3(a6) + clr.b CU_SAVEPC(a6) + move.l (a7)+,d1 ;save return address from bsr + frestore (a7)+ + fsave -(a7) +* +* Check if the instruction which just completed was exceptional. +* + cmp.w #$4060,(a7) + beq op0_xb +* +* It is necessary to isolate the result of the instruction in the +* xu if it is to fp0 - fp3 and write that value to the USER_FPn +* locations on the stack. The correct destination register is in +* cmdreg2b. +* + bfextu CMDREG2B(a6){6:3},d0 ;get dest register no + cmpi.l #3,d0 + bgt.b op0_xi + beq.b op0_fp3 + cmpi.l #1,d0 + blt.b op0_fp0 + beq.b op0_fp1 +op0_fp2: + fmovem.x fp2,USER_FP2(a6) + bra.b op0_xi +op0_fp1: + fmovem.x fp1,USER_FP1(a6) + bra.b op0_xi +op0_fp0: + fmovem.x fp0,USER_FP0(a6) + bra.b op0_xi +op0_fp3: + fmovem.x fp3,USER_FP3(a6) +* +* The frame returned is idle. We must build a busy frame to hold +* the cu state information and setup etemp. +* +op0_xi: + move.l #22,d0 ;clear 23 lwords + clr.l (a7) +op0_loop: + clr.l -(a7) + dbf d0,op0_loop + move.l #$40600000,-(a7) + move.l L_SCR1(a6),STAG(a6) + move.l L_SCR2(a6),CMDREG1B(a6) + move.l L_SCR3(a6),DTAG(a6) + move.b #$6,CU_SAVEPC(a6) + move.l d1,-(a7) ;return bsr return address + bfextu CMDREG1B(a6){3:3},d0 ;get source register no + move.l #7,d1 + sub.l d0,d1 + clr.l d0 + bset.l d1,d0 + fmovem.x d0,ETEMP(a6) ;load source to ETEMP + + move.b #$12,d0 + bfins d0,CMDREG1B(a6){0:6} ;opclass 2, extended +* +* Set ETEMP exponent bit 15 as the opposite of ete14 +* + btst #6,ETEMP_EX(a6) ;check etemp exponent bit 14 + beq op0_sete15 + bclr #etemp15_bit,STAG(a6) + bra finish +op0_sete15: + bset #etemp15_bit,STAG(a6) + bra finish + +* +* The frame returned is busy. 
It is not possible to reconstruct +* the code sequence to allow completion. We will jump to +* fpsp_fmt_error and allow the kernel to kill the process. +* +op0_xb: + jmp fpsp_fmt_error + +* +* Check for opclass 2 and single size. If not both, exit. +* +op2sgl: + move.w CMDREG1B(a6),d0 + andi.w #$FC00,d0 ;strip all but opclass and size + cmpi.w #$4400,d0 ;test for opclass 2 and size=sgl + bne fix_done ;if not, it is not bug 1238 +* +* Check for cu dest equal to nu dest or equal to xu dest, with +* a cu and nu conflict taking priority over a cu and xu conflict. If either, +* go and fix the bug condition. Otherwise, exit. +* + bfextu CMDREG1B(a6){6:3},d0 ;get 1st dest + bfextu CMDREG3B(a6){6:3},d1 ;get 3rd dest + cmp.b d0,d1 ;cmp 1st dest with 3rd dest + beq op2_com ;if equal, continue bugfix + bfextu CMDREG2B(a6){6:3},d1 ;get 2nd dest + cmp.b d0,d1 ;cmp 1st dest with 2nd dest + bne fix_done ;if the reg checks fail, exit +* +* We have the case in which a conflict exists between the cu src or +* dest and the dest of the xu. We must clear the instruction in +* the cu and restore the state, allowing the instruction in the +* xu to complete. Remember, the instruction in the nu +* was exceptional, and was completed by the appropriate handler. +* If the result of the xu instruction is not exceptional, we can +* restore the instruction from the cu to the frame and continue +* processing the original exception. If the result is also +* exceptional, we choose to kill the process. +* +* Items saved from the stack: +* +* $3c stag - L_SCR1 +* $40 cmdreg1b - L_SCR2 +* $44 dtag - L_SCR3 +* etemp - FP_SCR2 +* +* The cu savepc is set to zero, and the frame is restored to the +* fpu. 
+* +op2_xu: + move.l STAG(a6),L_SCR1(a6) + move.l CMDREG1B(a6),L_SCR2(a6) + move.l DTAG(a6),L_SCR3(a6) + andi.l #$e0000000,L_SCR3(a6) + clr.b CU_SAVEPC(a6) + move.l ETEMP(a6),FP_SCR2(a6) + move.l ETEMP_HI(a6),FP_SCR2+4(a6) + move.l ETEMP_LO(a6),FP_SCR2+8(a6) + move.l (a7)+,d1 ;save return address from bsr + frestore (a7)+ + fsave -(a7) +* +* Check if the instruction which just completed was exceptional. +* + cmp.w #$4060,(a7) + beq op2_xb +* +* It is necessary to isolate the result of the instruction in the +* xu if it is to fp0 - fp3 and write that value to the USER_FPn +* locations on the stack. The correct destination register is in +* cmdreg2b. +* + bfextu CMDREG2B(a6){6:3},d0 ;get dest register no + cmpi.l #3,d0 + bgt.b op2_xi + beq.b op2_fp3 + cmpi.l #1,d0 + blt.b op2_fp0 + beq.b op2_fp1 +op2_fp2: + fmovem.x fp2,USER_FP2(a6) + bra.b op2_xi +op2_fp1: + fmovem.x fp1,USER_FP1(a6) + bra.b op2_xi +op2_fp0: + fmovem.x fp0,USER_FP0(a6) + bra.b op2_xi +op2_fp3: + fmovem.x fp3,USER_FP3(a6) +* +* The frame returned is idle. We must build a busy frame to hold +* the cu state information and fix up etemp. +* +op2_xi: + move.l #22,d0 ;clear 23 lwords + clr.l (a7) +op2_loop: + clr.l -(a7) + dbf d0,op2_loop + move.l #$40600000,-(a7) + move.l L_SCR1(a6),STAG(a6) + move.l L_SCR2(a6),CMDREG1B(a6) + move.l L_SCR3(a6),DTAG(a6) + move.b #$6,CU_SAVEPC(a6) + move.l FP_SCR2(a6),ETEMP(a6) + move.l FP_SCR2+4(a6),ETEMP_HI(a6) + move.l FP_SCR2+8(a6),ETEMP_LO(a6) + move.l d1,-(a7) + bra op2_com + +* +* We have the opclass 2 single source situation. 
+* +op2_com: + move.b #$15,d0 + bfins d0,CMDREG1B(a6){0:6} ;opclass 2, double + + cmp.w #$407F,ETEMP_EX(a6) ;single +max + bne.b case2 + move.w #$43FF,ETEMP_EX(a6) ;to double +max + bra finish +case2: + cmp.w #$C07F,ETEMP_EX(a6) ;single -max + bne.b case3 + move.w #$C3FF,ETEMP_EX(a6) ;to double -max + bra finish +case3: + cmp.w #$3F80,ETEMP_EX(a6) ;single +min + bne.b case4 + move.w #$3C00,ETEMP_EX(a6) ;to double +min + bra finish +case4: + cmp.w #$BF80,ETEMP_EX(a6) ;single -min + bne fix_done + move.w #$BC00,ETEMP_EX(a6) ;to double -min + bra finish +* +* The frame returned is busy. It is not possible to reconstruct +* the code sequence to allow completion. fpsp_fmt_error causes +* an fline illegal instruction to be executed. +* +* You should replace the jump to fpsp_fmt_error with a jump +* to the entry point used to kill a process. +* +op2_xb: + jmp fpsp_fmt_error + +* +* Enter here if the case is not of the situations affected by +* bug #1238, or if the fix is completed, and exit. +* +finish: +fix_done: + rts + + end diff --git a/sys/arch/m68k/fpsp/copyright.s b/sys/arch/m68k/fpsp/copyright.s new file mode 100644 index 00000000000..c6039f91313 --- /dev/null +++ b/sys/arch/m68k/fpsp/copyright.s @@ -0,0 +1,32 @@ +| $NetBSD: copyright.s,v 1.2 1994/10/26 07:48:57 cgd Exp $ + +.text +.ascii "MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP\n" +.ascii "M68000 Hi-Performance Microprocessor Division\n" +.ascii "M68040 Software Package\n" +.ascii "\n" +.ascii "M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.\n" +.ascii "All rights reserved.\n" +.ascii "\n" +.ascii "THE SOFTWARE is provided on an \"AS IS\" basis and without warranty.\n" +.ascii "To the maximum extent permitted by applicable law,\n" +.ascii "MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,\n" +.ascii "INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A\n" +.ascii "PARTICULAR PURPOSE and any warranty against infringement with\n" +.ascii "regard to the SOFTWARE 
(INCLUDING ANY MODIFIED VERSIONS THEREOF)\n" +.ascii "and any accompanying written materials. \n" +.ascii "\n" +.ascii "To the maximum extent permitted by applicable law,\n" +.ascii "IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER\n" +.ascii "(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS\n" +.ascii "PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR\n" +.ascii "OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE\n" +.ascii "SOFTWARE. Motorola assumes no responsibility for the maintenance\n" +.ascii "and support of the SOFTWARE. \n" +.ascii "\n" +.ascii "You are hereby granted a copyright license to use, modify, and\n" +.ascii "distribute the SOFTWARE so long as this entire notice is retained\n" +.ascii "without alteration in any modified and/or redistributed versions,\n" +.ascii "and that such modified versions are clearly identified as such.\n" +.ascii "No licenses are granted by implication, estoppel or otherwise\n" +.ascii "under any patents or trademarks of Motorola, Inc.\n" diff --git a/sys/arch/m68k/fpsp/decbin.sa b/sys/arch/m68k/fpsp/decbin.sa new file mode 100644 index 00000000000..5f7106427c5 --- /dev/null +++ b/sys/arch/m68k/fpsp/decbin.sa @@ -0,0 +1,531 @@ +* $NetBSD: decbin.sa,v 1.2 1994/10/26 07:48:59 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. 
+* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* decbin.sa 3.3 12/19/90 +* +* Description: Converts normalized packed bcd value pointed to by +* register A6 to extended-precision value in FP0. +* +* Input: Normalized packed bcd value in ETEMP(a6). +* +* Output: Exact floating-point representation of the packed bcd value. +* +* Saves and Modifies: D2-D5 +* +* Speed: The program decbin takes ??? cycles to execute. +* +* Object Size: +* +* External Reference(s): None. +* +* Algorithm: +* Expected is a normal bcd (i.e. non-exceptional; all inf, zero, +* and NaN operands are dispatched without entering this routine) +* value in 68881/882 format at location ETEMP(A6). +* +* A1. Convert the bcd exponent to binary by successive adds and muls. +* Set the sign according to SE. Subtract 16 to compensate +* for the mantissa which is to be interpreted as 17 integer +* digits, rather than 1 integer and 16 fraction digits. +* Note: this operation can never overflow. +* +* A2. Convert the bcd mantissa to binary by successive +* adds and muls in FP0. Set the sign according to SM. +* The mantissa digits will be converted with the decimal point +* assumed following the least-significant digit. 
+* Note: this operation can never overflow. +* +* A3. Count the number of leading/trailing zeros in the +* bcd string. If SE is positive, count the leading zeros; +* if negative, count the trailing zeros. Set the adjusted +* exponent equal to the exponent from A1 and the zero count +* added if SM = 1 and subtracted if SM = 0. Scale the +* mantissa the equivalent of forcing in the bcd value: +* +* SM = 0 a non-zero digit in the integer position +* SM = 1 a non-zero digit in Mant0, lsd of the fraction +* +* this will ensure that any value, regardless of its +* representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted +* consistently. +* +* A4. Calculate the factor 10^exp in FP1 using a table of +* 10^(2^n) values. To reduce the error in forming factors +* greater than 10^27, a directed rounding scheme is used with +* tables rounded to RN, RM, and RP, according to the table +* in the comments of the pwrten section. +* +* A5. Form the final binary number by scaling the mantissa by +* the exponent factor. This is done by multiplying the +* mantissa in FP0 by the factor in FP1 if the adjusted +* exponent sign is positive, and dividing FP0 by FP1 if +* it is negative. +* +* Clean up and return. Check if the final mul or div resulted +* in an inex2 exception. If so, set inex1 in the fpsr and +* check if the inex1 exception is enabled. If so, set d7 upper +* word to $0100. This will signal unimp.sa that an enabled inex1 +* exception occurred. Unimp will fix the stack. +* + +DECBIN IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + +* +* PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded +* to nearest, minus, and plus, respectively. The tables include +* 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding +* is required until the power is greater than 27; however, all +* tables include the first 5 for ease of indexing. 
+* + xref PTENRN + xref PTENRM + xref PTENRP + +RTABLE dc.b 0,0,0,0 + dc.b 2,3,2,3 + dc.b 2,3,3,2 + dc.b 3,2,2,3 + + xdef decbin + xdef calc_e + xdef pwrten + xdef calc_m + xdef norm + xdef ap_st_z + xdef ap_st_n +* +FNIBS equ 7 +FSTRT equ 0 +* +ESTRT equ 4 +EDIGITS equ 2 +* +* Constants in single precision +FZERO dc.l $00000000 +FONE dc.l $3F800000 +FTEN dc.l $41200000 + +TEN equ 10 + +* +decbin: + fmove.l #0,FPCR ;clr real fpcr + movem.l d2-d5,-(a7) +* +* Calculate exponent: +* 1. Copy bcd value in memory for use as a working copy. +* 2. Calculate absolute value of exponent in d1 by mul and add. +* 3. Correct for exponent sign. +* 4. Subtract 16 to compensate for interpreting the mant as all integer digits. +* (i.e., all digits assumed left of the decimal point.) +* +* Register usage: +* +* calc_e: +* (*) d0: temp digit storage +* (*) d1: accumulator for binary exponent +* (*) d2: digit count +* (*) d3: offset pointer +* ( ) d4: first word of bcd +* ( ) a0: pointer to working bcd value +* ( ) a6: pointer to original bcd value +* (*) FP_SCR1: working copy of original bcd value +* (*) L_SCR1: copy of original exponent word +* +calc_e: + move.l #EDIGITS,d2 ;# of nibbles (digits) in fraction part + moveq.l #ESTRT,d3 ;counter to pick up digits + lea.l FP_SCR1(a6),a0 ;load tmp bcd storage address + move.l ETEMP(a6),(a0) ;save input bcd value + move.l ETEMP_HI(a6),4(a0) ;save words 2 and 3 + move.l ETEMP_LO(a6),8(a0) ;and work with these + move.l (a0),d4 ;get first word of bcd + clr.l d1 ;zero d1 for accumulator +e_gd: + mulu.l #TEN,d1 ;mul partial product by one digit place + bfextu d4{d3:4},d0 ;get the digit and zero extend into d0 + add.l d0,d1 ;d1 = d1 + d0 + addq.b #4,d3 ;advance d3 to the next digit + dbf.w d2,e_gd ;if we have used all 3 digits, exit loop + btst #30,d4 ;get SE + beq.b e_pos ;don't negate if pos + neg.l d1 ;negate before subtracting +e_pos: + sub.l #16,d1 ;sub to compensate for shift of mant + bge.b e_save ;if still pos, do not neg + neg.l d1 ;now 
negative, make pos and set SE + or.l #$40000000,d4 ;set SE in d4, + or.l #$40000000,(a0) ;and in working bcd +e_save: + move.l d1,L_SCR1(a6) ;save exp in memory +* +* +* Calculate mantissa: +* 1. Calculate absolute value of mantissa in fp0 by mul and add. +* 2. Correct for mantissa sign. +* (i.e., all digits assumed left of the decimal point.) +* +* Register usage: +* +* calc_m: +* (*) d0: temp digit storage +* (*) d1: lword counter +* (*) d2: digit count +* (*) d3: offset pointer +* ( ) d4: words 2 and 3 of bcd +* ( ) a0: pointer to working bcd value +* ( ) a6: pointer to original bcd value +* (*) fp0: mantissa accumulator +* ( ) FP_SCR1: working copy of original bcd value +* ( ) L_SCR1: copy of original exponent word +* +calc_m: + moveq.l #1,d1 ;lword counter, init to 1 + fmove.s FZERO,fp0 ;accumulator +* +* +* Since the packed number has a long word between the first & second parts, +* get the integer digit then skip down & get the rest of the +* mantissa. We will unroll the loop once. +* + bfextu (a0){28:4},d0 ;integer part is ls digit in long word + fadd.b d0,fp0 ;add digit to sum in fp0 +* +* +* Get the rest of the mantissa. +* +loadlw: + move.l (a0,d1.L*4),d4 ;load mantissa longword into d4 + moveq.l #FSTRT,d3 ;counter to pick up digits + moveq.l #FNIBS,d2 ;reset number of digits per a0 ptr +md2b: + fmul.s FTEN,fp0 ;fp0 = fp0 * 10 + bfextu d4{d3:4},d0 ;get the digit and zero extend + fadd.b d0,fp0 ;fp0 = fp0 + digit +* +* +* If all the digits (8) in that long word have been converted (d2=0), +* then inc d1 (=2) to point to the next long word and reset d3 to 0 +* to initialize the digit offset, and set d2 to 7 for the digit count; +* else continue with this long word. +* + addq.b #4,d3 ;advance d3 to the next digit + dbf.w d2,md2b ;check for last digit in this lw +nextlw: + addq.l #1,d1 ;inc lw pointer in mantissa + cmp.l #2,d1 ;test for last lw + ble loadlw ;if not, get last one + +* +* Check the sign of the mant and make the value in fp0 the same sign. 
+* +m_sign: + btst #31,(a0) ;test sign of the mantissa + beq.b ap_st_z ;if clear, go to append/strip zeros + fneg.x fp0 ;if set, negate fp0 + +* +* Append/strip zeros: +* +* For adjusted exponents which have an absolute value greater than 27*, +* this routine calculates the amount needed to normalize the mantissa +* for the adjusted exponent. That number is subtracted from the exp +* if the exp was positive, and added if it was negative. The purpose +* of this is to reduce the value of the exponent and the possibility +* of error in calculation of pwrten. +* +* 1. Branch on the sign of the adjusted exponent. +* 2p.(positive exp) +* 2. Check M16 and the digits in lwords 2 and 3 in descending order. +* 3. Add one for each zero encountered until a non-zero digit. +* 4. Subtract the count from the exp. +* 5. Check if the exp has crossed zero in #3 above; make the exp abs +* and set SE. +* 6. Multiply the mantissa by 10**count. +* 2n.(negative exp) +* 2. Check the digits in lwords 3 and 2 in descending order. +* 3. Add one for each zero encountered until a non-zero digit. +* 4. Add the count to the exp. +* 5. Check if the exp has crossed zero in #3 above; clear SE. +* 6. Divide the mantissa by 10**count. +* +* *Why 27? If the adjusted exponent is within -28 < expA < 28, then +* any adjustment due to append/strip zeros will drive the resultant +* exponent towards zero. Since all pwrten constants with a power +* of 27 or less are exact, there is no need to use this routine to +* attempt to lessen the resultant exponent. +* +* Register usage: +* +* ap_st_z: +* (*) d0: temp digit storage +* (*) d1: zero count +* (*) d2: digit count +* (*) d3: offset pointer +* ( ) d4: first word of bcd +* (*) d5: lword counter +* ( ) a0: pointer to working bcd value +* ( ) FP_SCR1: working copy of original bcd value +* ( ) L_SCR1: copy of original exponent word +* +* +* First check the absolute value of the exponent to see if this +* routine is necessary. 
If so, then check the sign of the exponent +* and do append (+) or strip (-) zeros accordingly. +* This section handles a positive adjusted exponent. +* +ap_st_z: + move.l L_SCR1(a6),d1 ;load expA for range test + cmp.l #27,d1 ;test is with 27 + ble.w pwrten ;if abs(expA) <28, skip ap/st zeros + btst #30,(a0) ;check sign of exp + bne.b ap_st_n ;if neg, go to neg side + clr.l d1 ;zero count reg + move.l (a0),d4 ;load lword 1 to d4 + bfextu d4{28:4},d0 ;get M16 in d0 + bne.b ap_p_fx ;if M16 is non-zero, go fix exp + addq.l #1,d1 ;inc zero count + moveq.l #1,d5 ;init lword counter + move.l (a0,d5.L*4),d4 ;get lword 2 to d4 + bne.b ap_p_cl ;if lw 2 is zero, skip it + addq.l #8,d1 ;and inc count by 8 + addq.l #1,d5 ;inc lword counter + move.l (a0,d5.L*4),d4 ;get lword 3 to d4 +ap_p_cl: + clr.l d3 ;init offset reg + moveq.l #7,d2 ;init digit counter +ap_p_gd: + bfextu d4{d3:4},d0 ;get digit + bne.b ap_p_fx ;if non-zero, go to fix exp + addq.l #4,d3 ;point to next digit + addq.l #1,d1 ;inc digit counter + dbf.w d2,ap_p_gd ;get next digit +ap_p_fx: + move.l d1,d0 ;copy counter to d0 + move.l L_SCR1(a6),d1 ;get adjusted exp from memory + sub.l d0,d1 ;subtract count from exp + bge.b ap_p_fm ;if still pos, go to pwrten + neg.l d1 ;now it's neg; get abs + move.l (a0),d4 ;load lword 1 to d4 + or.l #$40000000,d4 ; and set SE in d4 + or.l #$40000000,(a0) ; and in memory +* +* Calculate the mantissa multiplier to compensate for the stripping of +* zeros from the mantissa. 
+* +ap_p_fm: + move.l #PTENRN,a1 ;get address of power-of-ten table + clr.l d3 ;init table index + fmove.s FONE,fp1 ;init fp1 to 1 + moveq.l #3,d2 ;init d2 to count bits in counter +ap_p_el: + asr.l #1,d0 ;shift lsb into carry + bcc.b ap_p_en ;if 1, mul fp1 by pwrten factor + fmul.x (a1,d3),fp1 ;mul by 10**(d3_bit_no) +ap_p_en: + add.l #12,d3 ;inc d3 to next rtable entry + tst.l d0 ;check if d0 is zero + bne.b ap_p_el ;if not, get next bit + fmul.x fp1,fp0 ;mul mantissa by 10**(no_bits_shifted) + bra.b pwrten ;go calc pwrten +* +* This section handles a negative adjusted exponent. +* +ap_st_n: + clr.l d1 ;clr counter + moveq.l #2,d5 ;set up d5 to point to lword 3 + move.l (a0,d5.L*4),d4 ;get lword 3 + bne.b ap_n_cl ;if not zero, check digits + sub.l #1,d5 ;dec d5 to point to lword 2 + addq.l #8,d1 ;inc counter by 8 + move.l (a0,d5.L*4),d4 ;get lword 2 +ap_n_cl: + move.l #28,d3 ;point to last digit + moveq.l #7,d2 ;init digit counter +ap_n_gd: + bfextu d4{d3:4},d0 ;get digit + bne.b ap_n_fx ;if non-zero, go to exp fix + subq.l #4,d3 ;point to previous digit + addq.l #1,d1 ;inc digit counter + dbf.w d2,ap_n_gd ;get next digit +ap_n_fx: + move.l d1,d0 ;copy counter to d0 + move.l L_SCR1(a6),d1 ;get adjusted exp from memory + sub.l d0,d1 ;subtract count from exp + bgt.b ap_n_fm ;if still pos, go fix mantissa + neg.l d1 ;take abs of exp and clr SE + move.l (a0),d4 ;load lword 1 to d4 + and.l #$bfffffff,d4 ; and clr SE in d4 + and.l #$bfffffff,(a0) ; and in memory +* +* Calculate the mantissa multiplier to compensate for the appending of +* zeros to the mantissa. 
+* +ap_n_fm: + move.l #PTENRN,a1 ;get address of power-of-ten table + clr.l d3 ;init table index + fmove.s FONE,fp1 ;init fp1 to 1 + moveq.l #3,d2 ;init d2 to count bits in counter +ap_n_el: + asr.l #1,d0 ;shift lsb into carry + bcc.b ap_n_en ;if 1, mul fp1 by pwrten factor + fmul.x (a1,d3),fp1 ;mul by 10**(d3_bit_no) +ap_n_en: + add.l #12,d3 ;inc d3 to next rtable entry + tst.l d0 ;check if d0 is zero + bne.b ap_n_el ;if not, get next bit + fdiv.x fp1,fp0 ;div mantissa by 10**(no_bits_shifted) +* +* +* Calculate power-of-ten factor from adjusted and shifted exponent. +* +* Register usage: +* +* pwrten: +* (*) d0: temp +* ( ) d1: exponent +* (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp +* (*) d3: FPCR work copy +* ( ) d4: first word of bcd +* (*) a1: RTABLE pointer +* calc_p: +* (*) d0: temp +* ( ) d1: exponent +* (*) d3: PWRTxx table index +* ( ) a0: pointer to working copy of bcd +* (*) a1: PWRTxx pointer +* (*) fp1: power-of-ten accumulator +* +* Pwrten calculates the exponent factor in the selected rounding mode +* according to the following table: +* +* Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode +* +* ANY ANY RN RN +* +* + + RP RP +* - + RP RM +* + - RP RM +* - - RP RP +* +* + + RM RM +* - + RM RP +* + - RM RP +* - - RM RM +* +* + + RZ RM +* - + RZ RM +* + - RZ RP +* - - RZ RP +* +* +pwrten: + move.l USER_FPCR(a6),d3 ;get user's FPCR + bfextu d3{26:2},d2 ;isolate rounding mode bits + move.l (a0),d4 ;reload 1st bcd word to d4 + asl.l #2,d2 ;format d2 to be + bfextu d4{0:2},d0 ; {FPCR[6],FPCR[5],SM,SE} + add.l d0,d2 ;in d2 as index into RTABLE + lea.l RTABLE,a1 ;load rtable base + move.b (a1,d2),d0 ;load new rounding bits from table + clr.l d3 ;clear d3 to force no exc and extended + bfins d0,d3{26:2} ;stuff new rounding bits in FPCR + fmove.l d3,FPCR ;write new FPCR + asr.l #1,d0 ;write correct PTENxx table + bcc.b not_rp ;to a1 + lea.l PTENRP,a1 ;it is RP + bra.b calc_p ;go to init section +not_rp: + asr.l #1,d0 ;keep checking + bcc.b 
not_rm + lea.l PTENRM,a1 ;it is RM + bra.b calc_p ;go to init section +not_rm: + lea.l PTENRN,a1 ;it is RN +calc_p: + move.l d1,d0 ;copy exp to d0;use d0 + bpl.b no_neg ;if exp is negative, + neg.l d0 ;invert it + or.l #$40000000,(a0) ;and set SE bit +no_neg: + clr.l d3 ;table index + fmove.s FONE,fp1 ;init fp1 to 1 +e_loop: + asr.l #1,d0 ;shift next bit into carry + bcc.b e_next ;if zero, skip the mul + fmul.x (a1,d3),fp1 ;mul by 10**(d3_bit_no) +e_next: + add.l #12,d3 ;inc d3 to next rtable entry + tst.l d0 ;check if d0 is zero + bne.b e_loop ;not zero, continue shifting +* +* +* Check the sign of the adjusted exp and make the value in fp0 the +* same sign. If the exp was pos then multiply fp1*fp0; +* else divide fp0/fp1. +* +* Register Usage: +* norm: +* ( ) a0: pointer to working bcd value +* (*) fp0: mantissa accumulator +* ( ) fp1: scaling factor - 10**(abs(exp)) +* +norm: + btst #30,(a0) ;test the sign of the exponent + beq.b mul ;if clear, go to multiply +div: + fdiv.x fp1,fp0 ;exp is negative, so divide mant by exp + bra.b end_dec +mul: + fmul.x fp1,fp0 ;exp is positive, so multiply by exp +* +* +* Clean up and return with result in fp0. +* +* If the final mul/div in decbin incurred an inex exception, +* it will be inex2, but will be reported as inex1 by get_op. 
+* +end_dec: + fmove.l FPSR,d0 ;get status register + bclr.l #inex2_bit+8,d0 ;test for inex2 and clear it + fmove.l d0,FPSR ;return status reg w/o inex2 + beq.b no_exc ;skip this if no exc + or.l #inx1a_mask,USER_FPSR(a6) ;set inex1/ainex +no_exc: + movem.l (a7)+,d2-d5 + rts + end diff --git a/sys/arch/m68k/fpsp/do_func.sa b/sys/arch/m68k/fpsp/do_func.sa new file mode 100644 index 00000000000..92e3fde0b49 --- /dev/null +++ b/sys/arch/m68k/fpsp/do_func.sa @@ -0,0 +1,584 @@ +* $NetBSD: do_func.sa,v 1.2 1994/10/26 07:49:02 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. 
+* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* do_func.sa 3.4 2/18/91 +* +* Do_func performs the unimplemented operation. The operation +* to be performed is determined from the lower 7 bits of the +* extension word (except in the case of fmovecr and fsincos). +* The opcode and tag bits form an index into a jump table in +* tbldo.sa. Cases of zero, infinity and NaN are handled in +* do_func by forcing the default result. Normalized and +* denormalized (there are no unnormalized numbers at this +* point) are passed onto the emulation code. +* +* CMDREG1B and STAG are extracted from the fsave frame +* and combined to form the table index. The function called +* will start with a0 pointing to the ETEMP operand. Dyadic +* functions can find FPTEMP at -12(a0). +* +* Called functions return their result in fp0. Sincos returns +* sin(x) in fp0 and cos(x) in fp1. +* + +DO_FUNC IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref t_dz2 + xref t_operr + xref t_inx2 + xref t_resdnrm + xref dst_nan + xref src_nan + xref nrm_set + xref sto_cos + + xref tblpre + xref slognp1,slogn,slog10,slog2 + xref slognd,slog10d,slog2d + xref smod,srem + xref sscale + xref smovcr + +PONE dc.l $3fff0000,$80000000,$00000000 ;+1 +MONE dc.l $bfff0000,$80000000,$00000000 ;-1 +PZERO dc.l $00000000,$00000000,$00000000 ;+0 +MZERO dc.l $80000000,$00000000,$00000000 ;-0 +PINF dc.l $7fff0000,$00000000,$00000000 ;+inf +MINF dc.l $ffff0000,$00000000,$00000000 ;-inf +QNAN dc.l $7fff0000,$ffffffff,$ffffffff ;non-signaling nan +PPIBY2 dc.l $3FFF0000,$C90FDAA2,$2168C235 ;+PI/2 +MPIBY2 dc.l $bFFF0000,$C90FDAA2,$2168C235 ;-PI/2 + + xdef do_func +do_func: + clr.b CU_ONLY(a6) +* +* Check for fmovecr. It does not follow the format of fp gen +* unimplemented instructions. The test is on the upper 6 bits; +* if they are $17, the inst is fmovecr. Call entry smovcr +* directly. 
+* + bfextu CMDREG1B(a6){0:6},d0 ;get opclass and src fields + cmpi.l #$17,d0 ;if op class and size fields are $17, +* ;it is FMOVECR; if not, continue + bne.b not_fmovecr + jmp smovcr ;fmovecr; jmp directly to emulation + +not_fmovecr: + move.w CMDREG1B(a6),d0 + and.l #$7F,d0 + cmpi.l #$38,d0 ;if the extension is >= $38, + bge.b serror ;it is illegal + bfextu STAG(a6){0:3},d1 + lsl.l #3,d0 ;make room for STAG + add.l d1,d0 ;combine for final index into table + lea.l tblpre,a1 ;start of monster jump table + move.l (a1,d0.w*4),a1 ;real target address + lea.l ETEMP(a6),a0 ;a0 is pointer to src op + move.l USER_FPCR(a6),d1 + and.l #$FF,d1 ; discard all but rounding mode/prec + fmove.l #0,fpcr + jmp (a1) +* +* ERROR +* + xdef serror +serror: + st.b STORE_FLG(a6) + rts +* +* These routines load forced values into fp0. They are called +* by index into tbldo. +* +* Load a signed zero to fp0 and set inex2/ainex +* + xdef snzrinx +snzrinx: + btst.b #sign_bit,LOCAL_EX(a0) ;get sign of source operand + bne.b ld_mzinx ;if negative, branch + bsr ld_pzero ;bsr so we can return and set inx + bra t_inx2 ;now, set the inx for the next inst +ld_mzinx: + bsr ld_mzero ;if neg, load neg zero, return here + bra t_inx2 ;now, set the inx for the next inst +* +* Load a signed zero to fp0; do not set inex2/ainex +* + xdef szero +szero: + btst.b #sign_bit,LOCAL_EX(a0) ;get sign of source operand + bne ld_mzero ;if neg, load neg zero + bra ld_pzero ;load positive zero +* +* Load a signed infinity to fp0; do not set inex2/ainex +* + xdef sinf +sinf: + btst.b #sign_bit,LOCAL_EX(a0) ;get sign of source operand + bne ld_minf ;if negative branch + bra ld_pinf +* +* Load a signed one to fp0; do not set inex2/ainex +* + xdef sone +sone: + btst.b #sign_bit,LOCAL_EX(a0) ;check sign of source + bne ld_mone + bra ld_pone +* +* Load a signed pi/2 to fp0; do not set inex2/ainex +* + xdef spi_2 +spi_2: + btst.b #sign_bit,LOCAL_EX(a0) ;check sign of source + bne ld_mpi2 + bra ld_ppi2 +* +* Load either a +0 
or +inf for plus/minus operand +* + xdef szr_inf +szr_inf: + btst.b #sign_bit,LOCAL_EX(a0) ;check sign of source + bne ld_pzero ;negative source: load +0 + bra ld_pinf ;positive source: load +inf +* +* Result is either an operr or +inf for plus/minus operand +* [Used by slogn, slognp1, slog10, and slog2] +* + xdef sopr_inf +sopr_inf: + btst.b #sign_bit,LOCAL_EX(a0) ;check sign of source + bne t_operr ;negative source: operr + bra ld_pinf ;positive source: load +inf +* +* FLOGNP1 +* + xdef sslognp1 +sslognp1: + fmovem.x (a0),fp0 + fcmp.b #-1,fp0 + fbgt slognp1 ;source > -1: continue in slognp1 + fbeq t_dz2 ;if = -1, divide by zero exception + fmove.l #0,FPSR ;clr N flag + bra t_operr ;take care of operands < -1 +* +* FETOXM1 +* + xdef setoxm1i +setoxm1i: + btst.b #sign_bit,LOCAL_EX(a0) ;check sign of source + bne ld_mone ;negative source: load -1 + bra ld_pinf ;positive source: load +inf +* +* FLOGN +* +* Test for 1.0 as an input argument, returning +zero. Also check +* the sign and return operr if negative. +* + xdef sslogn +sslogn: + btst.b #sign_bit,LOCAL_EX(a0) + bne t_operr ;take care of operands < 0 + cmpi.w #$3fff,LOCAL_EX(a0) ;test for 1.0 input + bne slogn + cmpi.l #$80000000,LOCAL_HI(a0) + bne slogn + tst.l LOCAL_LO(a0) + bne slogn + fmove.x PZERO,fp0 + rts + + xdef sslognd +sslognd: + btst.b #sign_bit,LOCAL_EX(a0) + beq slognd ;positive source: continue in slognd + bra t_operr ;take care of operands < 0 + +* +* FLOG10 +* + xdef sslog10 +sslog10: + btst.b #sign_bit,LOCAL_EX(a0) + bne t_operr ;take care of operands < 0 + cmpi.w #$3fff,LOCAL_EX(a0) ;test for 1.0 input + bne slog10 + cmpi.l #$80000000,LOCAL_HI(a0) + bne slog10 + tst.l LOCAL_LO(a0) + bne slog10 + fmove.x PZERO,fp0 + rts + + xdef sslog10d +sslog10d: + btst.b #sign_bit,LOCAL_EX(a0) + beq slog10d ;positive source: continue in slog10d + bra t_operr ;take care of operands < 0 + +* +* FLOG2 +* + xdef sslog2 +sslog2: + btst.b #sign_bit,LOCAL_EX(a0) + bne t_operr ;take care of operands < 0 + cmpi.w #$3fff,LOCAL_EX(a0) ;test for 1.0 input + bne slog2 + cmpi.l #$80000000,LOCAL_HI(a0) + bne slog2 + tst.l LOCAL_LO(a0) + bne slog2 + fmove.x PZERO,fp0 + rts + + xdef sslog2d +sslog2d: + btst.b #sign_bit,LOCAL_EX(a0) + beq slog2d ;positive source: continue in slog2d + bra t_operr ;take care of operands < 0 +
+* +* FMOD +* +pmodt: +* ;$21 fmod +* ;dtag,stag + dc.l smod ; 00,00 norm,norm = normal + dc.l smod_oper ; 00,01 norm,zero = nan with operr + dc.l smod_fpn ; 00,10 norm,inf = fpn + dc.l smod_snan ; 00,11 norm,nan = nan + dc.l smod_zro ; 01,00 zero,norm = +-zero + dc.l smod_oper ; 01,01 zero,zero = nan with operr + dc.l smod_zro ; 01,10 zero,inf = +-zero + dc.l smod_snan ; 01,11 zero,nan = nan + dc.l smod_oper ; 10,00 inf,norm = nan with operr + dc.l smod_oper ; 10,01 inf,zero = nan with operr + dc.l smod_oper ; 10,10 inf,inf = nan with operr + dc.l smod_snan ; 10,11 inf,nan = nan + dc.l smod_dnan ; 11,00 nan,norm = nan + dc.l smod_dnan ; 11,01 nan,zero = nan + dc.l smod_dnan ; 11,10 nan,inf = nan + dc.l smod_dnan ; 11,11 nan,nan = nan + + xdef pmod +pmod: + clr.b FPSR_QBYTE(a6) ; clear quotient field + bfextu STAG(a6){0:3},d0 ;stag = d0 + bfextu DTAG(a6){0:3},d1 ;dtag = d1 + +* +* Alias extended denorms to norms for the jump table. +* + bclr.l #2,d0 + bclr.l #2,d1 + + lsl.b #2,d1 + or.b d0,d1 ;d1{3:2} = dtag, d1{1:0} = stag +* ;Tag values: +* ;00 = norm or denorm +* ;01 = zero +* ;10 = inf +* ;11 = nan + lea pmodt,a1 + move.l (a1,d1.w*4),a1 + jmp (a1) + +smod_snan: + bra src_nan +smod_dnan: + bra dst_nan +smod_oper: + bra t_operr +smod_zro: + move.b ETEMP(a6),d1 ;get sign of src op + move.b FPTEMP(a6),d0 ;get sign of dst op + eor.b d0,d1 ;get exor of sign bits + btst.l #7,d1 ;test for sign + beq.b smod_zsn ;if clr, do not set sign bit + bset.b #q_sn_bit,FPSR_QBYTE(a6) ;set q-byte sign bit +smod_zsn: + btst.l #7,d0 ;test if + or - + beq ld_pzero ;if pos then load +0 + bra ld_mzero ;else neg load -0 + +smod_fpn: + move.b ETEMP(a6),d1 ;get sign of src op + move.b FPTEMP(a6),d0 ;get sign of dst op + eor.b d0,d1 ;get exor of sign bits + btst.l #7,d1 ;test for sign + beq.b smod_fsn ;if clr, do not set sign bit + bset.b #q_sn_bit,FPSR_QBYTE(a6) ;set q-byte sign bit +smod_fsn: + tst.b DTAG(a6) ;filter out denormal destination case + bpl.b smod_nrm ; + lea.l FPTEMP(a6),a0
;a0<- addr(FPTEMP) + bra t_resdnrm ;force UNFL(but exact) result +smod_nrm: + fmove.l USER_FPCR(a6),fpcr ;use user's rmode and precision + fmove.x FPTEMP(a6),fp0 ;return dest to fp0 + rts + +* +* FREM +* +premt: +* ;$25 frem +* ;dtag,stag + dc.l srem ; 00,00 norm,norm = normal + dc.l srem_oper ; 00,01 norm,zero = nan with operr + dc.l srem_fpn ; 00,10 norm,inf = fpn + dc.l srem_snan ; 00,11 norm,nan = nan + dc.l srem_zro ; 01,00 zero,norm = +-zero + dc.l srem_oper ; 01,01 zero,zero = nan with operr + dc.l srem_zro ; 01,10 zero,inf = +-zero + dc.l srem_snan ; 01,11 zero,nan = nan + dc.l srem_oper ; 10,00 inf,norm = nan with operr + dc.l srem_oper ; 10,01 inf,zero = nan with operr + dc.l srem_oper ; 10,10 inf,inf = nan with operr + dc.l srem_snan ; 10,11 inf,nan = nan + dc.l srem_dnan ; 11,00 nan,norm = nan + dc.l srem_dnan ; 11,01 nan,zero = nan + dc.l srem_dnan ; 11,10 nan,inf = nan + dc.l srem_dnan ; 11,11 nan,nan = nan + + xdef prem +prem: + clr.b FPSR_QBYTE(a6) ;clear quotient field + bfextu STAG(a6){0:3},d0 ;stag = d0 + bfextu DTAG(a6){0:3},d1 ;dtag = d1 +* +* Alias extended denorms to norms for the jump table.
+* + bclr #2,d0 + bclr #2,d1 + + lsl.b #2,d1 + or.b d0,d1 ;d1{3:2} = dtag, d1{1:0} = stag +* ;Tag values: +* ;00 = norm or denorm +* ;01 = zero +* ;10 = inf +* ;11 = nan + lea premt,a1 + move.l (a1,d1.w*4),a1 + jmp (a1) + +srem_snan: + bra src_nan +srem_dnan: + bra dst_nan +srem_oper: + bra t_operr +srem_zro: + move.b ETEMP(a6),d1 ;get sign of src op + move.b FPTEMP(a6),d0 ;get sign of dst op + eor.b d0,d1 ;get exor of sign bits + btst.l #7,d1 ;test for sign + beq.b srem_zsn ;if clr, do not set sign bit + bset.b #q_sn_bit,FPSR_QBYTE(a6) ;set q-byte sign bit +srem_zsn: + btst.l #7,d0 ;test if + or - + beq ld_pzero ;if pos then load +0 + bra ld_mzero ;else neg load -0 + +srem_fpn: + move.b ETEMP(a6),d1 ;get sign of src op + move.b FPTEMP(a6),d0 ;get sign of dst op + eor.b d0,d1 ;get exor of sign bits + btst.l #7,d1 ;test for sign + beq.b srem_fsn ;if clr, do not set sign bit + bset.b #q_sn_bit,FPSR_QBYTE(a6) ;set q-byte sign bit +srem_fsn: + tst.b DTAG(a6) ;filter out denormal destination case + bpl.b srem_nrm ; + lea.l FPTEMP(a6),a0 ;a0<- addr(FPTEMP) + bra t_resdnrm ;force UNFL(but exact) result +srem_nrm: + fmove.l USER_FPCR(a6),fpcr ;use user's rmode and precision + fmove.x FPTEMP(a6),fp0 ;return dest to fp0 + rts +* +* FSCALE +* +pscalet: +* ;$26 fscale +* ;dtag,stag + dc.l sscale ; 00,00 norm,norm = result + dc.l sscale ; 00,01 norm,zero = fpn + dc.l scl_opr ; 00,10 norm,inf = nan with operr + dc.l scl_snan ; 00,11 norm,nan = nan + dc.l scl_zro ; 01,00 zero,norm = +-zero + dc.l scl_zro ; 01,01 zero,zero = +-zero + dc.l scl_opr ; 01,10 zero,inf = nan with operr + dc.l scl_snan ; 01,11 zero,nan = nan + dc.l scl_inf ; 10,00 inf,norm = +-inf + dc.l scl_inf ; 10,01 inf,zero = +-inf + dc.l scl_opr ; 10,10 inf,inf = nan with operr + dc.l scl_snan ; 10,11 inf,nan = nan + dc.l scl_dnan ; 11,00 nan,norm = nan + dc.l scl_dnan ; 11,01 nan,zero = nan + dc.l scl_dnan ; 11,10 nan,inf = nan + dc.l scl_dnan ; 11,11 nan,nan = nan + + xdef pscale +pscale: + bfextu
STAG(a6){0:3},d0 ;stag in d0 + bfextu DTAG(a6){0:3},d1 ;dtag in d1 + bclr.l #2,d0 ;alias denorm into norm + bclr.l #2,d1 ;alias denorm into norm + lsl.b #2,d1 + or.b d0,d1 ;d1{3:2} = dtag, d1{1:0} = stag +* ;dtag values stag values: +* ;000 = norm 00 = norm +* ;001 = zero 01 = zero +* ;010 = inf 10 = inf +* ;011 = nan 11 = nan +* ;100 = dnrm (already aliased to norm by the bclr above) +* +* + lea.l pscalet,a1 ;load start of jump table + move.l (a1,d1.w*4),a1 ;load a1 with label depending on tag + jmp (a1) ;go to the routine + +scl_opr: + bra t_operr + +scl_dnan: + bra dst_nan + +scl_zro: + btst.b #sign_bit,FPTEMP_EX(a6) ;test if + or - + beq ld_pzero ;if pos then load +0 + bra ld_mzero ;if neg then load -0 +scl_inf: + btst.b #sign_bit,FPTEMP_EX(a6) ;test if + or - + beq ld_pinf ;if pos then load +inf + bra ld_minf ;else neg load -inf +scl_snan: + bra src_nan +* +* FSINCOS +* + xdef ssincosz +ssincosz: + btst.b #sign_bit,ETEMP(a6) ;get sign + beq.b sincosp + fmove.x MZERO,fp0 + bra.b sincoscom +sincosp: + fmove.x PZERO,fp0 +sincoscom: + fmovem.x PONE,fp1 ;do not allow FPSR to be affected + bra sto_cos ;store cosine result + + xdef ssincosi +ssincosi: + fmove.x QNAN,fp1 ;load NAN + bsr sto_cos ;store cosine result + fmove.x QNAN,fp0 ;load NAN + bra t_operr + + xdef ssincosnan +ssincosnan: + move.l ETEMP_EX(a6),FP_SCR1(a6) + move.l ETEMP_HI(a6),FP_SCR1+4(a6) + move.l ETEMP_LO(a6),FP_SCR1+8(a6) + bset.b #signan_bit,FP_SCR1+4(a6) + fmovem.x FP_SCR1(a6),fp1 + bsr sto_cos + bra src_nan +* +* This code forces default values for the zero, inf, and nan cases +* in the transcendentals code. The CC bits must be set in the +* stacked FPSR to be correctly reported.
+* +***Returns +PI/2 + xdef ld_ppi2 +ld_ppi2: + fmove.x PPIBY2,fp0 ;load +pi/2 (no cc bits to set) + bra t_inx2 ;set inex2 exc + +***Returns -PI/2 + xdef ld_mpi2 +ld_mpi2: + fmove.x MPIBY2,fp0 ;load -pi/2 + or.l #neg_mask,USER_FPSR(a6) ;set N bit in the stacked FPSR + bra t_inx2 ;set inex2 exc + +***Returns +inf + xdef ld_pinf +ld_pinf: + fmove.x PINF,fp0 ;load +inf + or.l #inf_mask,USER_FPSR(a6) ;set I bit + rts + +***Returns -inf + xdef ld_minf +ld_minf: + fmove.x MINF,fp0 ;load -inf + or.l #neg_mask+inf_mask,USER_FPSR(a6) ;set N and I bits + rts + +***Returns +1 + xdef ld_pone +ld_pone: + fmove.x PONE,fp0 ;load +1 (no cc bits to set) + rts + +***Returns -1 + xdef ld_mone +ld_mone: + fmove.x MONE,fp0 ;load -1 + or.l #neg_mask,USER_FPSR(a6) ;set N bit + rts + +***Returns +0 + xdef ld_pzero +ld_pzero: + fmove.x PZERO,fp0 ;load +0 + or.l #z_mask,USER_FPSR(a6) ;set Z bit + rts + +***Returns -0 + xdef ld_mzero +ld_mzero: + fmove.x MZERO,fp0 ;load -0 + or.l #neg_mask+z_mask,USER_FPSR(a6) ;set N and Z bits + rts + + end diff --git a/sys/arch/m68k/fpsp/fpsp.h b/sys/arch/m68k/fpsp/fpsp.h new file mode 100644 index 00000000000..4ce4d092ce7 --- /dev/null +++ b/sys/arch/m68k/fpsp/fpsp.h @@ -0,0 +1,373 @@ +* $NetBSD: fpsp.h,v 1.2 1994/10/26 07:49:04 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials.
+* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* fpsp.h 3.3 3.3 +* + +* fpsp.h --- stack frame offsets during FPSP exception handling +* +* These equates are used to access the exception frame, the fsave +* frame and any local variables needed by the FPSP package. +* +* All FPSP handlers begin by executing: +* +* link a6,#-LOCAL_SIZE +* fsave -(a7) +* movem.l d0-d1/a0-a1,USER_DA(a6) +* fmovem.x fp0-fp3,USER_FP0(a6) +* fmove.l fpsr/fpcr/fpiar,USER_FPSR(a6) +* +* After initialization, the stack looks like this: +* +* A7 ---> +-------------------------------+ +* | | +* | FPU fsave area | +* | | +* +-------------------------------+ +* | | +* | FPSP Local Variables | +* | including | +* | saved registers | +* | | +* +-------------------------------+ +* A6 ---> | Saved A6 | +* +-------------------------------+ +* | | +* | Exception Frame | +* | | +* | | +* +* Positive offsets from A6 refer to the exception frame. Negative +* offsets refer to the Local Variable area and the fsave area. +* The fsave frame is also accessible 'from the top' via A7. 
+* +* On exit, the handlers execute: +* +* movem.l USER_DA(a6),d0-d1/a0-a1 +* fmovem.x USER_FP0(a6),fp0-fp3 +* fmove.l USER_FPSR(a6),fpsr/fpcr/fpiar +* frestore (a7)+ +* unlk a6 +* +* and then either 'bra fpsp_done' if the exception was completely +* handled by the package, or 'bra real_xxxx' which is an external +* label to a routine that will process a real exception of the +* type that was generated. Some handlers may omit the 'frestore' +* if the FPU state after the exception is idle. +* +* Sometimes the exception handler will transform the fsave area +* because it needs to report an exception back to the user. This +* can happen if the package is entered for an unimplemented float +* instruction that generates (say) an underflow. Alternatively, +* a second fsave frame can be pushed onto the stack and the +* handler exit code will reload the new frame and discard the old. +* +* The registers d0, d1, a0, a1 and fp0-fp3 are always saved and +* restored from the 'local variable' area and can be used as +* temporaries. If a routine needs to change any +* of these registers, it should modify the saved copy and let +* the handler exit code restore the value. 
+* +*---------------------------------------------------------------------- +* +* Local Variables on the stack +* +LOCAL_SIZE equ 192 ;bytes needed for local variables +LV equ -LOCAL_SIZE ;convenient base value +* +USER_DA equ LV+0 ;save space for D0-D1,A0-A1 +USER_D0 equ LV+0 ;saved user D0 +USER_D1 equ LV+4 ;saved user D1 +USER_A0 equ LV+8 ;saved user A0 +USER_A1 equ LV+12 ;saved user A1 +USER_FP0 equ LV+16 ;saved user FP0 +USER_FP1 equ LV+28 ;saved user FP1 +USER_FP2 equ LV+40 ;saved user FP2 +USER_FP3 equ LV+52 ;saved user FP3 +USER_FPCR equ LV+64 ;saved user FPCR +FPCR_ENABLE equ USER_FPCR+2 ; FPCR exception enable +FPCR_MODE equ USER_FPCR+3 ; FPCR rounding mode control +USER_FPSR equ LV+68 ;saved user FPSR +FPSR_CC equ USER_FPSR+0 ; FPSR condition code +FPSR_QBYTE equ USER_FPSR+1 ; FPSR quotient +FPSR_EXCEPT equ USER_FPSR+2 ; FPSR exception +FPSR_AEXCEPT equ USER_FPSR+3 ; FPSR accrued exception +USER_FPIAR equ LV+72 ;saved user FPIAR +FP_SCR1 equ LV+76 ;room for a temporary float value +FP_SCR2 equ LV+92 ;room for a temporary float value +L_SCR1 equ LV+108 ;room for a temporary long value +L_SCR2 equ LV+112 ;room for a temporary long value +STORE_FLG equ LV+116 ;1-byte flag (set to $FF by serror) +BINDEC_FLG equ LV+117 ;used in bindec +DNRM_FLG equ LV+118 ;used in res_func +RES_FLG equ LV+119 ;used in res_func +DY_MO_FLG equ LV+120 ;dyadic/monadic flag +UFLG_TMP equ LV+121 ;temporary for uflag errata +CU_ONLY equ LV+122 ;cu-only flag +VER_TMP equ LV+123 ;temp holding for version number +L_SCR3 equ LV+124 ;room for a temporary long value +FP_SCR3 equ LV+128 ;room for a temporary float value +FP_SCR4 equ LV+144 ;room for a temporary float value +FP_SCR5 equ LV+160 ;room for a temporary float value +FP_SCR6 equ LV+176 ;last 16 bytes of the area (176+16 = LOCAL_SIZE) +* +*NEXT equ LV+192 ;need to increase LOCAL_SIZE +* +*-------------------------------------------------------------------------- +* +* fsave offsets and bit definitions +* +* Offsets are defined from the end of an fsave because the last 10 +* words of a busy frame are the same as
the unimplemented frame. +* +CU_SAVEPC equ LV-92 ;micro-pc for CU (1 byte) +FPR_DIRTY_BITS equ LV-91 ;fpr dirty bits +* +WBTEMP equ LV-76 ;write back temp (12 bytes) +WBTEMP_EX equ WBTEMP ;wbtemp sign and exponent (2 bytes) +WBTEMP_HI equ WBTEMP+4 ;wbtemp mantissa [63:32] (4 bytes) +WBTEMP_LO equ WBTEMP+8 ;wbtemp mantissa [31:00] (4 bytes) +* +WBTEMP_SGN equ WBTEMP+2 ;used to store sign +* +FPSR_SHADOW equ LV-64 ;fpsr shadow reg +* +FPIARCU equ LV-60 ;Instr. addr. reg. for CU (4 bytes) +* +CMDREG2B equ LV-52 ;cmd reg for machine 2 +CMDREG3B equ LV-48 ;cmd reg for E3 exceptions (2 bytes) +* +NMNEXC equ LV-44 ;NMNEXC (unsup,snan bits only) +nmn_unsup_bit equ 1 +nmn_snan_bit equ 0 +* +NMCEXC equ LV-43 ;NMNEXC & NMCEXC +nmn_operr_bit equ 7 +nmn_ovfl_bit equ 6 +nmn_unfl_bit equ 5 +nmc_unsup_bit equ 4 +nmc_snan_bit equ 3 +nmc_operr_bit equ 2 +nmc_ovfl_bit equ 1 +nmc_unfl_bit equ 0 +* +STAG equ LV-40 ;source tag (1 byte) +WBTEMP_GRS equ LV-40 ;alias wbtemp guard, round, sticky +guard_bit equ 1 ;guard bit is bit number 1 +round_bit equ 0 ;round bit is bit number 0 +stag_mask equ $E0 ;upper 3 bits are source tag type +denorm_bit equ 7 ;bit determines if denorm or unnorm +etemp15_bit equ 4 ;etemp exponent bit #15 +wbtemp66_bit equ 2 ;wbtemp mantissa bit #66 +wbtemp1_bit equ 1 ;wbtemp mantissa bit #1 +wbtemp0_bit equ 0 ;wbtemp mantissa bit #0 +* +STICKY equ LV-39 ;holds sticky bit +sticky_bit equ 7 +* +CMDREG1B equ LV-36 ;cmd reg for E1 exceptions (2 bytes) +kfact_bit equ 12 ;distinguishes static/dynamic k-factor +* ;on packed move out's. NOTE: this +* ;equate only works when CMDREG1B is in +* ;a register.
+* +CMDWORD equ LV-35 ;command word in cmd1b +direction_bit equ 5 ;bit 0 in opclass +size_bit2 equ 12 ;bit 2 in size field +* +DTAG equ LV-32 ;dest tag (1 byte) +dtag_mask equ $E0 ;upper 3 bits are dest type tag +fptemp15_bit equ 4 ;fptemp exponent bit #15 +* +WB_BYTE equ LV-31 ;holds WBTE15 bit (1 byte) +wbtemp15_bit equ 4 ;wbtemp exponent bit #15 +* +E_BYTE equ LV-28 ;holds E1 and E3 bits (1 byte) +E1 equ 2 ;which bit is E1 flag +E3 equ 1 ;which bit is E3 flag +SFLAG equ 0 ;which bit is S flag +* +T_BYTE equ LV-27 ;holds T and U bits (1 byte) +XFLAG equ 7 ;which bit is X flag +UFLAG equ 5 ;which bit is U flag +TFLAG equ 4 ;which bit is T flag +* +FPTEMP equ LV-24 ;fptemp (12 bytes) +FPTEMP_EX equ FPTEMP ;fptemp sign and exponent (2 bytes) +FPTEMP_HI equ FPTEMP+4 ;fptemp mantissa [63:32] (4 bytes) +FPTEMP_LO equ FPTEMP+8 ;fptemp mantissa [31:00] (4 bytes) +* +FPTEMP_SGN equ FPTEMP+2 ;used to store sign +* +ETEMP equ LV-12 ;etemp (12 bytes) +ETEMP_EX equ ETEMP ;etemp sign and exponent (2 bytes) +ETEMP_HI equ ETEMP+4 ;etemp mantissa [63:32] (4 bytes) +ETEMP_LO equ ETEMP+8 ;etemp mantissa [31:00] (4 bytes) +* +ETEMP_SGN equ ETEMP+2 ;used to store sign +* +EXC_SR equ 4 ;exception frame status register +EXC_PC equ 6 ;exception frame program counter +EXC_VEC equ 10 ;exception frame vector (format+vector#) +EXC_EA equ 12 ;exception frame effective address +* +*-------------------------------------------------------------------------- +* +* FPSR/FPCR bits (bit numbers are relative to the byte that holds them) +* +neg_bit equ 3 negative result +z_bit equ 2 zero result +inf_bit equ 1 infinity result +nan_bit equ 0 not-a-number result +* +q_sn_bit equ 7 sign bit of quotient byte +* +bsun_bit equ 7 branch on unordered +snan_bit equ 6 signalling nan +operr_bit equ 5 operand error +ovfl_bit equ 4 overflow +unfl_bit equ 3 underflow +dz_bit equ 2 divide by zero +inex2_bit equ 1 inexact result 2 +inex1_bit equ 0 inexact result 1 +* +aiop_bit equ 7 accrued illegal operation +aovfl_bit equ 6 accrued overflow +aunfl_bit equ 5 accrued
underflow +adz_bit equ 4 accrued divide by zero +ainex_bit equ 3 accrued inexact +* +* FPSR individual bit masks +* +neg_mask equ $08000000 +z_mask equ $04000000 +inf_mask equ $02000000 +nan_mask equ $01000000 +* +bsun_mask equ $00008000 +snan_mask equ $00004000 +operr_mask equ $00002000 +ovfl_mask equ $00001000 +unfl_mask equ $00000800 +dz_mask equ $00000400 +inex2_mask equ $00000200 +inex1_mask equ $00000100 +* +aiop_mask equ $00000080 accrued illegal operation +aovfl_mask equ $00000040 accrued overflow +aunfl_mask equ $00000020 accrued underflow +adz_mask equ $00000010 accrued divide by zero +ainex_mask equ $00000008 accrued inexact +* +* FPSR combinations used in the FPSP +* +dzinf_mask equ inf_mask+dz_mask+adz_mask +opnan_mask equ nan_mask+operr_mask+aiop_mask +nzi_mask equ $01ffffff clears N, Z, and I +unfinx_mask equ unfl_mask+inex2_mask+aunfl_mask+ainex_mask +unf2inx_mask equ unfl_mask+inex2_mask+ainex_mask unfinx_mask without aunfl_mask +ovfinx_mask equ ovfl_mask+inex2_mask+aovfl_mask+ainex_mask +inx1a_mask equ inex1_mask+ainex_mask +inx2a_mask equ inex2_mask+ainex_mask +snaniop_mask equ nan_mask+snan_mask+aiop_mask +naniop_mask equ nan_mask+aiop_mask +neginf_mask equ neg_mask+inf_mask +infaiop_mask equ inf_mask+aiop_mask +negz_mask equ neg_mask+z_mask +opaop_mask equ operr_mask+aiop_mask +unfl_inx_mask equ unfl_mask+aunfl_mask+ainex_mask +ovfl_inx_mask equ ovfl_mask+aovfl_mask+ainex_mask +* +*-------------------------------------------------------------------------- +* +* FPCR rounding modes +* +x_mode equ $00 round to extended +s_mode equ $40 round to single +d_mode equ $80 round to double +* +rn_mode equ $00 round nearest +rz_mode equ $10 round to zero +rm_mode equ $20 round to minus infinity +rp_mode equ $30 round to plus infinity +* +*-------------------------------------------------------------------------- +* +* Miscellaneous equates +* +signan_bit equ 6 signalling nan bit in mantissa +sign_bit equ 7 sign bit in the first byte of an operand +* +rnd_stky_bit equ 29 round/sticky bit of mantissa +* this can only be used if
in a data register +sx_mask equ $01800000 set s and x bits in word $48 +* +LOCAL_EX equ 0 +LOCAL_SGN equ 2 +LOCAL_HI equ 4 +LOCAL_LO equ 8 +LOCAL_GRS equ 12 valid ONLY for FP_SCR1, FP_SCR2 +* +* +norm_tag equ $00 tag bits in {7:5} position +zero_tag equ $20 +inf_tag equ $40 +nan_tag equ $60 +dnrm_tag equ $80 +* +* fsave sizes and formats +* +VER_4 equ $40 fpsp compatible version numbers +* are in the $40s {$40-$4f} +VER_40 equ $40 original version number +VER_41 equ $41 revision version number +* +BUSY_SIZE equ 100 size of busy frame +BUSY_FRAME equ LV-BUSY_SIZE start of busy frame +* +UNIMP_40_SIZE equ 44 size of orig unimp frame +UNIMP_41_SIZE equ 52 size of rev unimp frame +* +IDLE_SIZE equ 4 size of idle frame +IDLE_FRAME equ LV-IDLE_SIZE start of idle frame +* +* exception vectors +* +TRACE_VEC equ $2024 trace trap +FLINE_VEC equ $002C 'real' F-line +UNIMP_VEC equ $202C unimplemented +INEX_VEC equ $00C4 presumably the FP inexact result vector - TODO confirm +* +dbl_thresh equ $3C01 = $3FFF-$3FE; presumably the double denorm threshold +sgl_thresh equ $3F81 = $3FFF-$7E; presumably the single denorm threshold +* diff --git a/sys/arch/m68k/fpsp/gen_except.sa b/sys/arch/m68k/fpsp/gen_except.sa new file mode 100644 index 00000000000..0d13020dac7 --- /dev/null +++ b/sys/arch/m68k/fpsp/gen_except.sa @@ -0,0 +1,493 @@ +* $NetBSD: gen_except.sa,v 1.3 1994/10/26 07:49:07 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials.
+* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* gen_except.sa 3.7 1/16/92 +* +* gen_except --- FPSP routine to detect reportable exceptions +* +* This routine compares the exception enable byte of the +* user_fpcr on the stack with the exception status byte +* of the user_fpsr. +* +* Any routine which may report an exception must load +* the stack frame in memory with the exceptional operand(s). +* +* Priority for exceptions is: +* +* Highest: bsun +* snan +* operr +* ovfl +* unfl +* dz +* inex2 +* Lowest: inex1 +* +* Note: The IEEE standard specifies that inex2 is to be +* reported if ovfl occurs and the ovfl enable bit is not +* set but the inex2 enable bit is.
+* + +GEN_EXCEPT IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref real_trace + xref fpsp_done + xref fpsp_fmt_error + +exc_tbl: + dc.l bsun_exc + dc.l commonE1 + dc.l commonE1 + dc.l ovfl_unfl + dc.l ovfl_unfl + dc.l commonE1 + dc.l commonE3 + dc.l commonE3 + dc.l no_match + + xdef gen_except +gen_except: + cmpi.b #IDLE_SIZE-4,1(a7) ;test for idle frame + beq.w do_check ;go handle idle frame + cmpi.b #UNIMP_40_SIZE-4,1(a7) ;test for orig unimp frame + beq.b unimp_x ;go handle unimp frame + cmpi.b #UNIMP_41_SIZE-4,1(a7) ;test for rev unimp frame + beq.b unimp_x ;go handle unimp frame + cmpi.b #BUSY_SIZE-4,1(a7) ;if size <> $60, fmt error + bne.l fpsp_fmt_error + lea.l BUSY_SIZE+LOCAL_SIZE(a7),a1 ;init a1 so fpsp.h +* ;equates will work +* Fix up the new busy frame with entries from the unimp frame +* + move.l ETEMP_EX(a6),ETEMP_EX(a1) ;copy etemp from unimp + move.l ETEMP_HI(a6),ETEMP_HI(a1) ;frame to busy frame + move.l ETEMP_LO(a6),ETEMP_LO(a1) + move.l CMDREG1B(a6),CMDREG1B(a1) ;set inst in frame to unimp + move.l CMDREG1B(a6),d0 ;fix cmd1b to make it + and.l #$03c30000,d0 ;work for cmd3b + bfextu CMDREG1B(a6){13:1},d1 ;extract bit 2 + lsl.l #5,d1 + swap d1 + or.l d1,d0 ;put it in the right place + bfextu CMDREG1B(a6){10:3},d1 ;extract bit 3,4,5 + lsl.l #2,d1 + swap d1 + or.l d1,d0 ;put them in the right place + move.l d0,CMDREG3B(a1) ;in the busy frame +* +* Or in the FPSR from the emulation with the USER_FPSR on the stack. +* + fmove.l FPSR,d0 + or.l d0,USER_FPSR(a6) + move.l USER_FPSR(a6),FPSR_SHADOW(a1) ;set exc bits + or.l #sx_mask,E_BYTE(a1) ;set s and x bits (sx_mask) + bra do_clean + +* +* Frame is an unimp frame possibly resulting from an fmove <ea>,fp0 +* that caused an exception +* +* a1 is modified to point into the new frame allowing fpsp equates +* to be valid.
+* +unimp_x: + cmpi.b #UNIMP_40_SIZE-4,1(a7) ;test for orig unimp frame + bne.b test_rev + lea.l UNIMP_40_SIZE+LOCAL_SIZE(a7),a1 + bra.b unimp_con +test_rev: + cmpi.b #UNIMP_41_SIZE-4,1(a7) ;test for rev unimp frame + bne.l fpsp_fmt_error ;if not $28 or $30 + lea.l UNIMP_41_SIZE+LOCAL_SIZE(a7),a1 + +unimp_con: +* +* Fix up the new unimp frame with entries from the old unimp frame +* + move.l CMDREG1B(a6),CMDREG1B(a1) ;set inst in frame to unimp +* +* Or in the FPSR from the emulation with the USER_FPSR on the stack. +* + fmove.l FPSR,d0 + or.l d0,USER_FPSR(a6) + bra do_clean + +* +* Frame is idle, so check for exceptions reported through +* USER_FPSR and set the unimp frame accordingly. +* A7 must be incremented to the point before the +* idle fsave vector to the unimp vector. +* + +do_check: + add.l #4,A7 ;point A7 back to unimp frame +* +* Or in the FPSR from the emulation with the USER_FPSR on the stack. +* + fmove.l FPSR,d0 + or.l d0,USER_FPSR(a6) +* +* On a busy frame, we must clear the nmnexc bits. +* + cmpi.b #BUSY_SIZE-4,1(a7) ;check frame type + bne.b check_fr ;if busy, clr nmnexc + clr.w NMNEXC(a6) ;clr nmnexc & nmcexc + btst.b #5,CMDREG1B(a6) ;test for fmove out + bne.b frame_com + move.l USER_FPSR(a6),FPSR_SHADOW(a6) ;set exc bits + or.l #sx_mask,E_BYTE(a6) + bra.b frame_com +check_fr: + cmp.b #UNIMP_40_SIZE-4,1(a7) + beq.b frame_com + clr.w NMNEXC(a6) +frame_com: + move.b FPCR_ENABLE(a6),d0 ;get fpcr enable byte + and.b FPSR_EXCEPT(a6),d0 ;and in the fpsr exc byte + bfffo d0{24:8},d1 ;test for first set bit + lea.l exc_tbl,a0 ;load jmp table address + subi.b #24,d1 ;normalize bit offset to 0-8 + move.l (a0,d1.w*4),a0 ;load routine address based +* ;on first enabled exc (8 = no_match) + jmp (a0) ;jump to routine +* +* Bsun is not possible in unimp or unsupp +* +bsun_exc: + bra do_clean +* +* The typical work to be done to the unimp frame to report an +* exception is to set the E1/E3 byte and clr the U flag.
+* commonE1 does this for E1 exceptions, which are snan, +* operr, and dz. commonE3 does this for E3 exceptions, which +* are inex2 and inex1, and also clears the E1 exception bit +* left over from the unimp exception. +* +commonE1: + bset.b #E1,E_BYTE(a6) ;set E1 flag + bra.w commonE ;go clean and exit + +commonE3: + tst.b UFLG_TMP(a6) ;test flag for unsup/unimp state + bne.b unsE3 +uniE3: + bset.b #E3,E_BYTE(a6) ;set E3 flag + bclr.b #E1,E_BYTE(a6) ;clr E1 from unimp + bra.w commonE + +unsE3: + tst.b RES_FLG(a6) + bne.b unsE3_0 +unsE3_1: + bset.b #E3,E_BYTE(a6) ;set E3 flag +unsE3_0: + bclr.b #E1,E_BYTE(a6) ;clr E1 flag + move.l CMDREG1B(a6),d0 + and.l #$03c30000,d0 ;work for cmd3b + bfextu CMDREG1B(a6){13:1},d1 ;extract bit 2 + lsl.l #5,d1 + swap d1 + or.l d1,d0 ;put it in the right place + bfextu CMDREG1B(a6){10:3},d1 ;extract bit 3,4,5 + lsl.l #2,d1 + swap d1 + or.l d1,d0 ;put them in the right place + move.l d0,CMDREG3B(a6) ;in the busy frame + +commonE: + bclr.b #UFLAG,T_BYTE(a6) ;clr U flag from unimp + bra.w do_clean ;go clean and exit +* +* No bits in the enable byte match existing exceptions. Check for +* the case of the ovfl exc without the ovfl enabled, but with +* inex2 enabled. +* +no_match: + btst.b #inex2_bit,FPCR_ENABLE(a6) ;check for ovfl/inex2 case + beq.b no_exc ;if clear, exit + btst.b #ovfl_bit,FPSR_EXCEPT(a6) ;now check ovfl + beq.b no_exc ;if clear, exit + bra.b ovfl_unfl ;go to ovfl_unfl to determine if +* ;it is an unsupp or unimp exc + +* No exceptions are to be reported. If the instruction was +* unimplemented, no FPU restore is necessary. If it was +* unsupported, we must perform the restore.
+no_exc: + tst.b UFLG_TMP(a6) ;test flag for unsupp/unimp state + beq.b uni_no_exc +uns_no_exc: + tst.b RES_FLG(a6) ;check if frestore is needed + bne.w do_clean ;if clear, no frestore needed +uni_no_exc: + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + unlk a6 + bra finish_up +* +* Unsupported Data Type Handler: +* Ovfl: +* An fmoveout that results in an overflow is reported this way. +* Unfl: +* An fmoveout that results in an underflow is reported this way. +* +* Unimplemented Instruction Handler: +* Ovfl: +* Only scosh, setox, ssinh, stwotox, and scale can set overflow in +* this manner. +* Unfl: +* Stwotox, setox, and scale can set underflow in this manner. +* Any of the other Library Routines such that f(x)=x in which +* x is an extended denorm can report an underflow exception. +* It is the responsibility of the exception-causing routine +* to make sure that WBTEMP is correct. +* +* The exceptional operand is in FP_SCR1. +* +ovfl_unfl: + tst.b UFLG_TMP(a6) ;test flag for unsupp/unimp state + beq.b ofuf_con +* +* The caller was from an unsupported data type trap. Test if the +* caller set CU_ONLY. If so, the exceptional operand is expected in +* FPTEMP, rather than WBTEMP.
+* + tst.b CU_ONLY(a6) ;test if inst is cu-only + beq.w unsE3 +* move.w #$fe,CU_SAVEPC(a6) + clr.b CU_SAVEPC(a6) + bset.b #E1,E_BYTE(a6) ;set E1 exception flag + move.w ETEMP_EX(a6),FPTEMP_EX(a6) + move.l ETEMP_HI(a6),FPTEMP_HI(a6) + move.l ETEMP_LO(a6),FPTEMP_LO(a6) + bset.b #fptemp15_bit,DTAG(a6) ;set fpte15 + bclr.b #UFLAG,T_BYTE(a6) ;clr U flag from unimp + bra.w do_clean ;go clean and exit + +ofuf_con: + move.b (a7),VER_TMP(a6) ;save version number + cmpi.b #BUSY_SIZE-4,1(a7) ;check for busy frame + beq.b busy_fr ;if unimp, grow to busy + cmpi.b #VER_40,(a7) ;test for orig unimp frame + bne.b try_41 ;if not, test for rev frame + moveq.l #13,d0 ;need to zero 14 lwords + bra.b ofuf_fin +try_41: + cmpi.b #VER_41,(a7) ;test for rev unimp frame + bne.l fpsp_fmt_error ;if neither, exit with error + moveq.l #11,d0 ;need to zero 12 lwords + +ofuf_fin: + clr.l (a7) +loop1: + clr.l -(a7) ;clear and dec a7 + dbra.w d0,loop1 + move.b VER_TMP(a6),(a7) + move.b #BUSY_SIZE-4,1(a7) ;write busy fmt word. +busy_fr: + move.l FP_SCR1(a6),WBTEMP_EX(a6) ;write + move.l FP_SCR1+4(a6),WBTEMP_HI(a6) ;exceptional op to + move.l FP_SCR1+8(a6),WBTEMP_LO(a6) ;wbtemp + bset.b #E3,E_BYTE(a6) ;set E3 flag + bclr.b #E1,E_BYTE(a6) ;make sure E1 is clear + bclr.b #UFLAG,T_BYTE(a6) ;clr U flag + move.l USER_FPSR(a6),FPSR_SHADOW(a6) + or.l #sx_mask,E_BYTE(a6) + move.l CMDREG1B(a6),d0 ;fix cmd1b to make it + and.l #$03c30000,d0 ;work for cmd3b + bfextu CMDREG1B(a6){13:1},d1 ;extract bit 2 + lsl.l #5,d1 + swap d1 + or.l d1,d0 ;put it in the right place + bfextu CMDREG1B(a6){10:3},d1 ;extract bit 3,4,5 + lsl.l #2,d1 + swap d1 + or.l d1,d0 ;put them in the right place + move.l d0,CMDREG3B(a6) ;in the busy frame + +* +* Check if the frame to be restored is busy or unimp. +*** NOTE *** Bug fix for errata (0d43b #3) +* If the frame is unimp, we must create a busy frame to +* fix the bug with the nmnexc bits in cases in which they +* are set by a previous instruction and not cleared by +* the save.
The frame will be unimp only if the final +* instruction in an emulation routine caused the exception +* by doing an fmove <ea>,fp0. The exception operand, in +* internal format, is in fptemp. +* +do_clean: + cmpi.b #UNIMP_40_SIZE-4,1(a7) + bne.b do_con + moveq.l #13,d0 ;in orig, need to zero 14 lwords + bra.b do_build +do_con: + cmpi.b #UNIMP_41_SIZE-4,1(a7) + bne.b do_restore ;frame must be busy + moveq.l #11,d0 ;in rev, need to zero 12 lwords + +do_build: + move.b (a7),VER_TMP(a6) + clr.l (a7) +loop2: + clr.l -(a7) ;clear and dec a7 + dbra.w d0,loop2 +* +* Use a1 as pointer into new frame. a6 is not correct if an unimp or +* busy frame was created as the result of an exception on the final +* instruction of an emulation routine. +* +* We need to set the nmcexc bits if the exception is E1. Otherwise, +* the exc taken will be inex2. +* + lea.l BUSY_SIZE+LOCAL_SIZE(a7),a1 ;init a1 for new frame + move.b VER_TMP(a6),(a7) ;write busy fmt word + move.b #BUSY_SIZE-4,1(a7) + move.l FP_SCR1(a6),WBTEMP_EX(a1) ;write + move.l FP_SCR1+4(a6),WBTEMP_HI(a1) ;exceptional op to + move.l FP_SCR1+8(a6),WBTEMP_LO(a1) ;wbtemp +* btst.b #E1,E_BYTE(a1) +* beq.b do_restore + bfextu USER_FPSR(a6){17:4},d0 ;get snan/operr/ovfl/unfl bits + bfins d0,NMCEXC(a1){4:4} ;and insert them in nmcexc + move.l USER_FPSR(a6),FPSR_SHADOW(a1) ;set exc bits + or.l #sx_mask,E_BYTE(a1) + +do_restore: + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + tst.b RES_FLG(a6) ;RES_FLG indicates a "continuation" frame + beq cont + bsr bug1384 +cont: + unlk a6 +* +* If trace mode enabled, then go to trace handler. This handler +* cannot have any fp instructions. If there are fp inst's and an +* exception has been restored into the machine then the exception +* will occur upon execution of the fp inst. This is not desirable +* in the kernel (supervisor mode). See MC68040 manual Section 9.3.8. 
+* +finish_up: + btst.b #7,(a7) ;test T1 in SR + bne.b g_trace + btst.b #6,(a7) ;test T0 in SR + bne.b g_trace + bra.l fpsp_done +* +* Change integer stack to look like trace stack +* The address of the instruction that caused the +* exception is already in the integer stack (is +* the same as the saved fpiar) +* +* If the current frame is already a 6-word stack then all +* that needs to be done is to change the vector# to TRACE. +* If the frame is only a 4-word stack (meaning we got here +* on an Unsupported data type exception), then we need to grow +* the stack an extra 2 words and get the FPIAR from the FPU. +* +g_trace: + bftst EXC_VEC-4(sp){0:4} + bne g_easy + + subq.l #4,sp make room + move.l 4(sp),(sp) + move.l 8(sp),4(sp) + sub.l #BUSY_SIZE,sp + fsave (sp) + fmove.l fpiar,BUSY_SIZE+EXC_EA-4(sp) + frestore (sp) + add.l #BUSY_SIZE,sp + +g_easy: + move.w #TRACE_VEC,EXC_VEC-4(a7) + bra.l real_trace +* +* This is a work-around for hardware bug 1384. +* +bug1384: + link a5,#0 + fsave -(sp) + cmpi.b #$41,(sp) ; check for correct frame + beq frame_41 + bgt nofix ; if more advanced mask, do nada + +frame_40: + tst.b 1(sp) ; check to see if idle + bne notidle +idle40: + clr.l (sp) ; get rid of old fsave frame + move.l d1,USER_D1(a6) ; save d1 + move.w #8,d1 ; place unimp frame instead +loop40: clr.l -(sp) + dbra d1,loop40 + move.l USER_D1(a6),d1 ; restore d1 + move.l #$40280000,-(sp) + frestore (sp)+ + unlk a5 + rts + +frame_41: + tst.b 1(sp) ; check to see if idle + bne notidle +idle41: + clr.l (sp) ; get rid of old fsave frame + move.l d1,USER_D1(a6) ; save d1 + move.w #10,d1 ; place unimp frame instead +loop41: clr.l -(sp) + dbra d1,loop41 + move.l USER_D1(a6),d1 ; restore d1 + move.l #$41300000,-(sp) + frestore (sp)+ + unlk a5 + rts + +notidle: + bclr.b #etemp15_bit,-40(a5) + frestore (sp)+ + unlk a5 + rts + +nofix: + frestore (sp)+ + unlk a5 + rts + + end diff --git a/sys/arch/m68k/fpsp/get_op.sa b/sys/arch/m68k/fpsp/get_op.sa new file mode 100644 index 
00000000000..c79646e0438 --- /dev/null +++ b/sys/arch/m68k/fpsp/get_op.sa @@ -0,0 +1,701 @@ +* $NetBSD: get_op.sa,v 1.3 1994/10/26 07:49:09 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* get_op.sa 3.6 5/19/92 +* +* get_op.sa 3.5 4/26/91 +* +* Description: This routine is called by the unsupported format/data +* type exception handler ('unsupp' - vector 55) and the unimplemented +* instruction exception handler ('unimp' - vector 11). 'get_op' +* determines the opclass (0, 2, or 3) and branches to the +* opclass handler routine. 
See 68881/2 User's Manual table 4-11 +* for a description of the opclasses. +* +* For UNSUPPORTED data/format (exception vector 55) and for +* UNIMPLEMENTED instructions (exception vector 11) the following +* applies: +* +* - For unnormalized numbers (opclass 0, 2, or 3) the +* number(s) is normalized and the operand type tag is updated. +* +* - For a packed number (opclass 2) the number is unpacked and the +* operand type tag is updated. +* +* - For denormalized numbers (opclass 0 or 2) the number(s) is not +* changed but passed to the next module. The next module for +* unimp is do_func, the next module for unsupp is res_func. +* +* For UNSUPPORTED data/format (exception vector 55) only the +* following applies: +* +* - If there is a move out with a packed number (opclass 3) the +* number is packed and written to user memory. For the other +* opclasses the number(s) are written back to the fsave stack +* and the instruction is then restored back into the '040. The +* '040 is then able to complete the instruction. +* +* For example: +* fadd.x fpm,fpn where the fpm contains an unnormalized number. +* The '040 takes an unsupported data trap and gets to this +* routine. The number is normalized, put back on the stack and +* then an frestore is done to restore the instruction back into +* the '040. The '040 then re-executes the fadd.x fpm,fpn with +* a normalized number in the source and the instruction is +* successful. +* +* Next consider if in the process of normalizing the un- +* normalized number it becomes a denormalized number. The +* routine which converts the unnorm to a norm (called mk_norm) +* detects this and tags the number as a denorm. The routine +* res_func sees the denorm tag and converts the denorm to a +* norm. The instruction is then restored back into the '040 +* which re-executes the instruction. 
+* + +GET_OP IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xdef PIRN,PIRZRM,PIRP + xdef SMALRN,SMALRZRM,SMALRP + xdef BIGRN,BIGRZRM,BIGRP + +PIRN: + dc.l $40000000,$c90fdaa2,$2168c235 ;pi +PIRZRM: + dc.l $40000000,$c90fdaa2,$2168c234 ;pi +PIRP: + dc.l $40000000,$c90fdaa2,$2168c235 ;pi + +*round to nearest +SMALRN: + dc.l $3ffd0000,$9a209a84,$fbcff798 ;log10(2) + dc.l $40000000,$adf85458,$a2bb4a9a ;e + dc.l $3fff0000,$b8aa3b29,$5c17f0bc ;log2(e) + dc.l $3ffd0000,$de5bd8a9,$37287195 ;log10(e) + dc.l $00000000,$00000000,$00000000 ;0.0 +* round to zero;round to negative infinity +SMALRZRM: + dc.l $3ffd0000,$9a209a84,$fbcff798 ;log10(2) + dc.l $40000000,$adf85458,$a2bb4a9a ;e + dc.l $3fff0000,$b8aa3b29,$5c17f0bb ;log2(e) + dc.l $3ffd0000,$de5bd8a9,$37287195 ;log10(e) + dc.l $00000000,$00000000,$00000000 ;0.0 +* round to positive infinity +SMALRP: + dc.l $3ffd0000,$9a209a84,$fbcff799 ;log10(2) + dc.l $40000000,$adf85458,$a2bb4a9b ;e + dc.l $3fff0000,$b8aa3b29,$5c17f0bc ;log2(e) + dc.l $3ffd0000,$de5bd8a9,$37287195 ;log10(e) + dc.l $00000000,$00000000,$00000000 ;0.0 + +*round to nearest +BIGRN: + dc.l $3ffe0000,$b17217f7,$d1cf79ac ;ln(2) + dc.l $40000000,$935d8ddd,$aaa8ac17 ;ln(10) + dc.l $3fff0000,$80000000,$00000000 ;10 ^ 0 + + xdef PTENRN +PTENRN: + dc.l $40020000,$A0000000,$00000000 ;10 ^ 1 + dc.l $40050000,$C8000000,$00000000 ;10 ^ 2 + dc.l $400C0000,$9C400000,$00000000 ;10 ^ 4 + dc.l $40190000,$BEBC2000,$00000000 ;10 ^ 8 + dc.l $40340000,$8E1BC9BF,$04000000 ;10 ^ 16 + dc.l $40690000,$9DC5ADA8,$2B70B59E ;10 ^ 32 + dc.l $40D30000,$C2781F49,$FFCFA6D5 ;10 ^ 64 + dc.l $41A80000,$93BA47C9,$80E98CE0 ;10 ^ 128 + dc.l $43510000,$AA7EEBFB,$9DF9DE8E ;10 ^ 256 + dc.l $46A30000,$E319A0AE,$A60E91C7 ;10 ^ 512 + dc.l $4D480000,$C9767586,$81750C17 ;10 ^ 1024 + dc.l $5A920000,$9E8B3B5D,$C53D5DE5 ;10 ^ 2048 + dc.l $75250000,$C4605202,$8A20979B ;10 ^ 4096 +*round to minus infinity +BIGRZRM: + dc.l $3ffe0000,$b17217f7,$d1cf79ab ;ln(2) + dc.l 
$40000000,$935d8ddd,$aaa8ac16 ;ln(10) + dc.l $3fff0000,$80000000,$00000000 ;10 ^ 0 + + xdef PTENRM +PTENRM: + dc.l $40020000,$A0000000,$00000000 ;10 ^ 1 + dc.l $40050000,$C8000000,$00000000 ;10 ^ 2 + dc.l $400C0000,$9C400000,$00000000 ;10 ^ 4 + dc.l $40190000,$BEBC2000,$00000000 ;10 ^ 8 + dc.l $40340000,$8E1BC9BF,$04000000 ;10 ^ 16 + dc.l $40690000,$9DC5ADA8,$2B70B59D ;10 ^ 32 + dc.l $40D30000,$C2781F49,$FFCFA6D5 ;10 ^ 64 + dc.l $41A80000,$93BA47C9,$80E98CDF ;10 ^ 128 + dc.l $43510000,$AA7EEBFB,$9DF9DE8D ;10 ^ 256 + dc.l $46A30000,$E319A0AE,$A60E91C6 ;10 ^ 512 + dc.l $4D480000,$C9767586,$81750C17 ;10 ^ 1024 + dc.l $5A920000,$9E8B3B5D,$C53D5DE5 ;10 ^ 2048 + dc.l $75250000,$C4605202,$8A20979A ;10 ^ 4096 +*round to positive infinity +BIGRP: + dc.l $3ffe0000,$b17217f7,$d1cf79ac ;ln(2) + dc.l $40000000,$935d8ddd,$aaa8ac17 ;ln(10) + dc.l $3fff0000,$80000000,$00000000 ;10 ^ 0 + + xdef PTENRP +PTENRP: + dc.l $40020000,$A0000000,$00000000 ;10 ^ 1 + dc.l $40050000,$C8000000,$00000000 ;10 ^ 2 + dc.l $400C0000,$9C400000,$00000000 ;10 ^ 4 + dc.l $40190000,$BEBC2000,$00000000 ;10 ^ 8 + dc.l $40340000,$8E1BC9BF,$04000000 ;10 ^ 16 + dc.l $40690000,$9DC5ADA8,$2B70B59E ;10 ^ 32 + dc.l $40D30000,$C2781F49,$FFCFA6D6 ;10 ^ 64 + dc.l $41A80000,$93BA47C9,$80E98CE0 ;10 ^ 128 + dc.l $43510000,$AA7EEBFB,$9DF9DE8E ;10 ^ 256 + dc.l $46A30000,$E319A0AE,$A60E91C7 ;10 ^ 512 + dc.l $4D480000,$C9767586,$81750C18 ;10 ^ 1024 + dc.l $5A920000,$9E8B3B5D,$C53D5DE6 ;10 ^ 2048 + dc.l $75250000,$C4605202,$8A20979B ;10 ^ 4096 + + xref nrm_zero + xref decbin + xref round + + xdef get_op + xdef uns_getop + xdef uni_getop +get_op: + clr.b DY_MO_FLG(a6) + tst.b UFLG_TMP(a6) ;test flag for unsupp/unimp state + beq.b uni_getop + +uns_getop: + btst.b #direction_bit,CMDREG1B(a6) + bne.w opclass3 ;branch if a fmove out (any kind) + btst.b #6,CMDREG1B(a6) + beq.b uns_notpacked + + bfextu CMDREG1B(a6){3:3},d0 + cmp.b #3,d0 + beq.w pack_source ;check for a packed src op, branch if so +uns_notpacked: + bsr chk_dy_mo 
;set the dyadic/monadic flag + tst.b DY_MO_FLG(a6) + beq.b src_op_ck ;if monadic, go check src op +* ;else, check dst op (fall through) + + btst.b #7,DTAG(a6) + beq.b src_op_ck ;if dst op is norm, check src op + bra.b dst_ex_dnrm ;else, handle destination unnorm/dnrm + +uni_getop: + bfextu CMDREG1B(a6){0:6},d0 ;get opclass and src fields + cmpi.l #$17,d0 ;if op class and size fields are $17, +* ;it is FMOVECR; if not, continue +* +* If the instruction is fmovecr, exit get_op. It is handled +* in do_func and smovecr.sa. +* + bne.w not_fmovecr ;handle fmovecr as an unimplemented inst + rts + +not_fmovecr: + btst.b #E1,E_BYTE(a6) ;if set, there is a packed operand + bne.w pack_source ;check for packed src op, branch if so + +* The following lines are coded to optimize on normalized operands + move.b STAG(a6),d0 + or.b DTAG(a6),d0 ;check if either of STAG/DTAG msb set + bmi.b dest_op_ck ;if so, some op needs to be fixed + rts + +dest_op_ck: + btst.b #7,DTAG(a6) ;check for unsupported data types in + beq.b src_op_ck ;the destination, if not, check src op + bsr chk_dy_mo ;set dyadic/monadic flag + tst.b DY_MO_FLG(a6) ; + beq.b src_op_ck ;if monadic, check src op +* +* At this point, destination has an extended denorm or unnorm. +* +dst_ex_dnrm: + move.w FPTEMP_EX(a6),d0 ;get destination exponent + andi.w #$7fff,d0 ;mask sign, check if exp = 0000 + beq.b src_op_ck ;if denorm then check source op. 
+* ;denorms are taken care of in res_func +* ;(unsupp) or do_func (unimp) +* ;else unnorm fall through + lea.l FPTEMP(a6),a0 ;point a0 to dop - used in mk_norm + bsr mk_norm ;go normalize - mk_norm returns: +* ;L_SCR1{7:5} = operand tag +* ; (000 = norm, 100 = denorm) +* ;L_SCR1{4} = fpte15 or ete15 +* ; 0 = exp > $3fff +* ; 1 = exp <= $3fff +* ;and puts the normalized num back +* ;on the fsave stack +* + move.b L_SCR1(a6),DTAG(a6) ;write the new tag & fpte15 +* ;to the fsave stack and fall +* ;through to check source operand +* +src_op_ck: + btst.b #7,STAG(a6) + beq.w end_getop ;check for unsupported data types on the +* ;source operand + btst.b #5,STAG(a6) + bne.b src_sd_dnrm ;if bit 5 set, handle sgl/dbl denorms +* +* At this point only unnorms or extended denorms are possible. +* +src_ex_dnrm: + move.w ETEMP_EX(a6),d0 ;get source exponent + andi.w #$7fff,d0 ;mask sign, check if exp = 0000 + beq.w end_getop ;if denorm then exit, denorms are +* ;handled in do_func + lea.l ETEMP(a6),a0 ;point a0 to sop - used in mk_norm + bsr mk_norm ;go normalize - mk_norm returns: +* ;L_SCR1{7:5} = operand tag +* ; (000 = norm, 100 = denorm) +* ;L_SCR1{4} = fpte15 or ete15 +* ; 0 = exp > $3fff +* ; 1 = exp <= $3fff +* ;and puts the normalized num back +* ;on the fsave stack +* + move.b L_SCR1(a6),STAG(a6) ;write the new tag & ete15 + rts ;end_getop + +* +* At this point, only single or double denorms are possible. +* If the inst is not fmove, normalize the source. If it is, +* do nothing to the input. 
+* +src_sd_dnrm: + btst.b #4,CMDREG1B(a6) ;differentiate between sgl/dbl denorm + bne.b is_double +is_single: + move.w #$3f81,d1 ;write bias for sgl denorm + bra.b common ;goto the common code +is_double: + move.w #$3c01,d1 ;write the bias for a dbl denorm +common: + btst.b #sign_bit,ETEMP_EX(a6) ;grab sign bit of mantissa + beq.b pos + bset #15,d1 ;set sign bit because it is negative +pos: + move.w d1,ETEMP_EX(a6) +* ;put exponent on stack + + move.w CMDREG1B(a6),d1 + and.w #$e3ff,d1 ;clear out source specifier + or.w #$0800,d1 ;set source specifier to extended prec + move.w d1,CMDREG1B(a6) ;write back to the command word in stack +* ;this is needed to fix unsupp data stack + lea.l ETEMP(a6),a0 ;point a0 to sop + + bsr mk_norm ;convert sgl/dbl denorm to norm + move.b L_SCR1(a6),STAG(a6) ;put tag into source tag reg - d0 + rts ;end_getop +* +* At this point, the source is definitely packed, whether +* instruction is dyadic or monadic is still unknown +* +pack_source: + move.l FPTEMP_LO(a6),ETEMP(a6) ;write ms part of packed +* ;number to etemp slot + bsr chk_dy_mo ;set dyadic/monadic flag + bsr unpack + + tst.b DY_MO_FLG(a6) + beq.b end_getop ;if monadic, exit +* ;else, fix FPTEMP +pack_dya: + bfextu CMDREG1B(a6){6:3},d0 ;extract dest fp reg + move.l #7,d1 + sub.l d0,d1 + clr.l d0 + bset.l d1,d0 ;set up d0 as a dynamic register mask + fmovem.x d0,FPTEMP(a6) ;write to FPTEMP + + btst.b #7,DTAG(a6) ;check dest tag for unnorm or denorm + bne.w dst_ex_dnrm ;else, handle the unnorm or ext denorm +* +* Dest is not denormalized. Check for norm, and set fpte15 +* accordingly. 
+* + move.b DTAG(a6),d0 + andi.b #$f0,d0 ;strip to only dtag:fpte15 + tst.b d0 ;check for normalized value + bne.b end_getop ;if inf/nan/zero leave get_op + move.w FPTEMP_EX(a6),d0 + andi.w #$7fff,d0 + cmpi.w #$3fff,d0 ;check if fpte15 needs setting + bge.b end_getop ;if >= $3fff, leave fpte15=0 + or.b #$10,DTAG(a6) + bra.b end_getop + +* +* At this point, it is either an fmoveout packed, unnorm or denorm +* +opclass3: + clr.b DY_MO_FLG(a6) ;set dyadic/monadic flag to monadic + bfextu CMDREG1B(a6){4:2},d0 + cmpi.b #3,d0 + bne.w src_ex_dnrm ;if not equal, must be unnorm or denorm +* ;else it is a packed move out +* ;exit +end_getop: + rts + +* +* Sets the DY_MO_FLG correctly. This is used only if it is an +* unsupported data type exception. Set if dyadic. +* +chk_dy_mo: + move.w CMDREG1B(a6),d0 + btst.l #5,d0 ;testing extension command word + beq.b set_mon ;if bit 5 = 0 then monadic + btst.l #4,d0 ;know that bit 5 = 1 + beq.b set_dya ;if bit 4 = 0 then dyadic + andi.w #$007f,d0 ;get rid of all but extension bits {6:0} + cmpi.w #$0038,d0 ;if extension = $38 then fcmp (dyadic) + bne.b set_mon +set_dya: + st.b DY_MO_FLG(a6) ;set the inst flag type to dyadic + rts +set_mon: + clr.b DY_MO_FLG(a6) ;set the inst flag type to monadic + rts +* +* MK_NORM +* +* Normalizes unnormalized numbers, sets tag to norm or denorm, sets unfl +* exception if denorm. 
+* +* CASE opclass 0x0 unsupp +* mk_norm till msb set +* set tag = norm +* +* CASE opclass 0x0 unimp +* mk_norm till msb set or exp = 0 +* if integer bit = 0 +* tag = denorm +* else +* tag = norm +* +* CASE opclass 011 unsupp +* mk_norm till msb set or exp = 0 +* if integer bit = 0 +* tag = denorm +* set unfl_nmcexe = 1 +* else +* tag = norm +* +* if exp <= $3fff +* set ete15 or fpte15 = 1 +* else set ete15 or fpte15 = 0 + +* input: +* a0 = points to operand to be normalized +* output: +* L_SCR1{7:5} = operand tag (000 = norm, 100 = denorm) +* L_SCR1{4} = fpte15 or ete15 (0 = exp > $3fff, 1 = exp <=$3fff) +* the normalized operand is placed back on the fsave stack +mk_norm: + clr.l L_SCR1(a6) + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) ;transform into internal extended format + + cmpi.b #$2c,1+EXC_VEC(a6) ;check if unimp + bne.b uns_data ;branch if unsupp + bsr uni_inst ;call if unimp (opclass 0x0) + bra.b reload +uns_data: + btst.b #direction_bit,CMDREG1B(a6) ;check transfer direction + bne.b bit_set ;branch if set (opclass 011) + bsr uns_opx ;call if opclass 0x0 + bra.b reload +bit_set: + bsr uns_op3 ;opclass 011 +reload: + cmp.w #$3fff,LOCAL_EX(a0) ;if exp > $3fff + bgt.b end_mk ; fpte15/ete15 already set to 0 + bset.b #4,L_SCR1(a6) ;else set fpte15/ete15 to 1 +* ;calling routine actually sets the +* ;value on the stack (along with the +* ;tag), since this routine doesn't +* ;know if it should set ete15 or fpte15 +* ;ie, it doesn't know if this is the +* ;src op or dest op. 
+end_mk: + bfclr LOCAL_SGN(a0){0:8} + beq.b end_mk_pos + bset.b #sign_bit,LOCAL_EX(a0) ;convert back to IEEE format +end_mk_pos: + rts +* +* CASE opclass 011 unsupp +* +uns_op3: + bsr nrm_zero ;normalize till msb = 1 or exp = zero + btst.b #7,LOCAL_HI(a0) ;if msb = 1 + bne.b no_unfl ;then branch +set_unfl: + or.b #dnrm_tag,L_SCR1(a6) ;set denorm tag + bset.b #unfl_bit,FPSR_EXCEPT(a6) ;set unfl exception bit +no_unfl: + rts +* +* CASE opclass 0x0 unsupp +* +uns_opx: + bsr nrm_zero ;normalize the number + btst.b #7,LOCAL_HI(a0) ;check if integer bit (j-bit) is set + beq.b uns_den ;if clear then now have a denorm +uns_nrm: + or.b #norm_tag,L_SCR1(a6) ;set tag to norm + rts +uns_den: + or.b #dnrm_tag,L_SCR1(a6) ;set tag to denorm + rts +* +* CASE opclass 0x0 unimp +* +uni_inst: + bsr nrm_zero + btst.b #7,LOCAL_HI(a0) ;check if integer bit (j-bit) is set + beq.b uni_den ;if clear then now have a denorm +uni_nrm: + or.b #norm_tag,L_SCR1(a6) ;set tag to norm + rts +uni_den: + or.b #dnrm_tag,L_SCR1(a6) ;set tag to denorm + rts + +* +* Decimal to binary conversion +* +* Special cases of inf and NaNs are completed outside of decbin. +* If the input is an snan, the snan bit is not set. 
+* +* input: +* ETEMP(a6) - points to packed decimal string in memory +* output: +* fp0 - contains packed string converted to extended precision +* ETEMP - same as fp0 +unpack: + move.w CMDREG1B(a6),d0 ;examine command word, looking for fmove's + and.w #$3b,d0 + beq move_unpack ;special handling for fmove: must set FPSR_CC + + move.w ETEMP(a6),d0 ;get word with inf information + bfextu d0{20:12},d1 ;get exponent into d1 + cmpi.w #$0fff,d1 ;test for inf or NaN + bne.b try_zero ;if not equal, it is not special + bfextu d0{17:3},d1 ;get SE and y bits into d1 + cmpi.w #7,d1 ;SE and y bits must be on for special + bne.b try_zero ;if not on, it is not special +*input is of the special cases of inf and NaN + tst.l ETEMP_HI(a6) ;check ms mantissa + bne.b fix_nan ;if non-zero, it is a NaN + tst.l ETEMP_LO(a6) ;check ls mantissa + bne.b fix_nan ;if non-zero, it is a NaN + bra.w finish ;special already on stack +fix_nan: + btst.b #signan_bit,ETEMP_HI(a6) ;test for snan + bne.w finish + or.l #snaniop_mask,USER_FPSR(a6) ;always set snan if it is so + bra.w finish +try_zero: + move.w ETEMP_EX+2(a6),d0 ;get word 4 + andi.w #$000f,d0 ;clear all but last ni(y)bble + tst.w d0 ;check for zero. + bne.w not_spec + tst.l ETEMP_HI(a6) ;check words 3 and 2 + bne.w not_spec + tst.l ETEMP_LO(a6) ;check words 1 and 0 + bne.w not_spec + tst.l ETEMP(a6) ;test sign of the zero + bge.b pos_zero + move.l #$80000000,ETEMP(a6) ;write neg zero to etemp + clr.l ETEMP_HI(a6) + clr.l ETEMP_LO(a6) + bra.w finish +pos_zero: + clr.l ETEMP(a6) + clr.l ETEMP_HI(a6) + clr.l ETEMP_LO(a6) + bra.w finish + +not_spec: + fmovem.x fp0-fp1,-(a7) ;save fp0 - decbin returns in it + bsr decbin + fmove.x fp0,ETEMP(a6) ;put the unpacked sop in the fsave stack + fmovem.x (a7)+,fp0-fp1 + fmove.l #0,FPSR ;clr fpsr from decbin + bra finish + +* +* Special handling for packed move in: Same results as all other +* packed cases, but we must set the FPSR condition codes properly. 
+* +move_unpack: + move.w ETEMP(a6),d0 ;get word with inf information + bfextu d0{20:12},d1 ;get exponent into d1 + cmpi.w #$0fff,d1 ;test for inf or NaN + bne.b mtry_zero ;if not equal, it is not special + bfextu d0{17:3},d1 ;get SE and y bits into d1 + cmpi.w #7,d1 ;SE and y bits must be on for special + bne.b mtry_zero ;if not on, it is not special +*input is of the special cases of inf and NaN + tst.l ETEMP_HI(a6) ;check ms mantissa + bne.b mfix_nan ;if non-zero, it is a NaN + tst.l ETEMP_LO(a6) ;check ls mantissa + bne.b mfix_nan ;if non-zero, it is a NaN +*input is inf + or.l #inf_mask,USER_FPSR(a6) ;set I bit + tst.l ETEMP(a6) ;check sign + bge.w finish + or.l #neg_mask,USER_FPSR(a6) ;set N bit + bra.w finish ;special already on stack +mfix_nan: + or.l #nan_mask,USER_FPSR(a6) ;set NaN bit + move.b #nan_tag,STAG(a6) ;set stag to NaN + btst.b #signan_bit,ETEMP_HI(a6) ;test for snan + bne.b mn_snan + or.l #snaniop_mask,USER_FPSR(a6) ;set snan bit + btst.b #snan_bit,FPCR_ENABLE(a6) ;test for snan enabled + bne.b mn_snan + bset.b #signan_bit,ETEMP_HI(a6) ;force snans to qnans +mn_snan: + tst.l ETEMP(a6) ;check for sign + bge.w finish ;if clr, go on + or.l #neg_mask,USER_FPSR(a6) ;set N bit + bra.w finish + +mtry_zero: + move.w ETEMP_EX+2(a6),d0 ;get word 4 + andi.w #$000f,d0 ;clear all but last ni(y)bble + tst.w d0 ;check for zero. 
+ bne.b mnot_spec + tst.l ETEMP_HI(a6) ;check words 3 and 2 + bne.b mnot_spec + tst.l ETEMP_LO(a6) ;check words 1 and 0 + bne.b mnot_spec + tst.l ETEMP(a6) ;test sign of the zero + bge.b mpos_zero + or.l #neg_mask+z_mask,USER_FPSR(a6) ;set N and Z + move.l #$80000000,ETEMP(a6) ;write neg zero to etemp + clr.l ETEMP_HI(a6) + clr.l ETEMP_LO(a6) + bra.b finish +mpos_zero: + or.l #z_mask,USER_FPSR(a6) ;set Z + clr.l ETEMP(a6) + clr.l ETEMP_HI(a6) + clr.l ETEMP_LO(a6) + bra.b finish + +mnot_spec: + fmovem.x fp0-fp1,-(a7) ;save fp0 ,fp1 - decbin returns in fp0 + bsr decbin + fmove.x fp0,ETEMP(a6) +* ;put the unpacked sop in the fsave stack + fmovem.x (a7)+,fp0-fp1 + +finish: + move.w CMDREG1B(a6),d0 ;get the command word + and.w #$fbff,d0 ;change the source specifier field to +* ;extended (was packed). + move.w d0,CMDREG1B(a6) ;write command word back to fsave stack +* ;we need to do this so the 040 will +* ;re-execute the inst. without taking +* ;another packed trap. + +fix_stag: +*Converted result is now in etemp on fsave stack, now set the source +*tag (stag) +* if (ete =$7fff) then INF or NAN +* if (etemp = $x.0----0) then +* stag = INF +* else +* stag = NAN +* else +* if (ete = $0000) then +* stag = ZERO +* else +* stag = NORM +* +* Note also that the etemp_15 bit (just right of the stag) must +* be set accordingly. 
+* + move.w ETEMP_EX(a6),d1 + andi.w #$7fff,d1 ;strip sign + cmp.w #$7fff,d1 + bne.b z_or_nrm + move.l ETEMP_HI(a6),d1 + bne.b is_nan + move.l ETEMP_LO(a6),d1 + bne.b is_nan +is_inf: + move.b #$40,STAG(a6) + move.l #$40,d0 + rts +is_nan: + move.b #$60,STAG(a6) + move.l #$60,d0 + rts +z_or_nrm: + tst.w d1 + bne.b is_nrm +is_zro: +* For a zero, set etemp_15 + move.b #$30,STAG(a6) + move.l #$20,d0 + rts +is_nrm: +* For a norm, check if the exp <= $3fff; if so, set etemp_15 + cmpi.w #$3fff,d1 + ble.b set_bit15 + clr.b STAG(a6) + bra.b end_is_nrm +set_bit15: + move.b #$10,STAG(a6) +end_is_nrm: + clr.l d0 +end_fix: + rts + +end_get: + rts + end diff --git a/sys/arch/m68k/fpsp/kernel_ex.sa b/sys/arch/m68k/fpsp/kernel_ex.sa new file mode 100644 index 00000000000..98807a91adb --- /dev/null +++ b/sys/arch/m68k/fpsp/kernel_ex.sa @@ -0,0 +1,519 @@ +* $NetBSD: kernel_ex.sa,v 1.2 1994/10/26 07:49:12 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. 
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* kernel_ex.sa 3.3 12/19/90 +* +* This file contains routines to force exception status in the +* fpu for exceptional cases detected or reported within the +* transcendental functions. Typically, the t_xx routine will +* set the appropriate bits in the USER_FPSR word on the stack. +* The bits are tested in gen_except.sa to determine if an exceptional +* situation needs to be created on return from the FPSP. +* + +KERNEL_EX IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + +mns_inf dc.l $ffff0000,$00000000,$00000000 +pls_inf dc.l $7fff0000,$00000000,$00000000 +nan dc.l $7fff0000,$ffffffff,$ffffffff +huge dc.l $7ffe0000,$ffffffff,$ffffffff + + xref ovf_r_k + xref unf_sub + xref nrm_set + + xdef t_dz + xdef t_dz2 + xdef t_operr + xdef t_unfl + xdef t_ovfl + xdef t_ovfl2 + xdef t_inx2 + xdef t_frcinx + xdef t_extdnrm + xdef t_resdnrm + xdef dst_nan + xdef src_nan +* +* DZ exception +* +* +* if dz trap disabled +* store properly signed inf (use sign of etemp) into fp0 +* set FPSR exception status dz bit, condition code +* inf bit, and accrued dz bit +* return +* frestore the frame into the machine (done by unimp_hd) +* +* else dz trap enabled +* set exception status bit & accrued bits in FPSR +* set flag to disable sto_res from corrupting fp register +* return +* frestore the frame into the machine (done by unimp_hd) +* +* t_dz2 is used by monadic functions such as flogn (from do_func). +* t_dz is used by monadic functions such as satanh (from the +* transcendental function). 
+* +t_dz2: + bset.b #neg_bit,FPSR_CC(a6) ;set neg bit in FPSR + fmove.l #0,FPSR ;clr status bits (Z set) + btst.b #dz_bit,FPCR_ENABLE(a6) ;test FPCR for dz exc enabled + bne.b dz_ena_end + bra.b m_inf ;flogx always returns -inf +t_dz: + fmove.l #0,FPSR ;clr status bits (Z set) + btst.b #dz_bit,FPCR_ENABLE(a6) ;test FPCR for dz exc enabled + bne.b dz_ena +* +* dz disabled +* + btst.b #sign_bit,ETEMP_EX(a6) ;check sign for neg or pos + beq.b p_inf ;branch if pos sign + +m_inf: + fmovem.x mns_inf,fp0 ;load -inf + bset.b #neg_bit,FPSR_CC(a6) ;set neg bit in FPSR + bra.b set_fpsr +p_inf: + fmovem.x pls_inf,fp0 ;load +inf +set_fpsr: + or.l #dzinf_mask,USER_FPSR(a6) ;set I,DZ,ADZ + rts +* +* dz enabled +* +dz_ena: + btst.b #sign_bit,ETEMP_EX(a6) ;check sign for neg or pos + beq.b dz_ena_end + bset.b #neg_bit,FPSR_CC(a6) ;set neg bit in FPSR +dz_ena_end: + or.l #dzinf_mask,USER_FPSR(a6) ;set I,DZ,ADZ + st.b STORE_FLG(a6) + rts +* +* OPERR exception +* +* if (operr trap disabled) +* set FPSR exception status operr bit, condition code +* nan bit; Store default NAN into fp0 +* frestore the frame into the machine (done by unimp_hd) +* +* else (operr trap enabled) +* set FPSR exception status operr bit, accrued operr bit +* set flag to disable sto_res from corrupting fp register +* frestore the frame into the machine (done by unimp_hd) +* +t_operr: + or.l #opnan_mask,USER_FPSR(a6) ;set NaN, OPERR, AIOP + + btst.b #operr_bit,FPCR_ENABLE(a6) ;test FPCR for operr enabled + bne.b op_ena + + fmovem.x nan,fp0 ;load default nan + rts +op_ena: + st.b STORE_FLG(a6) ;do not corrupt destination + rts + +* +* t_unfl --- UNFL exception +* +* This entry point is used by all routines requiring unfl, inex2, +* aunfl, and ainex to be set on exit. +* +* On entry, a0 points to the exceptional operand. The final exceptional +* operand is built in FP_SCR1 and only the sign from the original operand +* is used. 
+* +t_unfl: + clr.l FP_SCR1(a6) ;set exceptional operand to zero + clr.l FP_SCR1+4(a6) + clr.l FP_SCR1+8(a6) + tst.b (a0) ;extract sign from caller's exop + bpl.b unfl_signok + bset #sign_bit,FP_SCR1(a6) ;negative: copy sign into FP_SCR1 +unfl_signok: + lea.l FP_SCR1(a6),a0 ;a0 now points at the built operand + or.l #unfinx_mask,USER_FPSR(a6) +* ;set UNFL, INEX2, AUNFL, AINEX +unfl_con: + btst.b #unfl_bit,FPCR_ENABLE(a6) ;test FPCR for unfl enabled + beq.b unfl_dis + +unfl_ena: + bfclr STAG(a6){5:3} ;clear wbtm66,wbtm1,wbtm0 + bset.b #wbtemp15_bit,WB_BYTE(a6) ;set wbtemp15 + bset.b #sticky_bit,STICKY(a6) ;set sticky bit + + bclr.b #E1,E_BYTE(a6) ;clear E1 + +unfl_dis: + bfextu FPCR_MODE(a6){0:2},d0 ;get round precision + + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) ;convert to internal ext format + + bsr unf_sub ;returns IEEE result at a0 +* ;and sets FPSR_CC accordingly + + bfclr LOCAL_SGN(a0){0:8} ;convert back to IEEE ext format + beq.b unfl_fin + + bset.b #sign_bit,LOCAL_EX(a0) + bset.b #sign_bit,FP_SCR1(a6) ;set sign bit of exc operand + +unfl_fin: + fmovem.x (a0),fp0 ;store result in fp0 + rts + + +* +* t_ovfl2 --- OVFL exception (without inex2 returned) +* +* This entry is used by scale to force catastrophic overflow. The +* ovfl, aovfl, and ainex bits are set, but not the inex2 bit. +* +t_ovfl2: + or.l #ovfl_inx_mask,USER_FPSR(a6) + move.l ETEMP(a6),FP_SCR1(a6) + move.l ETEMP_HI(a6),FP_SCR1+4(a6) + move.l ETEMP_LO(a6),FP_SCR1+8(a6) +* +* Check for single or double round precision. If single, check if +* the lower 40 bits of ETEMP are zero; if not, set inex2. If double, +* check if the lower 11 bits are zero; if not, set inex2. +* +* Modified from the Motorola original: the single case tested only the +* top byte of ETEMP_LO (tst.b), leaving mantissa bits 23-0 unchecked; it +* now tests the whole longword (tst.l) so that all 40 bits are covered.
+* + move.b FPCR_MODE(a6),d0 + andi.b #$c0,d0 + beq.w t_work ;if extended, finish ovfl processing + cmpi.b #$40,d0 ;test for single + bne.b t_dbl +t_sgl: + tst.l ETEMP_LO(a6) ;mantissa bits 31-0 must all be zero + bne.b t_setinx2 + move.l ETEMP_HI(a6),d0 + andi.l #$ff,d0 ;look at only lower 8 bits + bne.b t_setinx2 + bra.w t_work +t_dbl: + move.l ETEMP_LO(a6),d0 + andi.l #$7ff,d0 ;look at only lower 11 bits + beq.w t_work +t_setinx2: + or.l #inex2_mask,USER_FPSR(a6) ;discarded bits are non-zero: set inex2 + bra.b t_work +* +* t_ovfl --- OVFL exception +* +*** Note: the exc operand is returned in ETEMP. +* +t_ovfl: + or.l #ovfinx_mask,USER_FPSR(a6) +t_work: + btst.b #ovfl_bit,FPCR_ENABLE(a6) ;test FPCR for ovfl enabled + beq.b ovf_dis + +ovf_ena: + clr.l FP_SCR1(a6) ;set exceptional operand + clr.l FP_SCR1+4(a6) + clr.l FP_SCR1+8(a6) + + bfclr STAG(a6){5:3} ;clear wbtm66,wbtm1,wbtm0 + bclr.b #wbtemp15_bit,WB_BYTE(a6) ;clear wbtemp15 + bset.b #sticky_bit,STICKY(a6) ;set sticky bit + + bclr.b #E1,E_BYTE(a6) ;clear E1 +* ;fall through to disabled case + +* For disabled overflow call 'ovf_r_k'. This routine loads the +* correct result based on the rounding precision, destination +* format, rounding mode and sign. +* +ovf_dis: + bsr ovf_r_k ;returns unsigned ETEMP_EX +* ;and sets FPSR_CC accordingly. + bfclr ETEMP_SGN(a6){0:8} ;fix sign + beq.b ovf_pos + bset.b #sign_bit,ETEMP_EX(a6) + bset.b #sign_bit,FP_SCR1(a6) ;set exceptional operand sign +ovf_pos: + fmovem.x ETEMP(a6),fp0 ;move the result to fp0 + rts + + +* +* INEX2 exception +* +* The inex2 and ainex bits are set. +* +t_inx2: + or.l #inx2a_mask,USER_FPSR(a6) ;set INEX2, AINEX + rts + +* +* Force Inex2 +* +* This routine is called by the transcendental routines to force +* the inex2 exception bits set in the FPSR. If the underflow bit +* is set, but the underflow trap was not taken, the aunfl bit in +* the FPSR must be set.
+* +t_frcinx: + or.l #inx2a_mask,USER_FPSR(a6) ;set INEX2, AINEX + btst.b #unfl_bit,FPSR_EXCEPT(a6) ;test for unfl bit set + beq.b no_uacc1 ;if clear, do not set aunfl + bset.b #aunfl_bit,FPSR_AEXCEPT(a6) ;unfl was set: accrue it +no_uacc1: + rts + +* +* DST_NAN +* +* Determine if the destination nan is signalling or non-signalling, +* and set the FPSR bits accordingly. See the MC68040 User's Manual +* section 3.2.2.5 NOT-A-NUMBERS. +* +dst_nan: + btst.b #sign_bit,FPTEMP_EX(a6) ;test sign of nan + beq.b dst_pos ;if clr, it was positive + bset.b #neg_bit,FPSR_CC(a6) ;set N bit +dst_pos: + btst.b #signan_bit,FPTEMP_HI(a6) ;check if signalling + beq.b dst_snan ;branch if signalling + + fmove.l d1,fpcr ;restore user's rmode/prec + fmove.x FPTEMP(a6),fp0 ;return the non-signalling nan +* +* Check the source nan. If it is signalling, snan will be reported. +* + move.b STAG(a6),d0 ;get source tag + andi.b #$e0,d0 ;keep bits {7:5} only + cmpi.b #$60,d0 ;compare against the nan tag value + bne.b no_snan ;source is not a nan + btst.b #signan_bit,ETEMP_HI(a6) ;check if signalling + bne.b no_snan ;bit set: source is non-signalling + or.l #snaniop_mask,USER_FPSR(a6) ;set NAN, SNAN, AIOP +no_snan: + rts + +dst_snan: + btst.b #snan_bit,FPCR_ENABLE(a6) ;check if trap enabled + beq.b dst_dis ;branch if disabled + + or.b #nan_tag,DTAG(a6) ;set up dtag for nan + st.b STORE_FLG(a6) ;do not store a result + or.l #snaniop_mask,USER_FPSR(a6) ;set NAN, SNAN, AIOP + rts + +dst_dis: + bset.b #signan_bit,FPTEMP_HI(a6) ;set SNAN bit in dst op (FPTEMP) + fmove.l d1,fpcr ;restore user's rmode/prec + fmove.x FPTEMP(a6),fp0 ;load non-sign. nan + or.l #snaniop_mask,USER_FPSR(a6) ;set NAN, SNAN, AIOP + rts + +* +* SRC_NAN +* +* Determine if the source nan is signalling or non-signalling, +* and set the FPSR bits accordingly. See the MC68040 User's Manual +* section 3.2.2.5 NOT-A-NUMBERS.
+* +src_nan: + btst.b #sign_bit,ETEMP_EX(a6) ;test sign of nan + beq.b src_pos ;if clr, it was positive + bset.b #neg_bit,FPSR_CC(a6) ;set N bit +src_pos: + btst.b #signan_bit,ETEMP_HI(a6) ;check if signalling + beq.b src_snan ;branch if signalling + fmove.l d1,fpcr ;restore user's rmode/prec + fmove.x ETEMP(a6),fp0 ;return the non-signalling nan + rts + +src_snan: + btst.b #snan_bit,FPCR_ENABLE(a6) ;check if trap enabled + beq.b src_dis ;branch if disabled + bset.b #signan_bit,ETEMP_HI(a6) ;set SNAN bit in sop + or.b #norm_tag,DTAG(a6) ;set up dtag for norm + or.b #nan_tag,STAG(a6) ;set up stag for nan + st.b STORE_FLG(a6) ;do not store a result + or.l #snaniop_mask,USER_FPSR(a6) ;set NAN, SNAN, AIOP + rts + +src_dis: + bset.b #signan_bit,ETEMP_HI(a6) ;set SNAN bit in sop + fmove.l d1,fpcr ;restore user's rmode/prec + fmove.x ETEMP(a6),fp0 ;load non-sign. nan + or.l #snaniop_mask,USER_FPSR(a6) ;set NAN, SNAN, AIOP + rts + +* +* For all functions that have a denormalized input and that f(x)=x, +* this is the entry point +* +t_extdnrm: + or.l #unfinx_mask,USER_FPSR(a6) +* ;set UNFL, INEX2, AUNFL, AINEX + bra.b xdnrm_con +* +* Entry point for scale with extended denorm. The function does +* not set inex2, aunfl, or ainex. +* +t_resdnrm: + or.l #unfl_mask,USER_FPSR(a6) + +xdnrm_con: + btst.b #unfl_bit,FPCR_ENABLE(a6) + beq.b xdnrm_dis + +* +* If exceptions are enabled, the additional task of setting up WBTEMP +* is needed so that when the underflow exception handler is entered, +* the user perceives no difference between what the 040 provides vs. +* what the FPSP provides. 
+* +xdnrm_ena: + move.l a0,-(a7) ;save caller's operand pointer + + move.l LOCAL_EX(a0),FP_SCR1(a6) ;copy the operand into FP_SCR1 + move.l LOCAL_HI(a0),FP_SCR1+4(a6) + move.l LOCAL_LO(a0),FP_SCR1+8(a6) + + lea FP_SCR1(a6),a0 ;work on the copy + + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) ;convert to internal ext format + tst.w LOCAL_EX(a0) ;check if input is denorm + beq.b xdnrm_dn ;if so, skip nrm_set + bsr nrm_set ;normalize the result (exponent +* ;will be negative) +xdnrm_dn: + bclr.b #sign_bit,LOCAL_EX(a0) ;take off false sign + bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format + beq.b xdep + bset.b #sign_bit,LOCAL_EX(a0) ;operand was negative: put sign back +xdep: + bfclr STAG(a6){5:3} ;clear wbtm66,wbtm1,wbtm0 + bset.b #wbtemp15_bit,WB_BYTE(a6) ;set wbtemp15 + bclr.b #sticky_bit,STICKY(a6) ;clear sticky bit + bclr.b #E1,E_BYTE(a6) ;clear E1 + move.l (a7)+,a0 ;restore caller's operand pointer +xdnrm_dis: + bfextu FPCR_MODE(a6){0:2},d0 ;get round precision + bne.b not_ext ;if not round extended, store +* ;IEEE defaults +is_ext: + btst.b #sign_bit,LOCAL_EX(a0) + beq.b xdnrm_store + + bset.b #neg_bit,FPSR_CC(a6) ;set N bit in FPSR_CC + + bra.b xdnrm_store + +not_ext: + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) ;convert to internal ext format + bsr unf_sub ;returns IEEE result pointed by +* ;a0; sets FPSR_CC accordingly + bfclr LOCAL_SGN(a0){0:8} ;convert back to IEEE ext format + beq.b xdnrm_store + bset.b #sign_bit,LOCAL_EX(a0) ;operand was negative: put sign back +xdnrm_store: + fmovem.x (a0),fp0 ;store result in fp0 + rts + +* +* This subroutine is used for dyadic operations that use an extended +* denorm within the kernel. The approach used is to capture the frame, +* fix/restore.
+* + xdef t_avoid_unsupp +t_avoid_unsupp: + link a2,#-LOCAL_SIZE ;so that a2 fpsp.h negative +* ;offsets may be used + fsave -(a7) + tst.b 1(a7) ;check if idle, exit if so + beq.w idle_end + btst.b #E1,E_BYTE(a2) ;check for an E1 exception if +* ;enabled, there is an unsupp + beq.w end_avun ;else, exit + btst.b #7,DTAG(a2) ;check for denorm destination + beq.b src_den ;else, must be a source denorm +* +* handle destination denorm +* + lea FPTEMP(a2),a0 + btst.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) ;convert to internal ext format + bclr.b #7,DTAG(a2) ;set DTAG to norm + bsr nrm_set ;normalize result, exponent +* ;will become negative + bclr.b #sign_bit,LOCAL_EX(a0) ;get rid of fake sign + bfclr LOCAL_SGN(a0){0:8} ;convert back to IEEE ext format + beq.b ck_src_den ;check if source is also denorm + bset.b #sign_bit,LOCAL_EX(a0) +ck_src_den: + btst.b #7,STAG(a2) + beq.b end_avun +src_den: + lea ETEMP(a2),a0 + btst.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) ;convert to internal ext format + bclr.b #7,STAG(a2) ;set STAG to norm + bsr nrm_set ;normalize result, exponent +* ;will become negative + bclr.b #sign_bit,LOCAL_EX(a0) ;get rid of fake sign + bfclr LOCAL_SGN(a0){0:8} ;convert back to IEEE ext format + beq.b den_com + bset.b #sign_bit,LOCAL_EX(a0) +den_com: + move.b #$fe,CU_SAVEPC(a2) ;set continue frame + clr.w NMNEXC(a2) ;clear NMNEXC + bclr.b #E1,E_BYTE(a2) +* fmove.l FPSR,FPSR_SHADOW(a2) +* bset.b #SFLAG,E_BYTE(a2) +* bset.b #XFLAG,T_BYTE(a2) +end_avun: + frestore (a7)+ + unlk a2 + rts +idle_end: + add.l #4,a7 + unlk a2 + rts + end diff --git a/sys/arch/m68k/fpsp/l_fpsp.h b/sys/arch/m68k/fpsp/l_fpsp.h new file mode 100644 index 00000000000..7737b1ce524 --- /dev/null +++ b/sys/arch/m68k/fpsp/l_fpsp.h @@ -0,0 +1,280 @@ +* $NetBSD: l_fpsp.h,v 1.2 1994/10/26 07:49:14 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 
1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* l_fpsp.h 1.2 5/1/91 +* + +* l_fpsp.h --- stack frame offsets for library version of FPSP +* +* This file is derived from fpsp.h. All equates that refer +* to the fsave frame and its bits are removed with the +* exception of ETEMP, WBTEMP, DTAG and STAG which are simulated +* in the library version. Equates for the exception frame are +* also not needed. Some of the equates that are only used in +* the kernel version of the FPSP are left in to minimize the +* differences between this file and the original. +* +* The library routines use the same source files as the regular +* kernel mode code so they expect the same setup.
That is, you +* must create enough space on the stack for all save areas and +* work variables that are needed, and save any registers that +* your compiler does not treat as scratch registers on return +* from function calls. +* +* The worst case setup is: +* +* link a6,#-LOCAL_SIZE +* movem.l d0-d1/a0-a1,USER_DA(a6) +* fmovem.x fp0-fp3,USER_FP0(a6) +* fmovem.l fpsr/fpcr,USER_FPSR(a6) +* +* After initialization, the stack looks like this: +* +* A7 ---> +-------------------------------+ +* | | +* | FPSP Local Variables | +* | including | +* | saved registers | +* | | +* +-------------------------------+ +* A6 ---> | Saved A6 | +* +-------------------------------+ +* | Return PC | +* +-------------------------------+ +* | Arguments to | +* | an FPSP library | +* | package | +* | | +* +* Positive offsets from A6 refer to the input arguments. Negative +* offsets refer to the Local Variable area. +* +* On exit, execute: +* +* movem.l USER_DA(a6),d0-d1/a0-a1 +* fmovem.x USER_FP0(a6),fp0-fp3 +* fmovem.l USER_FPSR(a6),fpsr/fpcr +* unlk a6 +* rts +* +* Many 68K C compilers treat a0/a1/d0/d1/fp0/fp1 as scratch so +* a simplified setup/exit is possible: +* +* link a6,#-LOCAL_SIZE +* fmovem.x fp2-fp3,USER_FP2(a6) +* fmovem.l fpsr/fpcr,USER_FPSR(a6) +* +* [call appropriate emulation routine] +* +* fmovem.x USER_FP2(a6),fp2-fp3 +* fmovem.l USER_FPSR(a6),fpsr/fpcr +* unlk a6 +* rts +* +* Note that you must still save fp2/fp3 because the FPSP emulation +* routines expect fp0-fp3 as scratch registers. For all monadic +* entry points, the caller should save the fpcr in d1 and zero the +* real fpcr before calling the emulation routine. On return, the +* monadic emulation code will place the value supplied in d1 back +* into the fpcr and do a single floating point operation so that +* the final result will be correctly rounded and any specified +* exceptions will be generated.
+* +*---------------------------------------------------------------------- +* +* Local Variables on the stack +* +LOCAL_SIZE equ 228 ;bytes needed for local variables +LV equ -LOCAL_SIZE ;convenient base value +* +USER_DA equ LV+0 ;save space for D0-D1,A0-A1 +USER_D0 equ LV+0 ;saved user D0 +USER_D1 equ LV+4 ;saved user D1 +USER_A0 equ LV+8 ;saved user A0 +USER_A1 equ LV+12 ;saved user A1 +USER_FP0 equ LV+16 ;saved user FP0 +USER_FP1 equ LV+28 ;saved user FP1 +USER_FP2 equ LV+40 ;saved user FP2 +USER_FP3 equ LV+52 ;saved user FP3 +USER_FPCR equ LV+64 ;saved user FPCR +FPCR_ENABLE equ USER_FPCR+2 ; FPCR exception enable +FPCR_MODE equ USER_FPCR+3 ; FPCR rounding mode control +USER_FPSR equ LV+68 ;saved user FPSR +FPSR_CC equ USER_FPSR+0 ; FPSR condition code +FPSR_QBYTE equ USER_FPSR+1 ; FPSR quotient +FPSR_EXCEPT equ USER_FPSR+2 ; FPSR exception +FPSR_AEXCEPT equ USER_FPSR+3 ; FPSR accrued exception +USER_FPIAR equ LV+72 ;saved user FPIAR +FP_SCR1 equ LV+76 ;room for a temporary float value +FP_SCR2 equ LV+92 ;room for a temporary float value +L_SCR1 equ LV+108 ;room for a temporary long value +L_SCR2 equ LV+112 ;room for a temporary long value +STORE_FLG equ LV+116 +BINDEC_FLG equ LV+117 ;used in bindec +DNRM_FLG equ LV+118 ;used in res_func +RES_FLG equ LV+119 ;used in res_func +DY_MO_FLG equ LV+120 ;dyadic/monadic flag +UFLG_TMP equ LV+121 ;temporary for uflag errata +CU_ONLY equ LV+122 ;cu-only flag +VER_TMP equ LV+123 ;temp holding for version number +L_SCR3 equ LV+124 ;room for a temporary long value +FP_SCR3 equ LV+128 ;room for a temporary float value +FP_SCR4 equ LV+144 ;room for a temporary float value +FP_SCR5 equ LV+160 ;room for a temporary float value +FP_SCR6 equ LV+176 +* +*-------------------------------------------------------------------------- +* +STAG equ LV+192 ;source tag (1 byte) +* +DTAG equ LV+193 ;dest tag (1 byte) +* +FPTEMP equ LV+196 ;fptemp (12 bytes) +FPTEMP_EX equ FPTEMP ;fptemp sign and exponent (2 bytes) +FPTEMP_HI equ FPTEMP+4 
;fptemp mantissa [63:32] (4 bytes) +FPTEMP_LO equ FPTEMP+8 ;fptemp mantissa [31:00] (4 bytes) +* +FPTEMP_SGN equ FPTEMP+2 ;used to store sign +* +ETEMP equ LV+208 ;etemp (12 bytes) +ETEMP_EX equ ETEMP ;etemp sign and exponent (2 bytes) +ETEMP_HI equ ETEMP+4 ;etemp mantissa [63:32] (4 bytes) +ETEMP_LO equ ETEMP+8 ;etemp mantissa [31:00] (4 bytes) +* +ETEMP_SGN equ ETEMP+2 ;used to store sign +* +*-------------------------------------------------------------------------- +* +* FPSR/FPCR bits +* +neg_bit equ 3 negative result +z_bit equ 2 zero result +inf_bit equ 1 infinity result +nan_bit equ 0 not-a-number result +* +q_sn_bit equ 7 sign bit of quotient byte +* +bsun_bit equ 7 branch on unordered +snan_bit equ 6 signalling nan +operr_bit equ 5 operand error +ovfl_bit equ 4 overflow +unfl_bit equ 3 underflow +dz_bit equ 2 divide by zero +inex2_bit equ 1 inexact result 2 +inex1_bit equ 0 inexact result 1 +* +aiop_bit equ 7 accrued illegal operation +aovfl_bit equ 6 accrued overflow +aunfl_bit equ 5 accrued underflow +adz_bit equ 4 accrued divide by zero +ainex_bit equ 3 accrued inexact +* +* FPSR individual bit masks +* +neg_mask equ $08000000 +z_mask equ $04000000 +inf_mask equ $02000000 +nan_mask equ $01000000 +* +bsun_mask equ $00008000 +snan_mask equ $00004000 +operr_mask equ $00002000 +ovfl_mask equ $00001000 +unfl_mask equ $00000800 +dz_mask equ $00000400 +inex2_mask equ $00000200 +inex1_mask equ $00000100 +* +aiop_mask equ $00000080 accrued illegal operation +aovfl_mask equ $00000040 accrued overflow +aunfl_mask equ $00000020 accrued underflow +adz_mask equ $00000010 accrued divide by zero +ainex_mask equ $00000008 accrued inexact +* +* FPSR combinations used in the FPSP +* +dzinf_mask equ inf_mask+dz_mask+adz_mask +opnan_mask equ nan_mask+operr_mask+aiop_mask +nzi_mask equ $01ffffff clears N, Z, and I +unfinx_mask equ unfl_mask+inex2_mask+aunfl_mask+ainex_mask +unf2inx_mask equ unfl_mask+inex2_mask+ainex_mask +ovfinx_mask equ 
ovfl_mask+inex2_mask+aovfl_mask+ainex_mask +inx1a_mask equ inex1_mask+ainex_mask +inx2a_mask equ inex2_mask+ainex_mask +snaniop_mask equ nan_mask+snan_mask+aiop_mask +naniop_mask equ nan_mask+aiop_mask +neginf_mask equ neg_mask+inf_mask +infaiop_mask equ inf_mask+aiop_mask +negz_mask equ neg_mask+z_mask +opaop_mask equ operr_mask+aiop_mask +unfl_inx_mask equ unfl_mask+aunfl_mask+ainex_mask +ovfl_inx_mask equ ovfl_mask+aovfl_mask+ainex_mask +* +*-------------------------------------------------------------------------- +* +* FPCR rounding modes +* +x_mode equ $00 round to extended +s_mode equ $40 round to single +d_mode equ $80 round to double +* +rn_mode equ $00 round nearest +rz_mode equ $10 round to zero +rm_mode equ $20 round to minus infinity +rp_mode equ $30 round to plus infinity +* +*-------------------------------------------------------------------------- +* +* Miscellaneous equates +* +signan_bit equ 6 signalling nan bit in mantissa +sign_bit equ 7 +* +rnd_stky_bit equ 29 round/sticky bit of mantissa +* this can only be used if in a data register +LOCAL_EX equ 0 +LOCAL_SGN equ 2 +LOCAL_HI equ 4 +LOCAL_LO equ 8 +LOCAL_GRS equ 12 valid ONLY for FP_SCR1, FP_SCR2 +* +* +norm_tag equ $00 tag bits in {7:5} position +zero_tag equ $20 +inf_tag equ $40 +nan_tag equ $60 +dnrm_tag equ $80 +* +dbl_thresh equ $3C01 +sgl_thresh equ $3F81 +* diff --git a/sys/arch/m68k/fpsp/l_support.sa b/sys/arch/m68k/fpsp/l_support.sa new file mode 100644 index 00000000000..e704484b5a5 --- /dev/null +++ b/sys/arch/m68k/fpsp/l_support.sa @@ -0,0 +1,388 @@ +* $NetBSD: l_support.sa,v 1.3 1994/10/26 07:49:16 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. 
+* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* l_support.sa 1.2 5/1/91 +* + +L_SUPPORT IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + +mns_one dc.l $bfff0000,$80000000,$00000000 +pls_one dc.l $3fff0000,$80000000,$00000000 +pls_inf dc.l $7fff0000,$00000000,$00000000 +pls_huge dc.l $7ffe0000,$ffffffff,$ffffffff +mns_huge dc.l $fffe0000,$ffffffff,$ffffffff +pls_tiny dc.l $00000000,$80000000,$00000000 +mns_tiny dc.l $80000000,$80000000,$00000000 +small dc.l $20000000,$80000000,$00000000 +pls_zero dc.l $00000000,$00000000,$00000000 + + include l_fpsp.h + +* +* tag --- determine the type of an extended precision operand +* +* The tag values returned match the way the 68040 would have +* tagged them. 
+* +* Input: a0 points to operand +* +* Output d0.b = $00 norm +* $20 zero +* $40 inf +* $60 nan +* $80 denorm +* All other registers are unchanged +* + xdef tag +tag: + move.w LOCAL_EX(a0),d0 + andi.w #$7fff,d0 + beq.b chk_zro + cmpi.w #$7fff,d0 + beq.b chk_inf +tag_nrm: + clr.b d0 + rts +tag_nan: + move.b #$60,d0 + rts +tag_dnrm: + move.b #$80,d0 + rts +chk_zro: + btst.b #7,LOCAL_HI(a0) # check if J-bit is set + bne.b tag_nrm + tst.l LOCAL_HI(a0) + bne.b tag_dnrm + tst.l LOCAL_LO(a0) + bne.b tag_dnrm +tag_zero: + move.b #$20,d0 + rts +chk_inf: + tst.l LOCAL_HI(a0) + bne.b tag_nan + tst.l LOCAL_LO(a0) + bne.b tag_nan +tag_inf: + move.b #$40,d0 + rts + +* +* t_dz, t_dz2 --- divide by zero exception +* +* t_dz2 is used by monadic functions such as flogn (from do_func). +* t_dz is used by monadic functions such as satanh (from the +* transcendental function). +* + xdef t_dz2 +t_dz2: + fmovem.x mns_one,fp0 + fmove.l d1,fpcr + fdiv.x pls_zero,fp0 + rts + + xdef t_dz +t_dz: + btst.b #sign_bit,ETEMP_EX(a6) ;check sign for neg or pos + beq.b p_inf ;branch if pos sign +m_inf: + fmovem.x mns_one,fp0 + fmove.l d1,fpcr + fdiv.x pls_zero,fp0 + rts +p_inf: + fmovem.x pls_one,fp0 + fmove.l d1,fpcr + fdiv.x pls_zero,fp0 + rts +* +* t_operr --- Operand Error exception +* + xdef t_operr +t_operr: + fmovem.x pls_inf,fp0 + fmove.l d1,fpcr + fmul.x pls_zero,fp0 + rts + +* +* t_unfl --- UNFL exception +* + xdef t_unfl +t_unfl: + btst.b #sign_bit,ETEMP(a6) + beq.b unf_pos +unf_neg: + fmovem.x mns_tiny,fp0 + fmove.l d1,fpcr + fmul.x pls_tiny,fp0 + rts + +unf_pos: + fmovem.x pls_tiny,fp0 + fmove.l d1,fpcr + fmul.x fp0,fp0 + rts +* +* t_ovfl --- OVFL exception +* +* t_ovfl is called as an exit for monadic functions. t_ovfl2 +* is for dyadic exits. 
+* + xdef t_ovfl +t_ovfl: + xdef t_ovfl2 + move.l d1,USER_FPCR(a6) user's control register + move.l #ovfinx_mask,d0 + bra.b t_work +t_ovfl2: + move.l #ovfl_inx_mask,d0 +t_work: + btst.b #sign_bit,ETEMP(a6) + beq.b ovf_pos +ovf_neg: + fmovem.x mns_huge,fp0 + fmove.l USER_FPCR(a6),fpcr + fmul.x pls_huge,fp0 + fmove.l fpsr,d1 + or.l d1,d0 + fmove.l d0,fpsr + rts +ovf_pos: + fmovem.x pls_huge,fp0 + fmove.l USER_FPCR(a6),fpcr + fmul.x pls_huge,fp0 + fmove.l fpsr,d1 + or.l d1,d0 + fmove.l d0,fpsr + rts +* +* t_inx2 --- INEX2 exception (correct fpcr is in USER_FPCR(a6)) +* + xdef t_inx2 +t_inx2: + fmove.l fpsr,USER_FPSR(a6) capture incoming fpsr + fmove.l USER_FPCR(a6),fpcr +* +* create an inex2 exception by adding two numbers with very different exponents +* do the add in fp1 so as to not disturb the result sitting in fp0 +* + fmove.x pls_one,fp1 + fadd.x small,fp1 +* + or.l #inx2a_mask,USER_FPSR(a6) ;set INEX2, AINEX + fmove.l USER_FPSR(a6),fpsr + rts +* +* t_frcinx --- Force Inex2 (for monadic functions) +* + xdef t_frcinx +t_frcinx: + fmove.l fpsr,USER_FPSR(a6) capture incoming fpsr + fmove.l d1,fpcr +* +* create an inex2 exception by adding two numbers with very different exponents +* do the add in fp1 so as to not disturb the result sitting in fp0 +* + fmove.x pls_one,fp1 + fadd.x small,fp1 +* + or.l #inx2a_mask,USER_FPSR(a6) ;set INEX2, AINEX + btst.b #unfl_bit,FPSR_EXCEPT(a6) ;test for unfl bit set + beq.b no_uacc1 ;if clear, do not set aunfl + bset.b #aunfl_bit,FPSR_AEXCEPT(a6) +no_uacc1: + fmove.l USER_FPSR(a6),fpsr + rts +* +* dst_nan --- force result when destination is a NaN +* + xdef dst_nan +dst_nan: + fmove.l USER_FPCR(a6),fpcr + fmove.x FPTEMP(a6),fp0 + rts + +* +* src_nan --- force result when source is a NaN +* + xdef src_nan +src_nan: + fmove.l USER_FPCR(a6),fpcr + fmove.x ETEMP(a6),fp0 + rts +* +* mon_nan --- force result when source is a NaN (monadic version) +* +* This is the same as src_nan except that the user's fpcr comes +* in via d1, not 
USER_FPCR(a6). +* + xdef mon_nan +mon_nan: + fmove.l d1,fpcr + fmove.x ETEMP(a6),fp0 + rts +* +* t_extdnrm, t_resdnrm --- generate results for denorm inputs +* +* For all functions that have a denormalized input and that f(x)=x, +* this is the entry point. +* + xdef t_extdnrm +t_extdnrm: + fmove.l d1,fpcr + fmove.x LOCAL_EX(a0),fp0 + fmove.l fpsr,d0 + or.l #unfinx_mask,d0 + fmove.l d0,fpsr + rts + + xdef t_resdnrm +t_resdnrm: + fmove.l USER_FPCR(a6),fpcr + fmove.x LOCAL_EX(a0),fp0 + fmove.l fpsr,d0 + or.l #unfl_mask,d0 + fmove.l d0,fpsr + rts +* +* +* + xdef t_avoid_unsupp +t_avoid_unsupp: + fmove.x fp0,fp0 + rts + + xdef sto_cos +sto_cos: + fmovem.x LOCAL_EX(a0),fp1 + rts +* +* Native instruction support +* +* Some systems may need entry points even for 68040 native +* instructions. These routines are provided for +* convenience. +* + xdef sadd +sadd: + fmovem.x FPTEMP(a6),fp0 + fmove.l USER_FPCR(a6),fpcr + fadd.x ETEMP(a6),fp0 + rts + + xdef ssub +ssub: + fmovem.x FPTEMP(a6),fp0 + fmove.l USER_FPCR(a6),fpcr + fsub.x ETEMP(a6),fp0 + rts + + xdef smul +smul: + fmovem.x FPTEMP(a6),fp0 + fmove.l USER_FPCR(a6),fpcr + fmul.x ETEMP(a6),fp0 + rts + + xdef sdiv +sdiv: + fmovem.x FPTEMP(a6),fp0 + fmove.l USER_FPCR(a6),fpcr + fdiv.x ETEMP(a6),fp0 + rts + + xdef sabs +sabs: + fmovem.x ETEMP(a6),fp0 + fmove.l d1,fpcr + fabs.x fp0 + rts + + xdef sneg +sneg: + fmovem.x ETEMP(a6),fp0 + fmove.l d1,fpcr + fneg.x fp0 + rts + + xdef ssqrt +ssqrt: + fmovem.x ETEMP(a6),fp0 + fmove.l d1,fpcr + fsqrt.x fp0 + rts + +* +* l_sint,l_sintrz,l_sintd --- special wrapper for fint and fintrz +* +* On entry, move the user's FPCR to USER_FPCR. +* +* On return from, we need to pickup the INEX2/AINEX bits +* that are in USER_FPSR. 
+* + xref sint + xref sintrz + xref sintd + + xdef l_sint +l_sint: + move.l d1,USER_FPCR(a6) + jsr sint + fmove.l fpsr,d0 + or.l USER_FPSR(a6),d0 + fmove.l d0,fpsr + rts + + xdef l_sintrz +l_sintrz: + move.l d1,USER_FPCR(a6) + jsr sintrz + fmove.l fpsr,d0 + or.l USER_FPSR(a6),d0 + fmove.l d0,fpsr + rts + + xdef l_sintd +l_sintd: + move.l d1,USER_FPCR(a6) + jsr sintd + fmove.l fpsr,d0 + or.l USER_FPSR(a6),d0 + fmove.l d0,fpsr + rts + + end diff --git a/sys/arch/m68k/fpsp/netbsd.sa b/sys/arch/m68k/fpsp/netbsd.sa new file mode 100644 index 00000000000..5dad0ef7779 --- /dev/null +++ b/sys/arch/m68k/fpsp/netbsd.sa @@ -0,0 +1,442 @@ +* $NetBSD: netbsd.sa,v 1.2 1994/10/26 07:49:19 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. 
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* skeleton.sa 3.2 4/26/91 +* +* This file contains code that is system dependent and will +* need to be modified to install the FPSP. +* +* Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'. +* Put any target system specific handling that must be done immediately +* before the jump instruction. If there is no handling necessary, then +* the 'fpsp_xxxx' handler entry point should be placed in the exception +* table so that the 'jmp' can be eliminated. If the FPSP determines that the +* exception is one that must be reported then there will be a +* return from the package by a 'jmp real_xxxx'. At that point +* the machine state will be identical to the state before +* the FPSP was entered. In particular, whatever condition +* caused the exception will still be pending when the FPSP +* package returns. Thus, there will be system specific code +* to handle the exception. +* +* If the exception was completely handled by the package, then +* the return will be via a 'jmp fpsp_done'. Unless there is +* OS specific work to be done (such as handling a context switch or +* interrupt) the user program can be resumed via 'rte'. +* +* In the following skeleton code, some typical 'real_xxxx' handling +* code is shown. This code may need to be moved to an appropriate +* place in the target system, or rewritten.
+* + +SKELETON IDNT 2,1 Motorola 040 Floating Point Software Package + + section 15 +* +* The following counters are used for standalone testing +* + + section 8 + + include fpsp.h + + xref b1238_fix + xref _mmutype + +* +* Divide by Zero exception +* +* All dz exceptions are 'real', hence no fpsp_dz entry point. +* + xdef dz + xdef real_dz +dz: + cmp.l #-2,_mmutype ;NOTE(review): -2 presumably marks a 68040 -- confirm + bne.l _fpfault ;any other value: go straight to _fpfault +real_dz: + link a6,#-LOCAL_SIZE + fsave -(sp) + bclr.b #E1,E_BYTE(a6) ;clear E1 in the saved frame + frestore (sp)+ ;reload the modified frame + unlk a6 + jmp _fpfault + +* +* Inexact exception +* +* All inexact exceptions are real, but the 'real' handler +* will probably want to clear the pending exception. +* The provided code will clear the E3 exception (if pending), +* otherwise clear the E1 exception. The frestore is not really +* necessary for E1 exceptions. +* +* Code following the 'inex' label is to handle bug #1232. In this +* bug, if an E1 snan, ovfl, or unfl occurred, and the process was +* swapped out before taking the exception, the exception taken on +* return was inex, rather than the correct exception. The snan, ovfl, +* and unfl exception to be taken must not have been enabled. The +* fix is to check for E1, and the existence of one of snan, ovfl, +* or unfl bits set in the fpsr. If any of these are set, branch +* to the appropriate handler for the exception in the fpsr. Note +* that this fix is only for d43b parts, and is skipped if the +* version number is not $40.
+* +* + xdef real_inex + xdef inex +inex: + cmp.l #-2,_mmutype + bne.l _fpfault + link a6,#-LOCAL_SIZE + fsave -(sp) + cmpi.b #VER_40,(sp) ;test version number + bne.b not_fmt40 ;other version: skip the bug 1232 check + fmove.l fpsr,-(sp) ;push fpsr so its bytes can be tested + btst.b #E1,E_BYTE(a6) ;test for E1 set + beq.b not_b1232 + btst.b #snan_bit,2(sp) ;test for snan + beq inex_ckofl + addq.l #4,sp ;drop the pushed fpsr + frestore (sp)+ + unlk a6 + bra snan +inex_ckofl: + btst.b #ovfl_bit,2(sp) ;test for ovfl + beq inex_ckufl + addq.l #4,sp ;drop the pushed fpsr + frestore (sp)+ + unlk a6 + bra ovfl +inex_ckufl: + btst.b #unfl_bit,2(sp) ;test for unfl + beq not_b1232 + addq.l #4,sp ;drop the pushed fpsr + frestore (sp)+ + unlk a6 + bra unfl + +* +* We do not have the bug 1232 case. Clean up the stack and call +* real_inex. +* +not_b1232: + addq.l #4,sp ;drop the pushed fpsr + frestore (sp)+ + unlk a6 + +real_inex: + link a6,#-LOCAL_SIZE + fsave -(sp) +not_fmt40: + bclr.b #E3,E_BYTE(a6) ;clear and test E3 flag + beq.b inex_cke1 ;E3 was clear: clear E1 instead +* +* Clear dirty bit on dest register in the frame before branching +* to b1238_fix. +* + movem.l d0/d1,USER_DA(a6) ;save d0/d1 around the fix + bfextu CMDREG1B(a6){6:3},d0 ;get dest reg no + bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit + bsr.l b1238_fix ;test for bug1238 case + movem.l USER_DA(a6),d0/d1 ;restore d0/d1 + bra.b inex_done +inex_cke1: + bclr.b #E1,E_BYTE(a6) +inex_done: + frestore (sp)+ + unlk a6 + jmp _fpfault + +* +* Overflow exception +* + xref fpsp_ovfl + xdef real_ovfl + xdef ovfl +ovfl: + cmp.l #-2,_mmutype + beq.l fpsp_ovfl + jmp _fpfault +real_ovfl: + link a6,#-LOCAL_SIZE + fsave -(sp) + bclr.b #E3,E_BYTE(a6) ;clear and test E3 flag + bne.b ovfl_done ;E3 was set: leave E1 alone + bclr.b #E1,E_BYTE(a6) +ovfl_done: + frestore (sp)+ + unlk a6 + jmp _fpfault + +* +* Underflow exception +* + xref fpsp_unfl + xdef real_unfl + xdef unfl +unfl: + cmp.l #-2,_mmutype + beq.l fpsp_unfl + jmp _fpfault +real_unfl: + link a6,#-LOCAL_SIZE + fsave -(sp) + bclr.b #E3,E_BYTE(a6) ;clear and test E3 flag + bne.b unfl_done ;E3 was set: leave E1 alone + bclr.b #E1,E_BYTE(a6) +unfl_done: + frestore (sp)+ + unlk a6 + jmp _fpfault + +* +* Signalling NAN exception +* + xref fpsp_snan + xdef
real_snan + xdef snan +snan: + cmp.l #-2,_mmutype + beq.l fpsp_snan + jmp _fpfault +real_snan: + link a6,#-LOCAL_SIZE + fsave -(sp) + bclr.b #E1,E_BYTE(a6) ;snan is always an E1 exception + frestore (sp)+ + unlk a6 + jmp _fpfault + +* +* Operand Error exception +* + xref fpsp_operr + xdef real_operr + xdef operr +operr: + cmp.l #-2,_mmutype + beq.l fpsp_operr + jmp _fpfault +real_operr: + link a6,#-LOCAL_SIZE + fsave -(sp) + bclr.b #E1,E_BYTE(a6) ;operr is always an E1 exception + frestore (sp)+ + unlk a6 + jmp _fpfault + +* +* BSUN exception +* +* This sample handler simply clears the nan bit in the FPSR. +* + xref fpsp_bsun + xdef real_bsun + xdef bsun +bsun: + cmp.l #-2,_mmutype + beq.l fpsp_bsun + jmp _fpfault +real_bsun: + link a6,#-LOCAL_SIZE + fsave -(sp) + bclr.b #E1,E_BYTE(a6) ;bsun is always an E1 exception + fmove.l FPSR,-(sp) + bclr.b #nan_bit,(sp) + fmove.l (sp)+,FPSR + frestore (sp)+ + unlk a6 + jmp _fpfault + +* +* F-line exception +* +* A 'real' F-line exception is one that the FPSP isn't supposed to +* handle. E.g. an instruction with a co-processor ID that is not 1. +* +* + xref fpsp_fline + xdef real_fline + xdef fline +fline: + cmp.l #-2,_mmutype + beq.l fpsp_fline + jmp _fpfault +real_fline: + jmp _fpfault + +* +* Unsupported data type exception +* + xref fpsp_unsupp + xdef real_unsupp + xdef unsupp +unsupp: + cmp.l #-2,_mmutype + beq.l fpsp_unsupp + jmp _fpfault +real_unsupp: + link a6,#-LOCAL_SIZE + fsave -(sp) + bclr.b #E1,E_BYTE(a6) ;unsupp is always an E1 exception + frestore (sp)+ + unlk a6 + jmp _fpfault + +* +* Trace exception +* + xdef real_trace +real_trace: + rte + +* +* fpsp_fmt_error --- exit point for frame format error +* +* The fpu stack frame does not match the frames existing +* or planned at the time of this writing. 
The fpsp is +* unable to handle frame sizes not in the following +* version:size pairs: +* +* {4060, 4160} - busy frame +* {4028, 4130} - unimp frame +* {4000, 4100} - idle frame +* +* This entry point simply holds an f-line illegal value. +* Replace this with a call to your kernel panic code or +* code to handle future revisions of the fpu. +* + xdef fpsp_fmt_error +fpsp_fmt_error: + pea 1f + jsr _panic + dc.l $f27f0000 ;f-line illegal +1: + .asciz "bad floating point stack frame" + .even + +* +* fpsp_done --- FPSP exit point +* +* The exception has been handled by the package and we are ready +* to return to user mode, but there may be OS specific code +* to execute before we do. If there is, do it now. +* +* + xref rei + xdef fpsp_done +fpsp_done: + jmp rei + +* +* mem_write --- write to user or supervisor address space +* +* Writes to memory while in supervisor mode. copyout accomplishes +* this via a 'moves' instruction. copyout is a UNIX SVR3 (and later) function. +* If you don't have copyout, use the local copy of the function below. +* +* a0 - supervisor source address +* a1 - user destination address +* d0 - number of bytes to write (maximum count is 12) +* +* The supervisor source address is guaranteed to point into the supervisor +* stack. The result is that a UNIX +* process is allowed to sleep as a consequence of a page fault during +* copyout. The probability of a page fault is exceedingly small because +* the 68040 always reads the destination address and thus the page +* faults should have already been handled. +* +* If the EXC_SR shows that the exception was from supervisor space, +* then just do a dumb (and slow) memory move. In a UNIX environment +* there shouldn't be any supervisor mode floating point exceptions. 
+* + xdef mem_write +mem_write: + btst.b #5,EXC_SR(a6) ;check for supervisor state + beq.b user_write +super_write: + move.b (a0)+,(a1)+ + subq.l #1,d0 + bne.b super_write + rts +user_write: + move.l d1,-(sp) ;preserve d1 just in case + move.l d0,-(sp) + move.l a1,-(sp) + move.l a0,-(sp) + jsr _copyout + add.l #12,sp + move.l (sp)+,d1 + rts + +* +* mem_read --- read from user or supervisor address space +* +* Reads from memory while in supervisor mode. copyin accomplishes +* this via a 'moves' instruction. copyin is a UNIX SVR3 (and later) function. +* If you don't have copyin, use the local copy of the function below. +* +* The FPSP calls mem_read to read the original F-line instruction in order +* to extract the data register number when the 'Dn' addressing mode is +* used. +* +*Input: +* a0 - user source address +* a1 - supervisor destination address +* d0 - number of bytes to read (maximum count is 12) +* +* Like mem_write, mem_read always reads with a supervisor +* destination address on the supervisor stack. Also like mem_write, +* the EXC_SR is checked and a simple memory copy is done if reading +* from supervisor space is indicated. +* + xdef mem_read +mem_read: + btst.b #5,EXC_SR(a6) ;check for supervisor state + beq.b user_read +super_read: + move.b (a0)+,(a1)+ + subq.l #1,d0 + bne.b super_read + rts +user_read: + move.l d1,-(sp) ;preserve d1 just in case + move.l d0,-(sp) + move.l a1,-(sp) + move.l a0,-(sp) + jsr _copyin + add.l #12,sp + move.l (sp)+,d1 + rts + + end diff --git a/sys/arch/m68k/fpsp/res_func.sa b/sys/arch/m68k/fpsp/res_func.sa new file mode 100644 index 00000000000..5c036b742fc --- /dev/null +++ b/sys/arch/m68k/fpsp/res_func.sa @@ -0,0 +1,2065 @@ +* $NetBSD: res_func.sa,v 1.3 1994/10/26 07:49:22 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. 
+* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* res_func.sa 3.9 7/29/91 +* +* Normalizes denormalized numbers if necessary and updates the +* stack frame. The function is then restored back into the +* machine and the 040 completes the operation. This routine +* is only used by the unsupported data type/format handler. +* (Exception vector 55). +* +* For packed move out (fmove.p fpm,<ea>) the operation is +* completed here; data is packed and moved to user memory. +* The stack is restored to the 040 only in the case of a +* reportable exception in the conversion. 
+* + +RES_FUNC IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + +sp_bnds: dc.w $3f81,$407e + dc.w $3f6a,$0000 +dp_bnds: dc.w $3c01,$43fe + dc.w $3bcd,$0000 + + xref mem_write + xref bindec + xref get_fline + xref round + xref denorm + xref dest_ext + xref dest_dbl + xref dest_sgl + xref unf_sub + xref nrm_set + xref dnrm_lp + xref ovf_res + xref reg_dest + xref t_ovfl + xref t_unfl + + xdef res_func + xdef p_move + +res_func: + clr.b DNRM_FLG(a6) + clr.b RES_FLG(a6) + clr.b CU_ONLY(a6) + tst.b DY_MO_FLG(a6) + beq.b monadic +dyadic: + btst.b #7,DTAG(a6) ;if dop = norm=000, zero=001, +* ;inf=010 or nan=011 + beq.b monadic ;then branch +* ;else denorm +* HANDLE DESTINATION DENORM HERE +* ;set dtag to norm +* ;write the tag & fpte15 to the fstack + lea.l FPTEMP(a6),a0 + + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) + + bsr nrm_set ;normalize number (exp will go negative) + bclr.b #sign_bit,LOCAL_EX(a0) ;get rid of false sign + bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format + beq.b dpos + bset.b #sign_bit,LOCAL_EX(a0) +dpos: + bfclr DTAG(a6){0:4} ;set tag to normalized, FPTE15 = 0 + bset.b #4,DTAG(a6) ;set FPTE15 + or.b #$0f,DNRM_FLG(a6) +monadic: + lea.l ETEMP(a6),a0 + btst.b #direction_bit,CMDREG1B(a6) ;check direction + bne.w opclass3 ;it is a mv out +* +* At this point, only opclass 0 and 2 are possible +* + btst.b #7,STAG(a6) ;if sop = norm=000, zero=001, +* ;inf=010 or nan=011 + bne.w mon_dnrm ;else denorm + tst.b DY_MO_FLG(a6) ;all cases of dyadic instructions would + bne.w normal ;require normalization of denorm + +* At this point: +* monadic instructions: fabs = $18 fneg = $1a ftst = $3a +* fmove = $00 fsmove = $40 fdmove = $44 +* fsqrt = $05* fssqrt = $41 fdsqrt = $45 +* (*fsqrt reencoded to $05) +* + move.w CMDREG1B(a6),d0 ;get command register + andi.l #$7f,d0 ;strip to only command word +* +* At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and +* fdsqrt are possible. 
+* For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize) +* For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize) +* + btst.l #0,d0 + bne.w normal ;weed out fsqrt instructions +* +* cu_norm handles fmove in instructions with normalized inputs. +* The routine round is used to correctly round the input for the +* destination precision and mode. +* +cu_norm: + st CU_ONLY(a6) ;set cu-only inst flag + move.w CMDREG1B(a6),d0 + andi.b #$3b,d0 ;isolate bits to select inst + tst.b d0 + beq.l cu_nmove ;if zero, it is an fmove + cmpi.b #$18,d0 + beq.l cu_nabs ;if $18, it is fabs + cmpi.b #$1a,d0 + beq.l cu_nneg ;if $1a, it is fneg +* +* Inst is ftst. Check the source operand and set the cc's accordingly. +* No write is done, so simply rts. +* +cu_ntst: + move.w LOCAL_EX(a0),d0 + bclr.l #15,d0 + sne LOCAL_SGN(a0) + beq.b cu_ntpo + or.l #neg_mask,USER_FPSR(a6) ;set N +cu_ntpo: + cmpi.w #$7fff,d0 ;test for inf/nan + bne.b cu_ntcz + tst.l LOCAL_HI(a0) + bne.b cu_ntn + tst.l LOCAL_LO(a0) + bne.b cu_ntn + or.l #inf_mask,USER_FPSR(a6) + rts +cu_ntn: + or.l #nan_mask,USER_FPSR(a6) + move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for +* ;snan handler + + rts +cu_ntcz: + tst.l LOCAL_HI(a0) + bne.l cu_ntsx + tst.l LOCAL_LO(a0) + bne.l cu_ntsx + or.l #z_mask,USER_FPSR(a6) +cu_ntsx: + rts +* +* Inst is fabs. Execute the absolute value function on the input. +* Branch to the fmove code. If the operand is NaN, do nothing. +* +cu_nabs: + move.b STAG(a6),d0 + btst.l #5,d0 ;test for NaN or zero + bne wr_etemp ;if either, simply write it + bclr.b #7,LOCAL_EX(a0) ;do abs + bra.b cu_nmove ;fmove code will finish +* +* Inst is fneg. Execute the negate value function on the input. +* Fall through to the fmove code. If the operand is NaN, do nothing. +* +cu_nneg: + move.b STAG(a6),d0 + btst.l #5,d0 ;test for NaN or zero + bne wr_etemp ;if either, simply write it + bchg.b #7,LOCAL_EX(a0) ;do neg +* +* Inst is fmove. This code also handles all result writes. 
+* If bit 2 is set, round is forced to double. If it is clear, +* and bit 6 is set, round is forced to single. If both are clear, +* the round precision is found in the fpcr. If the rounding precision +* is double or single, round the result before the write. +* +cu_nmove: + move.b STAG(a6),d0 + andi.b #$e0,d0 ;isolate stag bits + bne wr_etemp ;if not norm, simply write it + btst.b #2,CMDREG1B+1(a6) ;check for rd + bne cu_nmrd + btst.b #6,CMDREG1B+1(a6) ;check for rs + bne cu_nmrs +* +* The move or operation is not with forced precision. Test for +* nan or inf as the input; if so, simply write it to FPn. Use the +* FPCR_MODE byte to get rounding on norms and zeros. +* +cu_nmnr: + bfextu FPCR_MODE(a6){0:2},d0 + tst.b d0 ;check for extended + beq cu_wrexn ;if so, just write result + cmpi.b #1,d0 ;check for single + beq cu_nmrs ;fall through to double +* +* The move is fdmove or round precision is double. +* +cu_nmrd: + move.l #2,d0 ;set up the size for denorm + move.w LOCAL_EX(a0),d1 ;compare exponent to double threshold + and.w #$7fff,d1 + cmp.w #$3c01,d1 + bls cu_nunfl + bfextu FPCR_MODE(a6){2:2},d1 ;get rmode + or.l #$00020000,d1 ;or in rprec (double) + clr.l d0 ;clear g,r,s for round + bclr.b #sign_bit,LOCAL_EX(a0) ;convert to internal format + sne LOCAL_SGN(a0) + bsr.l round + bfclr LOCAL_SGN(a0){0:8} + beq.b cu_nmrdc + bset.b #sign_bit,LOCAL_EX(a0) +cu_nmrdc: + move.w LOCAL_EX(a0),d1 ;check for overflow + and.w #$7fff,d1 + cmp.w #$43ff,d1 + bge cu_novfl ;take care of overflow case + bra.w cu_wrexn +* +* The move is fsmove or round precision is single. 
+* +cu_nmrs: + move.l #1,d0 + move.w LOCAL_EX(a0),d1 + and.w #$7fff,d1 + cmp.w #$3f81,d1 + bls cu_nunfl + bfextu FPCR_MODE(a6){2:2},d1 + or.l #$00010000,d1 + clr.l d0 + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) + bsr.l round + bfclr LOCAL_SGN(a0){0:8} + beq.b cu_nmrsc + bset.b #sign_bit,LOCAL_EX(a0) +cu_nmrsc: + move.w LOCAL_EX(a0),d1 + and.w #$7FFF,d1 + cmp.w #$407f,d1 + blt cu_wrexn +* +* The operand is above precision boundaries. Use t_ovfl to +* generate the correct value. +* +cu_novfl: + bsr t_ovfl + bra cu_wrexn +* +* The operand is below precision boundaries. Use denorm to +* generate the correct value. +* +cu_nunfl: + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) + bsr denorm + bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format + beq.b cu_nucont + bset.b #sign_bit,LOCAL_EX(a0) +cu_nucont: + bfextu FPCR_MODE(a6){2:2},d1 + btst.b #2,CMDREG1B+1(a6) ;check for rd + bne inst_d + btst.b #6,CMDREG1B+1(a6) ;check for rs + bne inst_s + swap d1 + move.b FPCR_MODE(a6),d1 + lsr.b #6,d1 + swap d1 + bra inst_sd +inst_d: + or.l #$00020000,d1 + bra inst_sd +inst_s: + or.l #$00010000,d1 +inst_sd: + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) + bsr.l round + bfclr LOCAL_SGN(a0){0:8} + beq.b cu_nuflp + bset.b #sign_bit,LOCAL_EX(a0) +cu_nuflp: + btst.b #inex2_bit,FPSR_EXCEPT(a6) + beq.b cu_nuninx + or.l #aunfl_mask,USER_FPSR(a6) ;if the round was inex, set AUNFL +cu_nuninx: + tst.l LOCAL_HI(a0) ;test for zero + bne.b cu_nunzro + tst.l LOCAL_LO(a0) + bne.b cu_nunzro +* +* The mantissa is zero from the denorm loop. Check sign and rmode +* to see if rounding should have occurred which would leave the lsb. 
+* + move.l USER_FPCR(a6),d0 + andi.l #$30,d0 ;isolate rmode + cmpi.l #$20,d0 + blt.b cu_nzro + bne.b cu_nrp +cu_nrm: + tst.w LOCAL_EX(a0) ;if negative, set lsb + bge.b cu_nzro + btst.b #7,FPCR_MODE(a6) ;check for double + beq.b cu_nincs + bra.b cu_nincd +cu_nrp: + tst.w LOCAL_EX(a0) ;if positive, set lsb + blt.b cu_nzro + btst.b #7,FPCR_MODE(a6) ;check for double + beq.b cu_nincs +cu_nincd: + or.l #$800,LOCAL_LO(a0) ;inc for double + bra cu_nunzro +cu_nincs: + or.l #$100,LOCAL_HI(a0) ;inc for single + bra cu_nunzro +cu_nzro: + or.l #z_mask,USER_FPSR(a6) + move.b STAG(a6),d0 + andi.b #$e0,d0 + cmpi.b #$40,d0 ;check if input was tagged zero + beq.b cu_numv +cu_nunzro: + or.l #unfl_mask,USER_FPSR(a6) ;set unfl +cu_numv: + move.l (a0),ETEMP(a6) + move.l 4(a0),ETEMP_HI(a6) + move.l 8(a0),ETEMP_LO(a6) +* +* Write the result to memory, setting the fpsr cc bits. NaN and Inf +* bypass cu_wrexn. +* +cu_wrexn: + tst.w LOCAL_EX(a0) ;test for zero + beq.b cu_wrzero + cmp.w #$8000,LOCAL_EX(a0) ;test for zero with sign set + bne.b cu_wreon +cu_wrzero: + or.l #z_mask,USER_FPSR(a6) ;set Z bit +cu_wreon: + tst.w LOCAL_EX(a0) + bpl wr_etemp + or.l #neg_mask,USER_FPSR(a6) + bra wr_etemp + +* +* HANDLE SOURCE DENORM HERE +* +* ;clear denorm stag to norm +* ;write the new tag & ete15 to the fstack +mon_dnrm: +* +* At this point, check for the cases in which normalizing the +* denorm produces incorrect results. +* + tst.b DY_MO_FLG(a6) ;all cases of dyadic instructions would + bne.b nrm_src ;require normalization of denorm + +* At this point: +* monadic instructions: fabs = $18 fneg = $1a ftst = $3a +* fmove = $00 fsmove = $40 fdmove = $44 +* fsqrt = $05* fssqrt = $41 fdsqrt = $45 +* (*fsqrt reencoded to $05) +* + move.w CMDREG1B(a6),d0 ;get command register + andi.l #$7f,d0 ;strip to only command word +* +* At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and +* fdsqrt are possible. 
+* For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize) +* For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize) +* + btst.l #0,d0 + bne.b nrm_src ;weed out fsqrt instructions + st CU_ONLY(a6) ;set cu-only inst flag + bra cu_dnrm ;fmove, fabs, fneg, ftst +* ;cases go to cu_dnrm +nrm_src: + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) + bsr nrm_set ;normalize number (exponent will go +* ; negative) + bclr.b #sign_bit,LOCAL_EX(a0) ;get rid of false sign + + bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format + beq.b spos + bset.b #sign_bit,LOCAL_EX(a0) +spos: + bfclr STAG(a6){0:4} ;set tag to normalized, ETE15 = 0 + bset.b #4,STAG(a6) ;set ETE15 + or.b #$f0,DNRM_FLG(a6) +normal: + tst.b DNRM_FLG(a6) ;check if any of the ops were denorms + bne ck_wrap ;if so, check if it is a potential +* ;wrap-around case +fix_stk: + move.b #$fe,CU_SAVEPC(a6) + bclr.b #E1,E_BYTE(a6) + + clr.w NMNEXC(a6) + + st.b RES_FLG(a6) ;indicate that a restore is needed + rts + +* +* cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and +* ftst) completely in software without an frestore to the 040. +* +cu_dnrm: + st.b CU_ONLY(a6) + move.w CMDREG1B(a6),d0 + andi.b #$3b,d0 ;isolate bits to select inst + tst.b d0 + beq.l cu_dmove ;if zero, it is an fmove + cmpi.b #$18,d0 + beq.l cu_dabs ;if $18, it is fabs + cmpi.b #$1a,d0 + beq.l cu_dneg ;if $1a, it is fneg +* +* Inst is ftst. Check the source operand and set the cc's accordingly. +* No write is done, so simply rts. 
+* +cu_dtst: + move.w LOCAL_EX(a0),d0 + bclr.l #15,d0 + sne LOCAL_SGN(a0) + beq.b cu_dtpo + or.l #neg_mask,USER_FPSR(a6) ;set N +cu_dtpo: + cmpi.w #$7fff,d0 ;test for inf/nan + bne.b cu_dtcz + tst.l LOCAL_HI(a0) + bne.b cu_dtn + tst.l LOCAL_LO(a0) + bne.b cu_dtn + or.l #inf_mask,USER_FPSR(a6) + rts +cu_dtn: + or.l #nan_mask,USER_FPSR(a6) + move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for +* ;snan handler + rts +cu_dtcz: + tst.l LOCAL_HI(a0) + bne.l cu_dtsx + tst.l LOCAL_LO(a0) + bne.l cu_dtsx + or.l #z_mask,USER_FPSR(a6) +cu_dtsx: + rts +* +* Inst is fabs. Execute the absolute value function on the input. +* Branch to the fmove code. +* +cu_dabs: + bclr.b #7,LOCAL_EX(a0) ;do abs + bra.b cu_dmove ;fmove code will finish +* +* Inst is fneg. Execute the negate value function on the input. +* Fall through to the fmove code. +* +cu_dneg: + bchg.b #7,LOCAL_EX(a0) ;do neg +* +* Inst is fmove. This code also handles all result writes. +* If bit 2 is set, round is forced to double. If it is clear, +* and bit 6 is set, round is forced to single. If both are clear, +* the round precision is found in the fpcr. If the rounding precision +* is double or single, the result is zero, and the mode is checked +* to determine if the lsb of the result should be set. +* +cu_dmove: + btst.b #2,CMDREG1B+1(a6) ;check for rd + bne cu_dmrd + btst.b #6,CMDREG1B+1(a6) ;check for rs + bne cu_dmrs +* +* The move or operation is not with forced precision. Use the +* FPCR_MODE byte to get rounding. +* +cu_dmnr: + bfextu FPCR_MODE(a6){0:2},d0 + tst.b d0 ;check for extended + beq cu_wrexd ;if so, just write result + cmpi.b #1,d0 ;check for single + beq cu_dmrs ;fall through to double +* +* The move is fdmove or round precision is double. Result is zero. +* Check rmode for rp or rm and set lsb accordingly. 
+* +cu_dmrd: + bfextu FPCR_MODE(a6){2:2},d1 ;get rmode + tst.w LOCAL_EX(a0) ;check sign + blt.b cu_dmdn + cmpi.b #3,d1 ;check for rp + bne cu_dpd ;load double pos zero + bra cu_dpdr ;load double pos zero w/lsb +cu_dmdn: + cmpi.b #2,d1 ;check for rm + bne cu_dnd ;load double neg zero + bra cu_dndr ;load double neg zero w/lsb +* +* The move is fsmove or round precision is single. Result is zero. +* Check for rp or rm and set lsb accordingly. +* +cu_dmrs: + bfextu FPCR_MODE(a6){2:2},d1 ;get rmode + tst.w LOCAL_EX(a0) ;check sign + blt.b cu_dmsn + cmpi.b #3,d1 ;check for rp + bne cu_spd ;load single pos zero + bra cu_spdr ;load single pos zero w/lsb +cu_dmsn: + cmpi.b #2,d1 ;check for rm + bne cu_snd ;load single neg zero + bra cu_sndr ;load single neg zero w/lsb +* +* The precision is extended, so the result in etemp is correct. +* Simply set unfl (not inex2 or aunfl) and write the result to +* the correct fp register. +cu_wrexd: + or.l #unfl_mask,USER_FPSR(a6) + tst.w LOCAL_EX(a0) + beq wr_etemp + or.l #neg_mask,USER_FPSR(a6) + bra wr_etemp +* +* These routines write +/- zero in double format. The routines +* cu_dpdr and cu_dndr set the double lsb. 
+* +cu_dpd: + move.l #$3c010000,LOCAL_EX(a0) ;force pos double zero + clr.l LOCAL_HI(a0) + clr.l LOCAL_LO(a0) + or.l #z_mask,USER_FPSR(a6) + or.l #unfinx_mask,USER_FPSR(a6) + bra wr_etemp +cu_dpdr: + move.l #$3c010000,LOCAL_EX(a0) ;force pos double zero + clr.l LOCAL_HI(a0) + move.l #$800,LOCAL_LO(a0) ;with lsb set + or.l #unfinx_mask,USER_FPSR(a6) + bra wr_etemp +cu_dnd: + move.l #$bc010000,LOCAL_EX(a0) ;force neg double zero + clr.l LOCAL_HI(a0) + clr.l LOCAL_LO(a0) + or.l #z_mask,USER_FPSR(a6) + or.l #neg_mask,USER_FPSR(a6) + or.l #unfinx_mask,USER_FPSR(a6) + bra wr_etemp +cu_dndr: + move.l #$bc010000,LOCAL_EX(a0) ;force neg double zero + clr.l LOCAL_HI(a0) + move.l #$800,LOCAL_LO(a0) ;with lsb set + or.l #neg_mask,USER_FPSR(a6) + or.l #unfinx_mask,USER_FPSR(a6) + bra wr_etemp +* +* These routines write +/- zero in single format. The routines +* cu_spdr and cu_sndr set the single lsb. +* +cu_spd: + move.l #$3f810000,LOCAL_EX(a0) ;force pos single zero + clr.l LOCAL_HI(a0) + clr.l LOCAL_LO(a0) + or.l #z_mask,USER_FPSR(a6) + or.l #unfinx_mask,USER_FPSR(a6) + bra wr_etemp +cu_spdr: + move.l #$3f810000,LOCAL_EX(a0) ;force pos single zero + move.l #$100,LOCAL_HI(a0) ;with lsb set + clr.l LOCAL_LO(a0) + or.l #unfinx_mask,USER_FPSR(a6) + bra wr_etemp +cu_snd: + move.l #$bf810000,LOCAL_EX(a0) ;force neg single zero + clr.l LOCAL_HI(a0) + clr.l LOCAL_LO(a0) + or.l #z_mask,USER_FPSR(a6) + or.l #neg_mask,USER_FPSR(a6) + or.l #unfinx_mask,USER_FPSR(a6) + bra wr_etemp +cu_sndr: + move.l #$bf810000,LOCAL_EX(a0) ;force neg single zero + move.l #$100,LOCAL_HI(a0) ;with lsb set + clr.l LOCAL_LO(a0) + or.l #neg_mask,USER_FPSR(a6) + or.l #unfinx_mask,USER_FPSR(a6) + bra wr_etemp + +* +* This code checks for 16-bit overflow conditions on dyadic +* operations which are not restorable into the floating-point +* unit and must be completed in software. Basically, this +* condition exists with a very large norm and a denorm. 
One +* of the operands must be denormalized to enter this code. +* +* Flags used: +* DY_MO_FLG contains 0 for monadic op, $ff for dyadic +* DNRM_FLG contains $00 for neither op denormalized +* $0f for the destination op denormalized +* $f0 for the source op denormalized +* $ff for both ops denormalized +* +* The wrap-around condition occurs for add, sub, div, and cmp +* when +* +* abs(dest_exp - src_exp) >= $8000 +* +* and for mul when +* +* (dest_exp + src_exp) < $0 +* +* we must process the operation here if this case is true. +* +* The rts following the frcfpn routine is the exit from res_func +* for this condition. The restore flag (RES_FLG) is left clear. +* No frestore is done unless an exception is to be reported. +* +* For fadd: +* if(sign_of(dest) != sign_of(src)) +* replace exponent of src with $3fff (keep sign) +* use fpu to perform dest+new_src (user's rmode and X) +* clr sticky +* else +* set sticky +* call round with user's precision and mode +* move result to fpn and wbtemp +* +* For fsub: +* if(sign_of(dest) == sign_of(src)) +* replace exponent of src with $3fff (keep sign) +* use fpu to perform dest-new_src (user's rmode and X) +* clr sticky +* else +* set sticky +* call round with user's precision and mode +* move result to fpn and wbtemp +* +* For fdiv/fsgldiv: +* if(both operands are denorm) +* restore_to_fpu; +* if(dest is norm) +* force_ovf; +* else(dest is denorm) +* force_unf; +* +* For fcmp: +* if(dest is norm) +* N = sign_of(dest); +* else(dest is denorm) +* N = sign_of(src); +* +* For fmul: +* if(both operands are denorm) +* force_unf; +* if((dest_exp + src_exp) < 0) +* force_unf; +* else +* restore_to_fpu; +* +* local equates: +addcode equ $22 +subcode equ $28 +mulcode equ $23 +divcode equ $20 +cmpcode equ $38 +ck_wrap: + tst.b DY_MO_FLG(a6) ;check for fsqrt + beq fix_stk ;if zero, it is fsqrt + move.w CMDREG1B(a6),d0 + andi.w #$3b,d0 ;strip to command bits + cmpi.w #addcode,d0 + beq wrap_add + cmpi.w #subcode,d0 + beq wrap_sub + cmpi.w 
#mulcode,d0 + beq wrap_mul + cmpi.w #cmpcode,d0 + beq wrap_cmp +* +* Inst is fdiv. +* +wrap_div: + cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm, + beq fix_stk ;restore to fpu +* +* One of the ops is denormalized. Test for wrap condition +* and force the result. +* + cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm + bne.b div_srcd +div_destd: + bsr.l ckinf_ns + bne fix_stk + bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos) + bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg) + sub.l d1,d0 ;subtract dest from src + cmp.l #$7fff,d0 + blt fix_stk ;if less, not wrap case + clr.b WBTEMP_SGN(a6) + move.w ETEMP_EX(a6),d0 ;find the sign of the result + move.w FPTEMP_EX(a6),d1 + eor.w d1,d0 + andi.w #$8000,d0 + beq force_unf + st.b WBTEMP_SGN(a6) + bra force_unf + +ckinf_ns: + move.b STAG(a6),d0 ;check source tag for inf or nan + bra ck_in_com +ckinf_nd: + move.b DTAG(a6),d0 ;check destination tag for inf or nan +ck_in_com: + andi.b #$60,d0 ;isolate tag bits + cmp.b #$40,d0 ;is it inf? + beq nan_or_inf ;not wrap case + cmp.b #$60,d0 ;is it nan? + beq nan_or_inf ;yes, not wrap case + cmp.b #$20,d0 ;is it a zero? + beq nan_or_inf ;yes + clr.l d0 + rts ;then it is a norm (not zero, inf or nan), +* ;check wrap case +nan_or_inf: + moveq.l #-1,d0 + rts + + + +div_srcd: + bsr.l ckinf_nd + bne fix_stk + bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos) + bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg) + sub.l d1,d0 ;subtract src from dest + cmp.l #$8000,d0 + blt fix_stk ;if less, not wrap case + clr.b WBTEMP_SGN(a6) + move.w ETEMP_EX(a6),d0 ;find the sign of the result + move.w FPTEMP_EX(a6),d1 + eor.w d1,d0 + andi.w #$8000,d0 + beq.b force_ovf + st.b WBTEMP_SGN(a6) +* +* This code handles the case of the instruction resulting in +* an overflow condition. 
+* +force_ovf: + bclr.b #E1,E_BYTE(a6) + or.l #ovfl_inx_mask,USER_FPSR(a6) + clr.w NMNEXC(a6) + lea.l WBTEMP(a6),a0 ;point a0 to memory location + move.w CMDREG1B(a6),d0 + btst.l #6,d0 ;test for forced precision + beq.b frcovf_fpcr + btst.l #2,d0 ;check for double + bne.b frcovf_dbl + move.l #$1,d0 ;inst is forced single + bra.b frcovf_rnd +frcovf_dbl: + move.l #$2,d0 ;inst is forced double + bra.b frcovf_rnd +frcovf_fpcr: + bfextu FPCR_MODE(a6){0:2},d0 ;inst not forced - use fpcr prec +frcovf_rnd: + +* The 881/882 does not set inex2 for the following case, so the +* line is commented out to be compatible with 881/882 +* tst.b d0 +* beq.b frcovf_x +* or.l #inex2_mask,USER_FPSR(a6) ;if prec is s or d, set inex2 + +*frcovf_x: + bsr.l ovf_res ;get correct result based on +* ;round precision/mode. This +* ;sets FPSR_CC correctly +* ;returns in external format + bfclr WBTEMP_SGN(a6){0:8} + beq frcfpn + bset.b #sign_bit,WBTEMP_EX(a6) + bra frcfpn +* +* Inst is fadd. +* +wrap_add: + cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm, + beq fix_stk ;restore to fpu +* +* One of the ops is denormalized. Test for wrap condition +* and complete the instruction. +* + cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm + bne.b add_srcd +add_destd: + bsr.l ckinf_ns + bne fix_stk + bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos) + bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg) + sub.l d1,d0 ;subtract dest from src + cmp.l #$8000,d0 + blt fix_stk ;if less, not wrap case + bra add_wrap +add_srcd: + bsr.l ckinf_nd + bne fix_stk + bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos) + bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg) + sub.l d1,d0 ;subtract src from dest + cmp.l #$8000,d0 + blt fix_stk ;if less, not wrap case +* +* Check the signs of the operands. If they are unlike, the fpu +* can be used to add the norm and 1.0 with the sign of the +* denorm and it will correctly generate the result in extended +* precision. 
We can then call round with no sticky and the result +* will be correct for the user's rounding mode and precision. If +* the signs are the same, we call round with the sticky bit set +* and the result will be correct for the user's rounding mode and +* precision. +* +add_wrap: + move.w ETEMP_EX(a6),d0 + move.w FPTEMP_EX(a6),d1 + eor.w d1,d0 + andi.w #$8000,d0 + beq add_same +* +* The signs are unlike. +* + cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm? + bne.b add_u_srcd + move.w FPTEMP_EX(a6),d0 + andi.w #$8000,d0 + or.w #$3fff,d0 ;force the exponent to +/- 1 + move.w d0,FPTEMP_EX(a6) ;in the denorm + move.l USER_FPCR(a6),d0 + andi.l #$30,d0 + fmove.l d0,fpcr ;set up users rmode and X + fmove.x ETEMP(a6),fp0 + fadd.x FPTEMP(a6),fp0 + lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame + fmove.l fpsr,d1 + or.l d1,USER_FPSR(a6) ;capture cc's and inex from fadd + fmove.x fp0,WBTEMP(a6) ;write result to memory + lsr.l #4,d0 ;put rmode in lower 2 bits + move.l USER_FPCR(a6),d1 + andi.l #$c0,d1 + lsr.l #6,d1 ;put precision in upper word + swap d1 + or.l d0,d1 ;set up for round call + clr.l d0 ;force sticky to zero + bclr.b #sign_bit,WBTEMP_EX(a6) + sne WBTEMP_SGN(a6) + bsr.l round ;round result to users rmode & prec + bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format + beq frcfpnr + bset.b #sign_bit,WBTEMP_EX(a6) + bra frcfpnr +add_u_srcd: + move.w ETEMP_EX(a6),d0 + andi.w #$8000,d0 + or.w #$3fff,d0 ;force the exponent to +/- 1 + move.w d0,ETEMP_EX(a6) ;in the denorm + move.l USER_FPCR(a6),d0 + andi.l #$30,d0 + fmove.l d0,fpcr ;set up users rmode and X + fmove.x ETEMP(a6),fp0 + fadd.x FPTEMP(a6),fp0 + fmove.l fpsr,d1 + or.l d1,USER_FPSR(a6) ;capture cc's and inex from fadd + lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame + fmove.x fp0,WBTEMP(a6) ;write result to memory + lsr.l #4,d0 ;put rmode in lower 2 bits + move.l USER_FPCR(a6),d1 + andi.l #$c0,d1 + lsr.l #6,d1 ;put precision in upper word + swap d1 + or.l d0,d1 ;set up for round call + clr.l d0 ;force sticky 
to zero + bclr.b #sign_bit,WBTEMP_EX(a6) + sne WBTEMP_SGN(a6) ;use internal format for round + bsr.l round ;round result to users rmode & prec + bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format + beq frcfpnr + bset.b #sign_bit,WBTEMP_EX(a6) + bra frcfpnr +* +* Signs are alike: +* +add_same: + cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm? + bne.b add_s_srcd +add_s_destd: + lea.l ETEMP(a6),a0 + move.l USER_FPCR(a6),d0 + andi.l #$30,d0 + lsr.l #4,d0 ;put rmode in lower 2 bits + move.l USER_FPCR(a6),d1 + andi.l #$c0,d1 + lsr.l #6,d1 ;put precision in upper word + swap d1 + or.l d0,d1 ;set up for round call + move.l #$20000000,d0 ;set sticky for round + bclr.b #sign_bit,ETEMP_EX(a6) + sne ETEMP_SGN(a6) + bsr.l round ;round result to users rmode & prec + bfclr ETEMP_SGN(a6){0:8} ;convert back to IEEE ext format + beq.b add_s_dclr + bset.b #sign_bit,ETEMP_EX(a6) +add_s_dclr: + lea.l WBTEMP(a6),a0 + move.l ETEMP(a6),(a0) ;write result to wbtemp + move.l ETEMP_HI(a6),4(a0) + move.l ETEMP_LO(a6),8(a0) + tst.w ETEMP_EX(a6) + bgt add_ckovf + or.l #neg_mask,USER_FPSR(a6) + bra add_ckovf +add_s_srcd: + lea.l FPTEMP(a6),a0 + move.l USER_FPCR(a6),d0 + andi.l #$30,d0 + lsr.l #4,d0 ;put rmode in lower 2 bits + move.l USER_FPCR(a6),d1 + andi.l #$c0,d1 + lsr.l #6,d1 ;put precision in upper word + swap d1 + or.l d0,d1 ;set up for round call + move.l #$20000000,d0 ;set sticky for round + bclr.b #sign_bit,FPTEMP_EX(a6) + sne FPTEMP_SGN(a6) + bsr.l round ;round result to users rmode & prec + bfclr FPTEMP_SGN(a6){0:8} ;convert back to IEEE ext format + beq.b add_s_sclr + bset.b #sign_bit,FPTEMP_EX(a6) +add_s_sclr: + lea.l WBTEMP(a6),a0 + move.l FPTEMP(a6),(a0) ;write result to wbtemp + move.l FPTEMP_HI(a6),4(a0) + move.l FPTEMP_LO(a6),8(a0) + tst.w FPTEMP_EX(a6) + bgt add_ckovf + or.l #neg_mask,USER_FPSR(a6) +add_ckovf: + move.w WBTEMP_EX(a6),d0 + andi.w #$7fff,d0 + cmpi.w #$7fff,d0 + bne frcfpnr +* +* The result has overflowed to $7fff exponent. 
Set I, ovfl, +* and aovfl, and clr the mantissa (incorrectly set by the +* round routine.) +* + or.l #inf_mask+ovfl_inx_mask,USER_FPSR(a6) + clr.l 4(a0) ;clear the high mantissa lword of wbtemp + bra frcfpnr +* +* Inst is fsub. +* +wrap_sub: + cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm, + beq fix_stk ;restore to fpu +* +* One of the ops is denormalized. Test for wrap condition +* and complete the instruction. +* + cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm + bne.b sub_srcd +sub_destd: + bsr.l ckinf_ns + bne fix_stk + bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos) + bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg) + sub.l d1,d0 ;d0 = src exp - dest exp + cmp.l #$8000,d0 + blt fix_stk ;if less, not wrap case + bra sub_wrap +sub_srcd: + bsr.l ckinf_nd + bne fix_stk + bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos) + bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg) + sub.l d1,d0 ;d0 = dest exp - src exp + cmp.l #$8000,d0 + blt fix_stk ;if less, not wrap case +* +* Check the signs of the operands. If they are alike, the fpu +* can be used to subtract from the norm 1.0 with the sign of the +* denorm and it will correctly generate the result in extended +* precision. We can then call round with no sticky and the result +* will be correct for the user's rounding mode and precision. If +* the signs are unlike, we call round with the sticky bit set +* and the result will be correct for the user's rounding mode and +* precision. +* +sub_wrap: + move.w ETEMP_EX(a6),d0 + move.w FPTEMP_EX(a6),d1 + eor.w d1,d0 ;bit 15 = 1 if the operand signs differ + andi.w #$8000,d0 + bne sub_diff +* +* The signs are alike. +* + cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm?
+ bne.b sub_u_srcd + move.w FPTEMP_EX(a6),d0 + andi.w #$8000,d0 + or.w #$3fff,d0 ;force the exponent to +/- 1 + move.w d0,FPTEMP_EX(a6) ;in the denorm + move.l USER_FPCR(a6),d0 + andi.l #$30,d0 + fmove.l d0,fpcr ;set up user's rmode and X + fmove.x FPTEMP(a6),fp0 + fsub.x ETEMP(a6),fp0 + fmove.l fpsr,d1 + or.l d1,USER_FPSR(a6) ;capture cc's and inex from fsub + lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame + fmove.x fp0,WBTEMP(a6) ;write result to memory + lsr.l #4,d0 ;put rmode in lower 2 bits + move.l USER_FPCR(a6),d1 + andi.l #$c0,d1 + lsr.l #6,d1 ;put precision in upper word + swap d1 + or.l d0,d1 ;set up for round call + clr.l d0 ;force sticky to zero + bclr.b #sign_bit,WBTEMP_EX(a6) + sne WBTEMP_SGN(a6) ;use internal format for round + bsr.l round ;round result to user's rmode & prec + bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format + beq frcfpnr + bset.b #sign_bit,WBTEMP_EX(a6) + bra frcfpnr +sub_u_srcd: + move.w ETEMP_EX(a6),d0 + andi.w #$8000,d0 + or.w #$3fff,d0 ;force the exponent to +/- 1 + move.w d0,ETEMP_EX(a6) ;in the denorm + move.l USER_FPCR(a6),d0 + andi.l #$30,d0 + fmove.l d0,fpcr ;set up user's rmode and X + fmove.x FPTEMP(a6),fp0 + fsub.x ETEMP(a6),fp0 + fmove.l fpsr,d1 + or.l d1,USER_FPSR(a6) ;capture cc's and inex from fsub + lea.l WBTEMP(a6),a0 ;point a0 to wbtemp in frame + fmove.x fp0,WBTEMP(a6) ;write result to memory + lsr.l #4,d0 ;put rmode in lower 2 bits + move.l USER_FPCR(a6),d1 + andi.l #$c0,d1 + lsr.l #6,d1 ;put precision in upper word + swap d1 + or.l d0,d1 ;set up for round call + clr.l d0 ;force sticky to zero + bclr.b #sign_bit,WBTEMP_EX(a6) + sne WBTEMP_SGN(a6) ;use internal format for round + bsr.l round ;round result to user's rmode & prec + bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format + beq frcfpnr + bset.b #sign_bit,WBTEMP_EX(a6) + bra frcfpnr +* +* Signs are unlike: +* +sub_diff: + cmp.b #$0f,DNRM_FLG(a6) ;is dest the denorm?
+ bne.b sub_s_srcd +sub_s_destd: + lea.l ETEMP(a6),a0 ;dest is the denorm: round the src (norm) + move.l USER_FPCR(a6),d0 + andi.l #$30,d0 + lsr.l #4,d0 ;put rmode in lower 2 bits + move.l USER_FPCR(a6),d1 + andi.l #$c0,d1 + lsr.l #6,d1 ;put precision in upper word + swap d1 + or.l d0,d1 ;set up for round call + move.l #$20000000,d0 ;set sticky (d0 bit 29) for round +* +* Since the dest is the denorm, the sign is the opposite of the +* norm sign. +* + eori.w #$8000,ETEMP_EX(a6) ;flip sign on result + tst.w ETEMP_EX(a6) + bgt.b sub_s_dwr ;positive: leave N clear + or.l #neg_mask,USER_FPSR(a6) ;negative: set N +sub_s_dwr: + bclr.b #sign_bit,ETEMP_EX(a6) + sne ETEMP_SGN(a6) ;use internal format for round + bsr.l round ;round result to user's rmode & prec + bfclr ETEMP_SGN(a6){0:8} ;convert back to IEEE ext format + beq.b sub_s_dclr + bset.b #sign_bit,ETEMP_EX(a6) +sub_s_dclr: + lea.l WBTEMP(a6),a0 + move.l ETEMP(a6),(a0) ;write result to wbtemp + move.l ETEMP_HI(a6),4(a0) + move.l ETEMP_LO(a6),8(a0) + bra sub_ckovf +sub_s_srcd: + lea.l FPTEMP(a6),a0 ;src is the denorm: round the dest (norm) + move.l USER_FPCR(a6),d0 + andi.l #$30,d0 + lsr.l #4,d0 ;put rmode in lower 2 bits + move.l USER_FPCR(a6),d1 + andi.l #$c0,d1 + lsr.l #6,d1 ;put precision in upper word + swap d1 + or.l d0,d1 ;set up for round call + move.l #$20000000,d0 ;set sticky (d0 bit 29) for round + bclr.b #sign_bit,FPTEMP_EX(a6) + sne FPTEMP_SGN(a6) ;use internal format for round + bsr.l round ;round result to user's rmode & prec + bfclr FPTEMP_SGN(a6){0:8} ;convert back to IEEE ext format + beq.b sub_s_sclr + bset.b #sign_bit,FPTEMP_EX(a6) +sub_s_sclr: + lea.l WBTEMP(a6),a0 + move.l FPTEMP(a6),(a0) ;write result to wbtemp + move.l FPTEMP_HI(a6),4(a0) + move.l FPTEMP_LO(a6),8(a0) + tst.w FPTEMP_EX(a6) ;sign bit set means a negative result + bgt sub_ckovf ;positive: leave N clear + or.l #neg_mask,USER_FPSR(a6) ;negative: set N +sub_ckovf: + move.w WBTEMP_EX(a6),d0 + andi.w #$7fff,d0 + cmpi.w #$7fff,d0 + bne frcfpnr +* +* The result has overflowed to $7fff exponent. Set I, ovfl, +* and aovfl, and clr the mantissa (incorrectly set by the +* round routine.) +* + or.l #inf_mask+ovfl_inx_mask,USER_FPSR(a6) + clr.l 4(a0) ;clear the high mantissa lword of wbtemp + bra frcfpnr +* +* Inst is fcmp.
+* +wrap_cmp: + cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm, + beq fix_stk ;restore to fpu +* +* One of the ops is denormalized. Test for wrap condition +* and complete the instruction. +* + cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm + bne.b cmp_srcd +cmp_destd: + bsr.l ckinf_ns + bne fix_stk + bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos) + bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg) + sub.l d1,d0 ;d0 = src exp - dest exp + cmp.l #$8000,d0 + blt fix_stk ;if less, not wrap case + tst.w ETEMP_EX(a6) ;set N to ~sign_of(src) + bge cmp_setn + rts +cmp_srcd: + bsr.l ckinf_nd + bne fix_stk + bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos) + bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg) + sub.l d1,d0 ;d0 = dest exp - src exp + cmp.l #$8000,d0 + blt fix_stk ;if less, not wrap case + tst.w FPTEMP_EX(a6) ;set N to sign_of(dest) + blt cmp_setn + rts +cmp_setn: + or.l #neg_mask,USER_FPSR(a6) + rts + +* +* Inst is fmul. +* +wrap_mul: + cmp.b #$ff,DNRM_FLG(a6) ;if both ops denorm, + beq force_unf ;force an underflow (really!) +* +* One of the ops is denormalized. Test for wrap condition +* and complete the instruction. +* + cmp.b #$0f,DNRM_FLG(a6) ;check for dest denorm + bne.b mul_srcd +mul_destd: + bsr.l ckinf_ns + bne fix_stk + bfextu ETEMP_EX(a6){1:15},d0 ;get src exp (always pos) + bfexts FPTEMP_EX(a6){1:15},d1 ;get dest exp (always neg) + add.l d1,d0 ;d0 = src exp + dest exp + bgt fix_stk ;sum positive: not the wrap case + bra force_unf +mul_srcd: + bsr.l ckinf_nd + bne fix_stk + bfextu FPTEMP_EX(a6){1:15},d0 ;get dest exp (always pos) + bfexts ETEMP_EX(a6){1:15},d1 ;get src exp (always neg) + add.l d1,d0 ;d0 = dest exp + src exp + bgt fix_stk ;sum positive: not the wrap case + +* +* This code handles the case of the instruction resulting in +* an underflow condition.
+* +force_unf: + bclr.b #E1,E_BYTE(a6) + or.l #unfinx_mask,USER_FPSR(a6) + clr.w NMNEXC(a6) + clr.b WBTEMP_SGN(a6) ;assume a positive result + move.w ETEMP_EX(a6),d0 ;find the sign of the result + move.w FPTEMP_EX(a6),d1 + eor.w d1,d0 + andi.w #$8000,d0 + beq.b frcunfcont + st.b WBTEMP_SGN(a6) ;operand signs differ: result is negative +frcunfcont: + lea WBTEMP(a6),a0 ;point a0 to memory location + move.w CMDREG1B(a6),d0 + btst.l #6,d0 ;test for forced precision + beq.b frcunf_fpcr + btst.l #2,d0 ;check for double + bne.b frcunf_dbl + move.l #$1,d0 ;inst is forced single + bra.b frcunf_rnd +frcunf_dbl: + move.l #$2,d0 ;inst is forced double + bra.b frcunf_rnd +frcunf_fpcr: + bfextu FPCR_MODE(a6){0:2},d0 ;inst not forced - use fpcr prec +frcunf_rnd: + bsr.l unf_sub ;get correct result based on +* ;round precision/mode. This +* ;sets FPSR_CC correctly + bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format + beq.b frcfpn + bset.b #sign_bit,WBTEMP_EX(a6) + bra frcfpn + +* +* Write the result to the user's fpn. All results must be HUGE to be +* written; otherwise the results would have overflowed or underflowed. +* If the rounding precision is single or double, the ovf_res routine +* is needed to correctly supply the max value. +* +frcfpnr: + move.w CMDREG1B(a6),d0 + btst.l #6,d0 ;test for forced precision + beq.b frcfpn_fpcr + btst.l #2,d0 ;check for double + bne.b frcfpn_dbl + move.l #$1,d0 ;inst is forced single + bra.b frcfpn_rnd +frcfpn_dbl: + move.l #$2,d0 ;inst is forced double + bra.b frcfpn_rnd +frcfpn_fpcr: + bfextu FPCR_MODE(a6){0:2},d0 ;inst not forced - use fpcr prec + tst.b d0 + beq.b frcfpn ;if extended, write what you got +frcfpn_rnd: + bclr.b #sign_bit,WBTEMP_EX(a6) + sne WBTEMP_SGN(a6) ;use internal format for ovf_res + bsr.l ovf_res ;get correct result based on +* ;round precision/mode. This +* ;sets FPSR_CC correctly + bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format + beq.b frcfpn_clr + bset.b #sign_bit,WBTEMP_EX(a6) +frcfpn_clr: + or.l #ovfinx_mask,USER_FPSR(a6) +* +* Perform the write.
+* +frcfpn: + bfextu CMDREG1B(a6){6:3},d0 ;extract fp destination register + cmpi.b #3,d0 + ble.b frc0123 ;check if dest is fp0-fp3 + move.l #7,d1 ;fp4-fp7: build a dynamic fmovem list + sub.l d0,d1 ;bit number = 7 - register number + clr.l d0 + bset.l d1,d0 ;d0 = one-register list + fmovem.x WBTEMP(a6),d0 ;load wbtemp straight into fpn + rts +frc0123: + tst.b d0 ;fp0-fp3 are written to their USER_FPn frame slots + beq.b frc0_dst + cmpi.b #1,d0 + beq.b frc1_dst + cmpi.b #2,d0 + beq.b frc2_dst +frc3_dst: + move.l WBTEMP_EX(a6),USER_FP3(a6) + move.l WBTEMP_HI(a6),USER_FP3+4(a6) + move.l WBTEMP_LO(a6),USER_FP3+8(a6) + rts +frc2_dst: + move.l WBTEMP_EX(a6),USER_FP2(a6) + move.l WBTEMP_HI(a6),USER_FP2+4(a6) + move.l WBTEMP_LO(a6),USER_FP2+8(a6) + rts +frc1_dst: + move.l WBTEMP_EX(a6),USER_FP1(a6) + move.l WBTEMP_HI(a6),USER_FP1+4(a6) + move.l WBTEMP_LO(a6),USER_FP1+8(a6) + rts +frc0_dst: + move.l WBTEMP_EX(a6),USER_FP0(a6) + move.l WBTEMP_HI(a6),USER_FP0+4(a6) + move.l WBTEMP_LO(a6),USER_FP0+8(a6) + rts + +* +* Write etemp to fpn. +* A check is made on enabled and signalled snan exceptions, +* and the destination is not overwritten if this condition exists. +* This code is designed to make fmoveins of unsupported data types +* faster.
+* +wr_etemp: + btst.b #snan_bit,FPSR_EXCEPT(a6) ;if snan is set, and + beq.b fmoveinc ;enabled, force restore + btst.b #snan_bit,FPCR_ENABLE(a6) ;and don't overwrite + beq.b fmoveinc ;the dest + move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for +* ;snan handler + tst.b ETEMP(a6) ;check for negative + blt.b snan_neg + rts +snan_neg: + or.l #neg_mask,USER_FPSR(a6) ;snan is negative; set N (OR needs the mask, not the bit number) + rts +fmoveinc: + clr.w NMNEXC(a6) + bclr.b #E1,E_BYTE(a6) + move.b STAG(a6),d0 ;check if stag is inf + andi.b #$e0,d0 ;keep the top 3 tag bits + cmpi.b #$40,d0 + bne.b fminc_cnan + or.l #inf_mask,USER_FPSR(a6) ;if inf, nothing yet has set I + tst.w LOCAL_EX(a0) ;check sign (a0 is not loaded in this routine - TODO confirm it points at the operand) + bge.b fminc_con + or.l #neg_mask,USER_FPSR(a6) + bra fminc_con +fminc_cnan: + cmpi.b #$60,d0 ;check if stag is NaN + bne.b fminc_czero + or.l #nan_mask,USER_FPSR(a6) ;if nan, nothing yet has set NaN + move.l ETEMP_EX(a6),FPTEMP_EX(a6) ;set up fptemp sign for +* ;snan handler + tst.w LOCAL_EX(a0) ;check sign + bge.b fminc_con + or.l #neg_mask,USER_FPSR(a6) + bra fminc_con +fminc_czero: + cmpi.b #$20,d0 ;check if zero + bne.b fminc_con + or.l #z_mask,USER_FPSR(a6) ;if zero, set Z + tst.w LOCAL_EX(a0) ;check sign + bge.b fminc_con + or.l #neg_mask,USER_FPSR(a6) +fminc_con: + bfextu CMDREG1B(a6){6:3},d0 ;extract fp destination register + cmpi.b #3,d0 + ble.b fp0123 ;check if dest is fp0-fp3 + move.l #7,d1 ;fp4-fp7: build a dynamic fmovem list + sub.l d0,d1 ;bit number = 7 - register number + clr.l d0 + bset.l d1,d0 ;d0 = one-register list + fmovem.x ETEMP(a6),d0 ;load etemp straight into fpn + rts + +fp0123: + tst.b d0 ;fp0-fp3 are written to their USER_FPn frame slots + beq.b fp0_dst + cmpi.b #1,d0 + beq.b fp1_dst + cmpi.b #2,d0 + beq.b fp2_dst +fp3_dst: + move.l ETEMP_EX(a6),USER_FP3(a6) + move.l ETEMP_HI(a6),USER_FP3+4(a6) + move.l ETEMP_LO(a6),USER_FP3+8(a6) + rts +fp2_dst: + move.l ETEMP_EX(a6),USER_FP2(a6) + move.l ETEMP_HI(a6),USER_FP2+4(a6) + move.l ETEMP_LO(a6),USER_FP2+8(a6) + rts +fp1_dst: + move.l ETEMP_EX(a6),USER_FP1(a6) + move.l ETEMP_HI(a6),USER_FP1+4(a6) + move.l ETEMP_LO(a6),USER_FP1+8(a6) + rts +fp0_dst: + move.l ETEMP_EX(a6),USER_FP0(a6) + move.l ETEMP_HI(a6),USER_FP0+4(a6) + move.l
ETEMP_LO(a6),USER_FP0+8(a6) + rts + +opclass3: + st.b CU_ONLY(a6) + move.w CMDREG1B(a6),d0 ;check if packed moveout + andi.w #$0c00,d0 ;isolate last 2 bits of size field + cmpi.w #$0c00,d0 ;if size is 011 or 111, it is packed + beq.w pack_out ;else it is norm or denorm + bra.w mv_out + + +* +* MOVE OUT +* + +mv_tbl: + dc.l li ;0: long integer + dc.l sgp ;1: single + dc.l xp ;2: extended + dc.l mvout_end ;should never be taken + dc.l wi ;4: word integer + dc.l dp ;5: double + dc.l bi ;6: byte integer + dc.l mvout_end ;should never be taken +mv_out: + bfextu CMDREG1B(a6){3:3},d1 ;put destination format field in d1 + lea.l mv_tbl,a0 + move.l (a0,d1*4),a0 + jmp (a0) + +* +* This exit is for move-out to memory. The aunfl bit is +* set if the result is inex and unfl is signalled. +* +mvout_end: + btst.b #inex2_bit,FPSR_EXCEPT(a6) + beq.b no_aufl + btst.b #unfl_bit,FPSR_EXCEPT(a6) + beq.b no_aufl + bset.b #aunfl_bit,FPSR_AEXCEPT(a6) +no_aufl: + clr.w NMNEXC(a6) + bclr.b #E1,E_BYTE(a6) + fmove.l #0,FPSR ;clear any cc bits from res_func +* +* Return ETEMP to extended format from internal extended format so +* that gen_except will have a correctly signed value for ovfl/unfl +* handlers. +* + bfclr ETEMP_SGN(a6){0:8} + beq.b mvout_con + bset.b #sign_bit,ETEMP_EX(a6) +mvout_con: + rts +* +* This exit is for integer move-outs (to memory or to a data register). +* The aunfl bit is not set in any case for this move. +* +mvouti_end: + clr.w NMNEXC(a6) + bclr.b #E1,E_BYTE(a6) + fmove.l #0,FPSR ;clear any cc bits from res_func +* +* Return ETEMP to extended format from internal extended format so +* that gen_except will have a correctly signed value for ovfl/unfl +* handlers.
+* + bfclr ETEMP_SGN(a6){0:8} + beq.b mvouti_con + bset.b #sign_bit,ETEMP_EX(a6) +mvouti_con: + rts +* +* li is used to handle a long integer source specifier +* + +li: + moveq.l #4,d0 ;set byte count + + btst.b #7,STAG(a6) ;check for extended denorm + bne.w int_dnrm ;if so, branch + + fmovem.x ETEMP(a6),fp0 + fcmp.d #:41dfffffffc00000,fp0 +* 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec (2^31 - 1) + fbge.w lo_plrg + fcmp.d #:c1e0000000000000,fp0 +* c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec (-2^31) + fble.w lo_nlrg +* +* at this point, the answer is between the largest pos and neg values +* + move.l USER_FPCR(a6),d1 ;use user's rounding mode + andi.l #$30,d1 + fmove.l d1,fpcr + fmove.l fp0,L_SCR1(a6) ;let the 040 perform conversion + fmove.l fpsr,d1 + or.l d1,USER_FPSR(a6) ;capture inex2/ainex if set + bra.w int_wrt + + +lo_plrg: + move.l #$7fffffff,L_SCR1(a6) ;answer is largest positive int + fbeq.w int_wrt ;exact answer + fcmp.d #:41dfffffffe00000,fp0 +* 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec (2^31 - 0.5) + fbge.w int_operr ;set operr + bra.w int_inx ;set inexact + +lo_nlrg: + move.l #$80000000,L_SCR1(a6) ;answer is largest negative int + fbeq.w int_wrt ;exact answer + fcmp.d #:c1e0000000100000,fp0 +* c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec (-2^31 - 0.5) + fblt.w int_operr ;set operr + bra.w int_inx ;set inexact + +* +* wi is used to handle a word integer source specifier +* + +wi: + moveq.l #2,d0 ;set byte count + + btst.b #7,STAG(a6) ;check for extended denorm + bne.w int_dnrm ;branch if so + + fmovem.x ETEMP(a6),fp0 + fcmp.s #:46fffe00,fp0 +* 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec (2^15 - 1) + fbge.w wo_plrg + fcmp.s #:c7000000,fp0 +* c7000000 in sgl prec = c00e00008000000000000000 in ext prec (-2^15) + fble.w wo_nlrg + +* +* at this point, the answer is between the largest pos and neg values +* + move.l USER_FPCR(a6),d1 ;use user's rounding mode + andi.l #$30,d1 + fmove.l d1,fpcr + fmove.w fp0,L_SCR1(a6) ;let the 040 perform
conversion + fmove.l fpsr,d1 + or.l d1,USER_FPSR(a6) ;capture inex2/ainex if set + bra.w int_wrt + +wo_plrg: + move.w #$7fff,L_SCR1(a6) ;answer is largest positive int + fbeq.w int_wrt ;exact answer + fcmp.s #:46ffff00,fp0 +* 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec (2^15 - 0.5) + fbge.w int_operr ;set operr + bra.w int_inx ;set inexact + +wo_nlrg: + move.w #$8000,L_SCR1(a6) ;answer is largest negative int + fbeq.w int_wrt ;exact answer + fcmp.s #:c7000080,fp0 +* c7000080 in sgl prec = c00e00008000800000000000 in ext prec (-2^15 - 0.5) + fblt.w int_operr ;set operr + bra.w int_inx ;set inexact + +* +* bi is used to handle a byte integer source specifier +* + +bi: + moveq.l #1,d0 ;set byte count + + btst.b #7,STAG(a6) ;check for extended denorm + bne.w int_dnrm ;branch if so + + fmovem.x ETEMP(a6),fp0 + fcmp.s #:42fe0000,fp0 +* 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec (2^7 - 1) + fbge.w by_plrg + fcmp.s #:c3000000,fp0 +* c3000000 in sgl prec = c00600008000000000000000 in ext prec (-2^7) + fble.w by_nlrg + +* +* at this point, the answer is between the largest pos and neg values +* + move.l USER_FPCR(a6),d1 ;use user's rounding mode + andi.l #$30,d1 + fmove.l d1,fpcr + fmove.b fp0,L_SCR1(a6) ;let the 040 perform conversion + fmove.l fpsr,d1 + or.l d1,USER_FPSR(a6) ;capture inex2/ainex if set + bra.w int_wrt + +by_plrg: + move.b #$7f,L_SCR1(a6) ;answer is largest positive int + fbeq.w int_wrt ;exact answer + fcmp.s #:42ff0000,fp0 +* 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec (2^7 - 0.5) + fbge.w int_operr ;set operr + bra.w int_inx ;set inexact + +by_nlrg: + move.b #$80,L_SCR1(a6) ;answer is largest negative int + fbeq.w int_wrt ;exact answer + fcmp.s #:c3008000,fp0 +* c3008000 in sgl prec = c00600008080000000000000 in ext prec (-2^7 - 0.5) + fblt.w int_operr ;set operr + bra.w int_inx ;set inexact + +* +* Common integer routines +* +* int_dnrm---account for possible nonzero result for round up with positive +* operand and round down for negative answer. In the first case (result = 1) +* byte-width (store in d0) of result must be honored.
In the second case, +* -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out). + +int_dnrm: + clr.l L_SCR1(a6) ; initialize result to 0 + bfextu FPCR_MODE(a6){2:2},d1 ; d1 is the rounding mode + cmp.b #2,d1 + bmi.b int_inx ; if RN or RZ, done + bne.b int_rp ; if RP, handle at int_rp; RM falls through + tst.w ETEMP(a6) ; RM: store -1 in L_SCR1 if src is negative + bpl.b int_inx ; otherwise result is 0 + move.l #-1,L_SCR1(a6) + bra.b int_inx +int_rp: + tst.w ETEMP(a6) ; RP: store +1 of proper width in L_SCR1 if +* ; source is greater than 0 + bmi.b int_inx ; otherwise, result is 0 + lea L_SCR1(a6),a1 ; a1 is address of L_SCR1 + adda.l d0,a1 ; offset by destination width -1 + suba.l #1,a1 + bset.b #0,(a1) ; set low bit at a1 address +int_inx: + ori.l #inx2a_mask,USER_FPSR(a6) + bra.b int_wrt +int_operr: + fmovem.x fp0,FPTEMP(a6) ;FPTEMP must contain the extended +* ;precision source that needs to be +* ;converted to integer; this is required +* ;if the operr exception is enabled. +* ;set operr/aiop (no inex2 on int ovfl) + + ori.l #opaop_mask,USER_FPSR(a6) +* ;fall through to perform int_wrt +int_wrt: + move.l EXC_EA(a6),a1 ;load destination address + tst.l a1 ;check to see if it is a dest register + beq.b wrt_dn ;write data register + lea L_SCR1(a6),a0 ;point to supervisor source address + bsr.l mem_write + bra.w mvouti_end + +wrt_dn: + move.l d0,-(sp) ;d0 currently contains the size to write + bsr.l get_fline ;get_fline returns Dn in d0 + andi.w #$7,d0 ;isolate register + move.l (sp)+,d1 ;get size + cmpi.l #4,d1 ;most frequent case + beq.b sz_long + cmpi.l #2,d1 + bne.b sz_con ;byte size: register# is used as is + or.l #8,d0 ;add 'word' size to register# + bra.b sz_con +sz_long: + or.l #$10,d0 ;add 'long' size to register# +sz_con: + move.l d0,d1 ;reg_dest expects size:reg in d1 + bsr.l reg_dest ;load proper data register + bra.w mvouti_end +xp: + lea ETEMP(a6),a0 + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) ;use internal format (sign kept in LOCAL_SGN) + btst.b #7,STAG(a6) ;check for extended denorm + bne.w xdnrm + clr.l d0 ;set destination format to extended + bra.b do_fp ;do normal
case +sgp: + lea ETEMP(a6),a0 + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) ;use internal format (sign kept in LOCAL_SGN) + btst.b #7,STAG(a6) ;check for extended denorm + bne.w sp_catas ;branch if so + move.w LOCAL_EX(a0),d0 + lea sp_bnds,a1 + cmp.w (a1),d0 ;exponent below first sp_bnds entry? + blt.w sp_under + cmp.w 2(a1),d0 ;exponent above second sp_bnds entry? + bgt.w sp_over + move.l #1,d0 ;set destination format to single + bra.b do_fp ;do normal case +dp: + lea ETEMP(a6),a0 + bclr.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) ;use internal format (sign kept in LOCAL_SGN) + + btst.b #7,STAG(a6) ;check for extended denorm + bne.w dp_catas ;branch if so + + move.w LOCAL_EX(a0),d0 + lea dp_bnds,a1 + + cmp.w (a1),d0 ;exponent below first dp_bnds entry? + blt.w dp_under + cmp.w 2(a1),d0 ;exponent above second dp_bnds entry? + bgt.w dp_over + + move.l #2,d0 ;set destination format to double +* ;fall through to do_fp +* +do_fp: + bfextu FPCR_MODE(a6){2:2},d1 ;rnd mode in d1 + swap d0 ;rnd prec in upper word + add.l d0,d1 ;d1 has PREC/MODE info + + clr.l d0 ;clear g,r,s + + bsr.l round ;round + + move.l a0,a1 ;a1 has the operand input + move.l EXC_EA(a6),a0 ;a0 has the destination pointer + + bfextu CMDREG1B(a6){3:3},d1 ;extract destination format +* ;at this point only the dest +* ;formats sgl, dbl, ext are +* ;possible + cmp.b #2,d1 + bgt.b ddbl ;double=5, extended=2, single=1 + bne.b dsgl +* ;fall through to dext +dext: + bsr.l dest_ext + bra.w mvout_end +dsgl: + bsr.l dest_sgl + bra.w mvout_end +ddbl: + bsr.l dest_dbl + bra.w mvout_end + +* +* Handle possible denorm or catastrophic underflow cases here +* +xdnrm: + bsr.w set_xop ;initialize WBTEMP + bset.b #wbtemp15_bit,WB_BYTE(a6) ;set wbtemp15 + + move.l a0,a1 ;a1 has the operand input + move.l EXC_EA(a6),a0 ;a0 has the destination pointer + bsr.l dest_ext ;store to memory + bset.b #unfl_bit,FPSR_EXCEPT(a6) + bra.w mvout_end + +sp_under: + bset.b #etemp15_bit,STAG(a6) + + cmp.w 4(a1),d0 + blt.b sp_catas ;catastrophic underflow case + + move.l #1,d0 ;load in round precision + move.l #sgl_thresh,d1 ;load in single denorm threshold + bsr.l dpspdnrm ;expects d1 to have the proper +* ;denorm threshold + bsr.l dest_sgl ;stores value to destination + bset.b #unfl_bit,FPSR_EXCEPT(a6) + bra.w mvout_end ;exit + +dp_under: + bset.b
#etemp15_bit,STAG(a6) + + cmp.w 4(a1),d0 + blt.b dp_catas ;catastrophic underflow case + + move.l #dbl_thresh,d1 ;load in double precision threshold + move.l #2,d0 ;load in round precision + bsr.l dpspdnrm ;expects d1 to have proper +* ;denorm threshold +* ;expects d0 to have round precision + bsr.l dest_dbl ;store value to destination + bset.b #unfl_bit,FPSR_EXCEPT(a6) + bra.w mvout_end ;exit + +* +* Handle catastrophic underflow cases here +* +sp_catas: +* Temp fix for z bit set in unf_sub + move.l USER_FPSR(a6),-(a7) + + move.l #1,d0 ;set round precision to sgl + + bsr.l unf_sub ;a0 points to result + + move.l (a7)+,USER_FPSR(a6) + + move.l #1,d0 + sub.w d0,LOCAL_EX(a0) ;account for difference between +* ;denorm/norm bias + + move.l a0,a1 ;a1 has the operand input + move.l EXC_EA(a6),a0 ;a0 has the destination pointer + + bsr.l dest_sgl ;store the result + ori.l #unfinx_mask,USER_FPSR(a6) + bra.w mvout_end + +dp_catas: +* Temp fix for z bit set in unf_sub + move.l USER_FPSR(a6),-(a7) + + move.l #2,d0 ;set round precision to dbl + bsr.l unf_sub ;a0 points to result + + move.l (a7)+,USER_FPSR(a6) + + move.l #1,d0 + sub.w d0,LOCAL_EX(a0) ;account for difference between +* ;denorm/norm bias + + move.l a0,a1 ;a1 has the operand input + move.l EXC_EA(a6),a0 ;a0 has the destination pointer + + bsr.l dest_dbl ;store the result + ori.l #unfinx_mask,USER_FPSR(a6) + bra.w mvout_end + +* +* Handle catastrophic overflow cases here +* +sp_over: +* Temp fix for z bit set in ovf_res + move.l USER_FPSR(a6),-(a7) + + move.l #1,d0 ;set round precision to sgl + lea.l FP_SCR1(a6),a0 ;use FP_SCR1 for creating result + move.l ETEMP_EX(a6),(a0) + move.l ETEMP_HI(a6),4(a0) + move.l ETEMP_LO(a6),8(a0) + bsr.l ovf_res + + move.l (a7)+,USER_FPSR(a6) + + move.l a0,a1 ;a1 has the operand input + move.l EXC_EA(a6),a0 ;a0 has the destination pointer + bsr.l dest_sgl + or.l #ovfinx_mask,USER_FPSR(a6) + bra.w mvout_end + +dp_over: +* Temp fix for z bit set in ovf_res + move.l USER_FPSR(a6),-(a7) + + move.l #2,d0 ;set round precision to dbl + lea.l FP_SCR1(a6),a0 ;use FP_SCR1 for creating result + move.l ETEMP_EX(a6),(a0) + move.l
ETEMP_HI(a6),4(a0) + move.l ETEMP_LO(a6),8(a0) + bsr.l ovf_res + + move.l (a7)+,USER_FPSR(a6) + + move.l a0,a1 ;a1 has the operand input + move.l EXC_EA(a6),a0 ;a0 has the destination pointer + bsr.l dest_dbl + or.l #ovfinx_mask,USER_FPSR(a6) + bra.w mvout_end + +* +* DPSPDNRM +* +* This subroutine takes an extended normalized number and denormalizes +* it to the given round precision. This subroutine also decrements +* the input operand's exponent by 1 to account for the fact that +* dest_sgl or dest_dbl expects a normalized number's bias. +* +* Input: a0 points to a normalized number in internal extended format +* d0 is the round precision (=1 for sgl; =2 for dbl) +* d1 is the single precision or double precision +* denorm threshold +* +* Output: (In the format for dest_sgl or dest_dbl) +* a0 points to the destination +* a1 points to the operand +* +* Exceptions: Reports inexact 2 exception by setting USER_FPSR bits +* +dpspdnrm: + move.l d0,-(a7) ;save round precision + clr.l d0 ;clear initial g,r,s + bsr.l dnrm_lp ;careful with d0, it's needed by round + + bfextu FPCR_MODE(a6){2:2},d1 ;get rounding mode + swap d1 + move.w 2(a7),d1 ;set rounding precision + swap d1 ;at this point d1 has PREC/MODE info + bsr.l round ;round result, sets the inex bit in +* ;USER_FPSR if needed + + move.w #1,d0 + sub.w d0,LOCAL_EX(a0) ;account for difference in denorm +* ;vs norm bias + + move.l a0,a1 ;a1 has the operand input + move.l EXC_EA(a6),a0 ;a0 has the destination pointer + addq.l #4,a7 ;pop stack + rts +* +* SET_XOP initializes WBTEMP with the value pointed to by a0 +* input: a0 points to input operand in the internal extended format +* +set_xop: + move.l LOCAL_EX(a0),WBTEMP_EX(a6) + move.l LOCAL_HI(a0),WBTEMP_HI(a6) + move.l LOCAL_LO(a0),WBTEMP_LO(a6) + bfclr WBTEMP_SGN(a6){0:8} ;convert back to IEEE ext format + beq.b sxop + bset.b #sign_bit,WBTEMP_EX(a6) +sxop: + bfclr STAG(a6){5:4} ;clear wbtm66,wbtm1,wbtm0,sbit + rts +* +* P_MOVE +* +p_movet: + dc.l p_move ;tag 0: norm + dc.l p_movez ;tag 1: zero + dc.l p_movei ;tag 2: inf + dc.l p_moven ;tag 3: nan + dc.l p_move ;tag 4: denorm +p_regd: + dc.l p_dyd0 + dc.l p_dyd1 +
dc.l p_dyd2 + dc.l p_dyd3 + dc.l p_dyd4 + dc.l p_dyd5 + dc.l p_dyd6 + dc.l p_dyd7 + +pack_out: + lea.l p_movet,a0 ;load jmp table address + move.w STAG(a6),d0 ;get source tag + bfextu d0{16:3},d0 ;isolate top 3 tag bits as table index + move.l (a0,d0.w*4),a0 ;load a0 with routine label for tag + jmp (a0) ;go to the routine + +p_write: + move.l #$0c,d0 ;byte count = 12 + move.l EXC_EA(a6),a1 ;get the destination address + bsr mem_write ;write the user's destination + clr.b CU_SAVEPC(a6) ;set the cu save pc to all 0's + +* +* Also note that the dtag must be set to norm here - this is because +* the 040 uses the dtag to execute the correct microcode. +* + bfclr DTAG(a6){0:3} ;set dtag to norm + + rts + +* Notes on handling of special case (zero, inf, and nan) inputs: +* 1. Operr is not signalled if the k-factor is greater than 18. +* 2. Per the manual, status bits are not set. +* + +p_move: + move.w CMDREG1B(a6),d0 + btst.l #kfact_bit,d0 ;test for dynamic k-factor + beq.b statick ;if clear, k-factor is static +dynamick: + bfextu d0{25:3},d0 ;isolate register for dynamic k-factor + lea p_regd,a0 + move.l (a0,d0*4),a0 + jmp (a0) +statick: + andi.w #$007f,d0 ;get k-factor + bfexts d0{25:7},d0 ;sign extend d0 for bindec + lea.l ETEMP(a6),a0 ;a0 points to ETEMP, the operand for bindec + bsr.l bindec ;perform the convert; data at a6 + lea.l FP_SCR1(a6),a0 ;load a0 with result address + bra.l p_write +p_movez: + lea.l ETEMP(a6),a0 ;a0 will point to the packed decimal + clr.w 2(a0) ;clear lower word of exp + clr.l 4(a0) ;load second lword of ZERO + clr.l 8(a0) ;load third lword of ZERO + bra.w p_write ;go write results +p_movei: + fmove.l #0,FPSR ;clear aiop + lea.l ETEMP(a6),a0 ;a0 will point to the packed decimal + clr.w 2(a0) ;clear lower word of exp + bra.w p_write ;go write the result +p_moven: + lea.l ETEMP(a6),a0 ;a0 will point to the packed decimal + clr.w 2(a0) ;clear lower word of exp + bra.w p_write ;go write the result + +* +* Routines to read the dynamic k-factor from Dn.
+* +p_dyd0: + move.l USER_D0(a6),d0 ;d0 is read from its copy at USER_D0(a6) + bra.b statick +p_dyd1: + move.l USER_D1(a6),d0 ;d1 is read from its copy at USER_D1(a6) + bra.b statick +p_dyd2: + move.l d2,d0 ;d2-d7 are read directly from the registers + bra.b statick +p_dyd3: + move.l d3,d0 + bra.b statick +p_dyd4: + move.l d4,d0 + bra.b statick +p_dyd5: + move.l d5,d0 + bra.b statick +p_dyd6: + move.l d6,d0 + bra.w statick +p_dyd7: + move.l d7,d0 + bra.w statick + + end diff --git a/sys/arch/m68k/fpsp/round.sa b/sys/arch/m68k/fpsp/round.sa new file mode 100644 index 00000000000..ebd02d11e25 --- /dev/null +++ b/sys/arch/m68k/fpsp/round.sa @@ -0,0 +1,673 @@ +* $NetBSD: round.sa,v 1.3 1994/10/26 07:49:24 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* round.sa 3.4 7/29/91 +* +* handle rounding and normalization tasks +* + +ROUND IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + +* +* round --- round result according to precision/mode +* +* a0 points to the input operand in the internal extended format +* d1(high word) contains rounding precision: +* ext = $0000xxxx +* sgl = $0001xxxx +* dbl = $0002xxxx +* d1(low word) contains rounding mode: +* RN = $xxxx0000 +* RZ = $xxxx0001 +* RM = $xxxx0002 +* RP = $xxxx0003 +* d0{31:29} contains the g,r,s bits (extended) +* +* On return the value pointed to by a0 is correctly rounded, +* a0 is preserved and the g-r-s bits in d0 are cleared. +* The result is not typed - the tag field is invalid. The +* result is still in the internal extended format. +* +* The INEX bit of USER_FPSR will be set if the rounded result was +* inexact (i.e. if any of the g-r-s bits were set). +* + + xdef round +round: +* If g=r=s=0 then result is exact and round is done, else set +* the inex flag in status reg and continue. +* + bsr.b ext_grs ;this subroutine looks at the +* ;rounding precision and sets +* ;the appropriate g-r-s bits. + tst.l d0 ;if grs are zero, go force + bne.w rnd_cont ;lower bits to zero for size + + swap d1 ;set up d1.w for round prec. + bra.w truncate + +rnd_cont: +* +* Use rounding mode as an index into a jump table for these modes. +* + or.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex + lea mode_tab,a1 + move.l (a1,d1.w*4),a1 + jmp (a1) +* +* Jump table indexed by rounding mode in d1.w. All following assumes +* grs != 0. +* +mode_tab: + dc.l rnd_near ;0: RN + dc.l rnd_zero ;1: RZ + dc.l rnd_mnus ;2: RM + dc.l rnd_plus ;3: RP +* +* ROUND PLUS INFINITY +* +* If sign of fp number = 0 (positive), then add 1 to l. +* +rnd_plus: + swap d1 ;set up d1 for round prec.
+ tst.b LOCAL_SGN(a0) ;check for sign + bmi.w truncate ;if negative then truncate + move.l #$ffffffff,d0 ;force g,r,s to be all f's + lea add_to_l,a1 + move.l (a1,d1.w*4),a1 + jmp (a1) +* +* ROUND MINUS INFINITY +* +* If sign of fp number = 1 (negative), then add 1 to l. +* +rnd_mnus: + swap d1 ;set up d1 for round prec. + tst.b LOCAL_SGN(a0) ;check for sign + bpl.w truncate ;if positive then truncate + move.l #$ffffffff,d0 ;force g,r,s to be all f's + lea add_to_l,a1 + move.l (a1,d1.w*4),a1 + jmp (a1) +* +* ROUND ZERO +* +* Always truncate. +rnd_zero: + swap d1 ;set up d1 for round prec. + bra.w truncate +* +* +* ROUND NEAREST +* +* If (g=1), then add 1 to l and if (r=s=0), then clear l +* Note that this will round to even in case of a tie. +* +rnd_near: + swap d1 ;set up d1 for round prec. + add.l d0,d0 ;shift g-bit to c-bit + bcc.w truncate ;g=0: truncate; if (g=1) then + lea add_to_l,a1 + move.l (a1,d1.w*4),a1 + jmp (a1) + +* +* ext_grs --- extract guard, round and sticky bits +* +* Input: d1 = PREC:ROUND +* Output: d0{31:29}= guard, round, sticky +* +* The ext_grs routine extracts the guard/round/sticky bits according to the +* selected rounding precision. It is called by the round subroutine +* only. All registers except d0 are kept intact. d0 becomes an +* updated guard,round,sticky in d0{31:29} +* +* Notes: the ext_grs uses the round PREC, and therefore has to swap d1 +* prior to usage, and needs to restore d1 to original. +* +ext_grs: + swap d1 ;have d1.w point to round precision + tst.w d1 + bne.b sgl_or_dbl + bra.b end_ext_grs ;extended: d0 is returned unchanged + +sgl_or_dbl: + movem.l d2/d3,-(a7) ;make some temp registers + cmpi.w #1,d1 + bne.b grs_dbl +grs_sgl: + bfextu LOCAL_HI(a0){24:2},d3 ;sgl prec. g-r are 2 bits right + move.l #30,d2 ;of the sgl prec.
limits + lsl.l d2,d3 ;shift g-r bits to MSB of d3 + move.l LOCAL_HI(a0),d2 ;get word 2 for s-bit test + andi.l #$0000003f,d2 ;s bit is the or of all other + bne.b st_stky ;bits to the right of g-r + tst.l LOCAL_LO(a0) ;test lower mantissa + bne.b st_stky ;if any are set, set sticky + tst.l d0 ;test original g,r,s + bne.b st_stky ;if any are set, set sticky + bra.b end_sd ;if words 3 and 4 are clr, exit +grs_dbl: + bfextu LOCAL_LO(a0){21:2},d3 ;dbl-prec. g-r are 2 bits right + move.l #30,d2 ;of the dbl prec. limits + lsl.l d2,d3 ;shift g-r bits to the MSB of d3 + move.l LOCAL_LO(a0),d2 ;get lower mantissa for s-bit test + andi.l #$000001ff,d2 ;s bit is the or-ing of all + bne.b st_stky ;other bits to the right of g-r + tst.l d0 ;test original g,r,s + bne.b st_stky ;if any are set, set sticky + bra.b end_sd ;if clear, exit +st_stky: + bset #rnd_stky_bit,d3 +end_sd: + move.l d3,d0 ;return grs to d0 + movem.l (a7)+,d2/d3 ;restore scratch registers +end_ext_grs: + swap d1 ;restore d1 to original + rts + +******************** Local Equates +ad_1_sgl equ $00000100 constant to add 1 to l-bit in sgl prec +ad_1_dbl equ $00000800 constant to add 1 to l-bit in dbl prec + + +*Jump table for adding 1 to the l-bit indexed by rnd prec + +add_to_l: + dc.l add_ext + dc.l add_sgl + dc.l add_dbl + dc.l add_dbl +* +* ADD SINGLE +* +add_sgl: + add.l #ad_1_sgl,LOCAL_HI(a0) + bcc.b scc_clr ;no mantissa overflow + roxr.w LOCAL_HI(a0) ;shift v-bit back in + roxr.w LOCAL_HI+2(a0) ;shift v-bit back in + add.w #$1,LOCAL_EX(a0) ;and incr exponent +scc_clr: + tst.l d0 ;test for rs = 0 + bne.b sgl_done + andi.w #$fe00,LOCAL_HI+2(a0) ;clear the l-bit +sgl_done: + andi.l #$ffffff00,LOCAL_HI(a0) ;truncate bits beyond sgl limit + clr.l LOCAL_LO(a0) ;clear ls mantissa + rts + +* +* ADD EXTENDED +* +add_ext: + addq.l #1,LOCAL_LO(a0) ;add 1 to l-bit + bcc.b xcc_clr ;test for carry out + addq.l #1,LOCAL_HI(a0) ;propagate carry + bcc.b xcc_clr + roxr.w LOCAL_HI(a0) ;mant is 0 so restore v-bit + roxr.w
LOCAL_HI+2(a0) ;mant is 0 so restore v-bit + roxr.w LOCAL_LO(a0) + roxr.w LOCAL_LO+2(a0) + add.w #$1,LOCAL_EX(a0) ;and inc exp +xcc_clr: + tst.l d0 ;test rs = 0 + bne.b add_ext_done + andi.b #$fe,LOCAL_LO+3(a0) ;clear the l bit +add_ext_done: + rts +* +* ADD DOUBLE +* +add_dbl: + add.l #ad_1_dbl,LOCAL_LO(a0) + bcc.b dcc_clr + addq.l #1,LOCAL_HI(a0) ;propagate carry + bcc.b dcc_clr + roxr.w LOCAL_HI(a0) ;mant is 0 so restore v-bit + roxr.w LOCAL_HI+2(a0) ;mant is 0 so restore v-bit + roxr.w LOCAL_LO(a0) + roxr.w LOCAL_LO+2(a0) + add.w #$1,LOCAL_EX(a0) ;incr exponent +dcc_clr: + tst.l d0 ;test for rs = 0 + bne.b dbl_done + andi.w #$f000,LOCAL_LO+2(a0) ;clear the l-bit + +dbl_done: + andi.l #$fffff800,LOCAL_LO(a0) ;truncate bits beyond dbl limit + rts + +error: + rts +* +* Truncate all other bits +* (jump table indexed by the rounding precision in d1.w) +* +trunct: + dc.l end_rnd + dc.l sgl_done + dc.l dbl_done + dc.l dbl_done + +truncate: + lea trunct,a1 + move.l (a1,d1.w*4),a1 + jmp (a1) + +end_rnd: + rts + +* +* NORMALIZE +* +* These routines (nrm_zero & nrm_set) normalize the unnorm. This +* is done by shifting the mantissa left while decrementing the +* exponent. +* +* NRM_SET shifts and decrements until there is a 1 set in the integer +* bit of the mantissa (msb in d1). +* +* NRM_ZERO shifts and decrements until there is a 1 set in the integer +* bit of the mantissa (msb in d1) unless this would mean the exponent +* would go less than 0. In that case the number becomes a denorm - the +* exponent (d0) is set to 0 and the mantissa (d1 & d2) is not +* normalized. +* +* Note that both routines have been optimized (for the worst case) and +* therefore do not have the easy to follow decrement/shift loop.
+* +* NRM_ZERO +* +* Distance to first 1 bit in mantissa = X +* Distance to 0 from exponent = Y +* If X <= Y +* Then +* nrm_set +* Else +* shift mantissa by Y +* set exponent = 0 +* +*input: +* FP_SCR1 = exponent, ms mantissa part, ls mantissa part +*output: +* L_SCR1{4} = fpte15 or ete15 bit +* NOTE(review): the code below addresses the operand only through a0 +* (LOCAL_EX/LOCAL_HI/LOCAL_LO) and never names FP_SCR1 or L_SCR1 - +* confirm the input/output description above against the callers. +* + xdef nrm_zero +nrm_zero: + move.w LOCAL_EX(a0),d0 + cmp.w #64,d0 ;see if exp >= 64 + bmi.b d0_less + bsr nrm_set ;exp >= 64 so exp won't go past 0 + rts +d0_less: + movem.l d2/d3/d5/d6,-(a7) + move.l LOCAL_HI(a0),d1 + move.l LOCAL_LO(a0),d2 + + bfffo d1{0:32},d3 ;get the distance to the first 1 +* ;in ms mant + beq.b ms_clr ;branch if no bits were set + cmp.w d3,d0 ;if X>Y + bmi.b greater ;then exp will go past 0 (neg) if +* ;it is just shifted + bsr nrm_set ;else exp won't go past 0 + movem.l (a7)+,d2/d3/d5/d6 + rts +* NOTE(review): when this is entered from ms_clr with exp > 32, both +* register shifts below use a count above 32 and appear to discard the +* whole mantissa instead of moving the ls mant into the ms mant - confirm. +greater: + move.l d2,d6 ;save ls mant in d6 + lsl.l d0,d2 ;shift ls mant by count + lsl.l d0,d1 ;shift ms mant by count + move.l #32,d5 + sub.l d0,d5 ;make op a denorm by shifting bits + lsr.l d5,d6 ;by the number in the exp, then +* ;set exp = 0. + or.l d6,d1 ;shift the ls mant bits into the ms mant + clr.l d0 ;same as if decremented exp to 0 +* ;while shifting + move.w d0,LOCAL_EX(a0) + move.l d1,LOCAL_HI(a0) + move.l d2,LOCAL_LO(a0) + movem.l (a7)+,d2/d3/d5/d6 + rts +ms_clr: + bfffo d2{0:32},d3 ;check if any bits set in ls mant + beq.b all_clr ;branch if none set + add.w #32,d3 + cmp.w d3,d0 ;if X>Y + bmi.b greater ;then branch + bsr nrm_set ;else exp won't go past 0 + movem.l (a7)+,d2/d3/d5/d6 + rts +all_clr: + clr.w LOCAL_EX(a0) ;no mantissa bits set. Set exp = 0.
+ movem.l (a7)+,d2/d3/d5/d6 + rts +* +* NRM_SET +* +* Input: a0 points to the operand; LOCAL_EX/LOCAL_HI/LOCAL_LO are +* updated in place. +* d0/d1 are used as scratch and are not preserved; d6/d7 are saved +* and restored. +* + xdef nrm_set +nrm_set: + move.l d7,-(a7) + bfffo LOCAL_HI(a0){0:32},d7 ;find first 1 in ms mant (offset to d7) + beq.b lower ;branch if ms mant is all 0's + + move.l d6,-(a7) + + sub.w d7,LOCAL_EX(a0) ;sub exponent by count + move.l LOCAL_HI(a0),d0 ;d0 has ms mant + move.l LOCAL_LO(a0),d1 ;d1 has ls mant + + lsl.l d7,d0 ;shift first 1 to j bit position + move.l d1,d6 ;copy ls mant into d6 + lsl.l d7,d6 ;shift ls mant by count + move.l d6,LOCAL_LO(a0) ;store ls mant into memory + moveq.l #32,d6 + sub.l d7,d6 ;continue shift + lsr.l d6,d1 ;shift off all bits but those that will +* ;be shifted into ms mant + or.l d1,d0 ;shift the ls mant bits into the ms mant + move.l d0,LOCAL_HI(a0) ;store ms mant into memory + movem.l (a7)+,d7/d6 ;restore registers + rts + +* +* We get here if ms mant was = 0, and we assume ls mant has bits +* set (otherwise this would have been tagged a zero not a denorm). +* +lower: + move.w LOCAL_EX(a0),d0 ;d0 has exponent + move.l LOCAL_LO(a0),d1 ;d1 has ls mant + sub.w #32,d0 ;account for ms mant being all zeros + bfffo d1{0:32},d7 ;find first 1 in ls mant (offset to d7) + sub.w d7,d0 ;subtract shift count from exp + lsl.l d7,d1 ;shift first 1 to integer bit in ms mant + move.w d0,LOCAL_EX(a0) ;store exp + move.l d1,LOCAL_HI(a0) ;store ms mant + clr.l LOCAL_LO(a0) ;clear ls mant + move.l (a7)+,d7 + rts +* +* denorm --- denormalize an intermediate result +* +* Used by underflow. +* +* Input: +* a0 points to the operand to be denormalized +* (in the internal extended format) +* +* d0: rounding precision +* Output: +* a0 points to the denormalized result +* (in the internal extended format) +* +* d0 is guard,round,sticky +* +* d0 comes into this routine with the rounding precision. It +* is then loaded with the denormalized exponent threshold for the +* rounding precision.
+* + + xdef denorm +denorm: + btst.b #6,LOCAL_EX(a0) ;check for exponents between $7fff-$4000 + beq.b no_sgn_ext + bset.b #7,LOCAL_EX(a0) ;sign extend if it is so +no_sgn_ext: + + tst.b d0 ;if 0 then extended precision + bne.b not_ext ;else branch + + clr.l d1 ;load d1 with ext threshold + clr.l d0 ;clear the sticky flag + bsr dnrm_lp ;denormalize the number + tst.b d1 ;check for inex + beq.w no_inex ;if clr, no inex + bra.b dnrm_inex ;if set, set inex + +not_ext: + cmpi.l #1,d0 ;if 1 then single precision + beq.b load_sgl ;else must be 2, double prec + +load_dbl: + move.w #dbl_thresh,d1 ;put copy of threshold in d1 + move.l d1,d0 ;copy d1 into d0 + sub.w LOCAL_EX(a0),d0 ;diff = threshold - exp + cmp.w #67,d0 ;if diff >= 67 (mant + grs bits) + bpl.b chk_stky ;then branch (all bits would be +* ; shifted off in denorm routine) + clr.l d0 ;else clear the sticky flag + bsr dnrm_lp ;denormalize the number + tst.b d1 ;check flag + beq.b no_inex ;if clr, no inex + bra.b dnrm_inex ;if set, set inex + +load_sgl: + move.w #sgl_thresh,d1 ;put copy of threshold in d1 + move.l d1,d0 ;copy d1 into d0 + sub.w LOCAL_EX(a0),d0 ;diff = threshold - exp + cmp.w #67,d0 ;if diff >= 67 (mant + grs bits) + bpl.b chk_stky ;then branch (all bits would be +* ; shifted off in denorm routine) + clr.l d0 ;else clear the sticky flag + bsr dnrm_lp ;denormalize the number + tst.b d1 ;check flag + beq.b no_inex ;if clr, no inex + bra.b dnrm_inex ;if set, set inex + +chk_stky: + tst.l LOCAL_HI(a0) ;check for any bits set + bne.b set_stky + tst.l LOCAL_LO(a0) ;check for any bits set + bne.b set_stky + bra.b clr_mant +set_stky: + or.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex + move.l #$20000000,d0 ;set sticky bit in return value +clr_mant: + move.w d1,LOCAL_EX(a0) ;load exp with threshold + clr.l LOCAL_HI(a0) ;set d1 = 0 (ms mantissa) + clr.l LOCAL_LO(a0) ;set d2 = 0 (ls mantissa) + rts +dnrm_inex: + or.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex +no_inex: + rts + +* +* dnrm_lp --- normalize
exponent/mantissa to specified threshold +* +* Input: +* a0 points to the operand to be denormalized +* d0{31:29} initial guard,round,sticky +* d1{15:0} denormalization threshold +* Output: +* a0 points to the denormalized operand +* d0{31:29} final guard,round,sticky +* d1.b inexact flag: all ones means inexact result +* +* The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2 +* so that bfext can be used to extract the new low part of the mantissa. +* Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there +* is no LOCAL_GRS scratch word following it on the fsave frame. +* + xdef dnrm_lp +dnrm_lp: + move.l d2,-(sp) ;save d2 for temp use + btst.b #E3,E_BYTE(a6) ;test for type E3 exception + beq.b not_E3 ;not type E3 exception + bfextu WBTEMP_GRS(a6){6:3},d2 ;extract guard,round, sticky bit + move.l #29,d0 + lsl.l d0,d2 ;shift g,r,s to their positions + move.l d2,d0 +not_E3: + move.l (sp)+,d2 ;restore d2 + move.l LOCAL_LO(a0),FP_SCR2+LOCAL_LO(a6) + move.l d0,FP_SCR2+LOCAL_GRS(a6) + move.l d1,d0 ;copy the denorm threshold + sub.w LOCAL_EX(a0),d1 ;d1 = threshold - uns exponent + ble.b no_lp ;d1 <= 0 + cmp.w #32,d1 + blt.b case_1 ;0 < d1 < 32 + cmp.w #64,d1 + blt.b case_2 ;32 <= d1 < 64 + bra.w case_3 ;d1 >= 64 +* +* No denormalization necessary +* +no_lp: + clr.b d1 ;set no inex2 reported + move.l FP_SCR2+LOCAL_GRS(a6),d0 ;restore original g,r,s + rts +* +* case (0<d1<32) +* +case_1: + move.l d2,-(sp) + move.w d0,LOCAL_EX(a0) ;exponent = denorm threshold + move.l #32,d0 + sub.w d1,d0 ;d0 = 32 - d1 + bfextu LOCAL_EX(a0){d0:32},d2 + bfextu d2{d1:d0},d2 ;d2 = new LOCAL_HI + bfextu LOCAL_HI(a0){d0:32},d1 ;d1 = new LOCAL_LO + bfextu FP_SCR2+LOCAL_LO(a6){d0:32},d0 ;d0 = new G,R,S + move.l d2,LOCAL_HI(a0) ;store new LOCAL_HI + move.l d1,LOCAL_LO(a0) ;store new LOCAL_LO + clr.b d1 + bftst d0{2:30} + beq.b c1nstky + bset.l #rnd_stky_bit,d0 + st.b d1 +c1nstky: + move.l FP_SCR2+LOCAL_GRS(a6),d2 ;restore original g,r,s + andi.l #$e0000000,d2 ;clear all
but G,R,S + tst.l d2 ;test if original G,R,S are clear + beq.b grs_clear + or.l #$20000000,d0 ;set sticky bit in d0 +grs_clear: + andi.l #$e0000000,d0 ;clear all but G,R,S + move.l (sp)+,d2 + rts +* +* case (32<=d1<64) +* +case_2: + move.l d2,-(sp) + move.w d0,LOCAL_EX(a0) ;unsigned exponent = threshold + sub.w #32,d1 ;d1 now between 0 and 32 + move.l #32,d0 + sub.w d1,d0 ;d0 = 32 - d1 + bfextu LOCAL_EX(a0){d0:32},d2 + bfextu d2{d1:d0},d2 ;d2 = new LOCAL_LO + bfextu LOCAL_HI(a0){d0:32},d1 ;d1 = new G,R,S + bftst d1{2:30} + bne.b c2_sstky ;bra if sticky bit to be set + bftst FP_SCR2+LOCAL_LO(a6){d0:32} + bne.b c2_sstky ;bra if sticky bit to be set + move.l d1,d0 + clr.b d1 + bra.b end_c2 +c2_sstky: + move.l d1,d0 + bset.l #rnd_stky_bit,d0 + st.b d1 +end_c2: + clr.l LOCAL_HI(a0) ;store LOCAL_HI = 0 + move.l d2,LOCAL_LO(a0) ;store LOCAL_LO + move.l FP_SCR2+LOCAL_GRS(a6),d2 ;restore original g,r,s + andi.l #$e0000000,d2 ;clear all but G,R,S + tst.l d2 ;test if original G,R,S are clear + beq.b clear_grs + or.l #$20000000,d0 ;set sticky bit in d0 +clear_grs: + andi.l #$e0000000,d0 ;get rid of all but G,R,S + move.l (sp)+,d2 + rts +* +* d1 >= 64 Force the exponent to be the denorm threshold with the +* correct sign. +* +case_3: + move.w d0,LOCAL_EX(a0) + tst.w LOCAL_SGN(a0) + bge.b c3con +c3neg: + or.l #$80000000,LOCAL_EX(a0) +c3con: + cmp.w #64,d1 + beq.b sixty_four + cmp.w #65,d1 + beq.b sixty_five +* +* Shift value is out of range. Set d1 for inex2 flag and +* return a zero with the given threshold. 
+* + clr.l LOCAL_HI(a0) + clr.l LOCAL_LO(a0) + move.l #$20000000,d0 + st.b d1 + rts + +sixty_four: + move.l LOCAL_HI(a0),d0 + bfextu d0{2:30},d1 + andi.l #$c0000000,d0 + bra.b c3com + +sixty_five: + move.l LOCAL_HI(a0),d0 + bfextu d0{1:31},d1 + andi.l #$80000000,d0 + lsr.l #1,d0 ;shift high bit into R bit + +c3com: + tst.l d1 + bne.b c3ssticky + tst.l LOCAL_LO(a0) + bne.b c3ssticky + tst.b FP_SCR2+LOCAL_GRS(a6) + bne.b c3ssticky + clr.b d1 + bra.b c3end + +c3ssticky: + bset.l #rnd_stky_bit,d0 + st.b d1 +c3end: + clr.l LOCAL_HI(a0) + clr.l LOCAL_LO(a0) + rts + + end diff --git a/sys/arch/m68k/fpsp/sacos.sa b/sys/arch/m68k/fpsp/sacos.sa new file mode 100644 index 00000000000..7a904741823 --- /dev/null +++ b/sys/arch/m68k/fpsp/sacos.sa @@ -0,0 +1,140 @@ +* $NetBSD: sacos.sa,v 1.3 1994/10/26 07:49:27 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. 
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* sacos.sa 3.3 12/19/90 +* +* Description: The entry point sAcos computes the inverse cosine of +* an input argument; sAcosd does the same except for denormalized +* input. +* +* Input: Double-extended number X in location pointed to +* by address register a0. +* +* Output: The value arccos(X) returned in floating-point register Fp0. +* +* Accuracy and Monotonicity: The returned result is within 3 ulps in +* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the +* result is subsequently rounded to double precision. The +* result is provably monotonic in double precision. +* +* Speed: The program sACOS takes approximately 310 cycles. +* +* Algorithm: +* +* ACOS +* 1. If |X| >= 1, go to 3. +* +* 2. (|X| < 1) Calculate acos(X) by +* z := (1-X) / (1+X) +* acos(X) = 2 * atan( sqrt(z) ). +* Exit. +* +* 3. If |X| > 1, go to 5. +* +* 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. +* +* 5. (|X| > 1) Generate an invalid operation by 0 * infinity. +* Exit.
+* + +SACOS IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + +PI DC.L $40000000,$C90FDAA2,$2168C235,$00000000 +PIBY2 DC.L $3FFF0000,$C90FDAA2,$2168C235,$00000000 + + xref t_operr + xref t_frcinx + xref satan + + xdef sacosd +sacosd: +*--ACOS(X) = PI/2 FOR DENORMALIZED X + fmove.l d1,fpcr ...load user's rounding mode/precision + FMOVE.X PIBY2,FP0 + bra t_frcinx + + xdef sacos +sacos: + FMOVE.X (a0),FP0 ...LOAD INPUT + + move.l (a0),d0 ...pack exponent with upper 16 fraction + move.w 4(a0),d0 + ANDI.L #$7FFFFFFF,D0 + CMPI.L #$3FFF8000,D0 + BGE.B ACOSBIG + +*--THIS IS THE USUAL CASE, |X| < 1 +*--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) ) + + FMOVE.S #:3F800000,FP1 + FADD.X FP0,FP1 ...1+X + FNEG.X FP0 ... -X + FADD.S #:3F800000,FP0 ...1-X + FDIV.X FP1,FP0 ...(1-X)/(1+X) + FSQRT.X FP0 ...SQRT((1-X)/(1+X)) + fmovem.x fp0,(a0) ...overwrite input + move.l d1,-(sp) ;save original users fpcr + clr.l d1 + bsr satan ...ATAN(SQRT([1-X]/[1+X])) + fMOVE.L (sp)+,fpcr ;restore users exceptions + FADD.X FP0,FP0 ...2 * ATAN( STUFF ) + bra t_frcinx + +ACOSBIG: + FABS.X FP0 + FCMP.S #:3F800000,FP0 + fbgt t_operr ;cause an operr exception + +*--|X| = 1, ACOS(X) = 0 OR PI + move.l (a0),d0 ...pack exponent with upper 16 fraction + move.w 4(a0),d0 + TST.L D0 ;D0 has original exponent+fraction + BGT.B ACOSP1 + +*--X = -1 +*Returns PI and inexact exception + FMOVE.X PI,FP0 + FMOVE.L d1,FPCR + FADD.S #:00800000,FP0 ;cause an inexact exception to be put +* ;into the 040 - will not trap until next +* ;fp inst. 
+ bra t_frcinx + +ACOSP1: + FMOVE.L d1,FPCR + FMOVE.S #:00000000,FP0 + rts ;Facos of +1 is exact + + end diff --git a/sys/arch/m68k/fpsp/sasin.sa b/sys/arch/m68k/fpsp/sasin.sa new file mode 100644 index 00000000000..99e2b88d9e0 --- /dev/null +++ b/sys/arch/m68k/fpsp/sasin.sa @@ -0,0 +1,129 @@ +* $NetBSD: sasin.sa,v 1.2 1994/10/26 07:49:29 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. 
+ +* +* sasin.sa 3.3 12/19/90 +* +* Description: The entry point sAsin computes the inverse sine of +* an input argument; sAsind does the same except for denormalized +* input. +* +* Input: Double-extended number X in location pointed to +* by address register a0. +* +* Output: The value arcsin(X) returned in floating-point register Fp0. +* +* Accuracy and Monotonicity: The returned result is within 3 ulps in +* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the +* result is subsequently rounded to double precision. The +* result is provably monotonic in double precision. +* +* Speed: The program sASIN takes approximately 310 cycles. +* +* Algorithm: +* +* ASIN +* 1. If |X| >= 1, go to 3. +* +* 2. (|X| < 1) Calculate asin(X) by +* z := sqrt( [1-X][1+X] ) +* asin(X) = atan( x / z ). +* Exit. +* +* 3. If |X| > 1, go to 5. +* +* 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit. +* +* 5. (|X| > 1) Generate an invalid operation by 0 * infinity. +* Exit. +* + +SASIN IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + +PIBY2 DC.L $3FFF0000,$C90FDAA2,$2168C235,$00000000 + + xref t_operr + xref t_frcinx + xref t_extdnrm + xref satan + + xdef sasind +sasind: +*--ASIN(X) = X FOR DENORMALIZED X + + bra t_extdnrm + + xdef sasin +sasin: + FMOVE.X (a0),FP0 ...LOAD INPUT + + move.l (a0),d0 + move.w 4(a0),d0 + ANDI.L #$7FFFFFFF,D0 + CMPI.L #$3FFF8000,D0 + BGE.B asinbig + +*--THIS IS THE USUAL CASE, |X| < 1 +*--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) ) + + FMOVE.S #:3F800000,FP1 + FSUB.X FP0,FP1 ...1-X + fmovem.x fp2,-(a7) + FMOVE.S #:3F800000,FP2 + FADD.X FP0,FP2 ...1+X + FMUL.X FP2,FP1 ...(1+X)(1-X) + fmovem.x (a7)+,fp2 + FSQRT.X FP1 ...SQRT([1-X][1+X]) + FDIV.X FP1,FP0 ...X/SQRT([1-X][1+X]) + fmovem.x fp0,(a0) + bsr satan + bra t_frcinx + +asinbig: + FABS.X FP0 ...|X| + FCMP.S #:3F800000,FP0 + fbgt t_operr ;cause an operr exception + +*--|X| = 1, ASIN(X) = +- PI/2. 
+ + FMOVE.X PIBY2,FP0 + move.l (a0),d0 + ANDI.L #$80000000,D0 ...SIGN BIT OF X + ORI.L #$3F800000,D0 ...+-1 IN SGL FORMAT + MOVE.L D0,-(sp) ...push SIGN(X) IN SGL-FMT + FMOVE.L d1,FPCR + FMUL.S (sp)+,FP0 + bra t_frcinx + + end diff --git a/sys/arch/m68k/fpsp/satan.sa b/sys/arch/m68k/fpsp/satan.sa new file mode 100644 index 00000000000..a865043197b --- /dev/null +++ b/sys/arch/m68k/fpsp/satan.sa @@ -0,0 +1,503 @@ +* $NetBSD: satan.sa,v 1.3 1994/10/26 07:49:31 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. 
+ +* +* satan.sa 3.3 12/19/90 +* +* The entry point satan computes the arctangent of an +* input value. satand does the same except the input value is a +* denormalized number. +* +* Input: Double-extended value in memory location pointed to by address +* register a0. +* +* Output: Arctan(X) returned in floating-point register Fp0. +* +* Accuracy and Monotonicity: The returned result is within 2 ulps in +* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the +* result is subsequently rounded to double precision. The +* result is provably monotonic in double precision. +* +* Speed: The program satan takes approximately 160 cycles for input +* argument X such that 1/16 < |X| < 16. For the other arguments, +* the program will run no worse than 10% slower. +* +* Algorithm: +* Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. +* +* Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3. +* Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits +* of X with a bit-1 attached at the 6-th bit position. Define u +* to be u = (X-F) / (1 + X*F). +* +* Step 3. Approximate arctan(u) by a polynomial poly. +* +* Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values +* calculated beforehand. Exit. +* +* Step 5. If |X| >= 16, go to Step 7. +* +* Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. +* +* Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'. +* Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
+* + +satan IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + +BOUNDS1 DC.L $3FFB8000,$4002FFFF + +ONE DC.L $3F800000 + + DC.L $00000000 + +ATANA3 DC.L $BFF6687E,$314987D8 +ATANA2 DC.L $4002AC69,$34A26DB3 + +ATANA1 DC.L $BFC2476F,$4E1DA28E +ATANB6 DC.L $3FB34444,$7F876989 + +ATANB5 DC.L $BFB744EE,$7FAF45DB +ATANB4 DC.L $3FBC71C6,$46940220 + +ATANB3 DC.L $BFC24924,$921872F9 +ATANB2 DC.L $3FC99999,$99998FA9 + +ATANB1 DC.L $BFD55555,$55555555 +ATANC5 DC.L $BFB70BF3,$98539E6A + +ATANC4 DC.L $3FBC7187,$962D1D7D +ATANC3 DC.L $BFC24924,$827107B8 + +ATANC2 DC.L $3FC99999,$9996263E +ATANC1 DC.L $BFD55555,$55555536 + +PPIBY2 DC.L $3FFF0000,$C90FDAA2,$2168C235,$00000000 +NPIBY2 DC.L $BFFF0000,$C90FDAA2,$2168C235,$00000000 +PTINY DC.L $00010000,$80000000,$00000000,$00000000 +NTINY DC.L $80010000,$80000000,$00000000,$00000000 + +ATANTBL: + DC.L $3FFB0000,$83D152C5,$060B7A51,$00000000 + DC.L $3FFB0000,$8BC85445,$65498B8B,$00000000 + DC.L $3FFB0000,$93BE4060,$17626B0D,$00000000 + DC.L $3FFB0000,$9BB3078D,$35AEC202,$00000000 + DC.L $3FFB0000,$A3A69A52,$5DDCE7DE,$00000000 + DC.L $3FFB0000,$AB98E943,$62765619,$00000000 + DC.L $3FFB0000,$B389E502,$F9C59862,$00000000 + DC.L $3FFB0000,$BB797E43,$6B09E6FB,$00000000 + DC.L $3FFB0000,$C367A5C7,$39E5F446,$00000000 + DC.L $3FFB0000,$CB544C61,$CFF7D5C6,$00000000 + DC.L $3FFB0000,$D33F62F8,$2488533E,$00000000 + DC.L $3FFB0000,$DB28DA81,$62404C77,$00000000 + DC.L $3FFB0000,$E310A407,$8AD34F18,$00000000 + DC.L $3FFB0000,$EAF6B0A8,$188EE1EB,$00000000 + DC.L $3FFB0000,$F2DAF194,$9DBE79D5,$00000000 + DC.L $3FFB0000,$FABD5813,$61D47E3E,$00000000 + DC.L $3FFC0000,$8346AC21,$0959ECC4,$00000000 + DC.L $3FFC0000,$8B232A08,$304282D8,$00000000 + DC.L $3FFC0000,$92FB70B8,$D29AE2F9,$00000000 + DC.L $3FFC0000,$9ACF476F,$5CCD1CB4,$00000000 + DC.L $3FFC0000,$A29E7630,$4954F23F,$00000000 + DC.L $3FFC0000,$AA68C5D0,$8AB85230,$00000000 + DC.L $3FFC0000,$B22DFFFD,$9D539F83,$00000000 + DC.L 
$3FFC0000,$B9EDEF45,$3E900EA5,$00000000 + DC.L $3FFC0000,$C1A85F1C,$C75E3EA5,$00000000 + DC.L $3FFC0000,$C95D1BE8,$28138DE6,$00000000 + DC.L $3FFC0000,$D10BF300,$840D2DE4,$00000000 + DC.L $3FFC0000,$D8B4B2BA,$6BC05E7A,$00000000 + DC.L $3FFC0000,$E0572A6B,$B42335F6,$00000000 + DC.L $3FFC0000,$E7F32A70,$EA9CAA8F,$00000000 + DC.L $3FFC0000,$EF888432,$64ECEFAA,$00000000 + DC.L $3FFC0000,$F7170A28,$ECC06666,$00000000 + DC.L $3FFD0000,$812FD288,$332DAD32,$00000000 + DC.L $3FFD0000,$88A8D1B1,$218E4D64,$00000000 + DC.L $3FFD0000,$9012AB3F,$23E4AEE8,$00000000 + DC.L $3FFD0000,$976CC3D4,$11E7F1B9,$00000000 + DC.L $3FFD0000,$9EB68949,$3889A227,$00000000 + DC.L $3FFD0000,$A5EF72C3,$4487361B,$00000000 + DC.L $3FFD0000,$AD1700BA,$F07A7227,$00000000 + DC.L $3FFD0000,$B42CBCFA,$FD37EFB7,$00000000 + DC.L $3FFD0000,$BB303A94,$0BA80F89,$00000000 + DC.L $3FFD0000,$C22115C6,$FCAEBBAF,$00000000 + DC.L $3FFD0000,$C8FEF3E6,$86331221,$00000000 + DC.L $3FFD0000,$CFC98330,$B4000C70,$00000000 + DC.L $3FFD0000,$D6807AA1,$102C5BF9,$00000000 + DC.L $3FFD0000,$DD2399BC,$31252AA3,$00000000 + DC.L $3FFD0000,$E3B2A855,$6B8FC517,$00000000 + DC.L $3FFD0000,$EA2D764F,$64315989,$00000000 + DC.L $3FFD0000,$F3BF5BF8,$BAD1A21D,$00000000 + DC.L $3FFE0000,$801CE39E,$0D205C9A,$00000000 + DC.L $3FFE0000,$8630A2DA,$DA1ED066,$00000000 + DC.L $3FFE0000,$8C1AD445,$F3E09B8C,$00000000 + DC.L $3FFE0000,$91DB8F16,$64F350E2,$00000000 + DC.L $3FFE0000,$97731420,$365E538C,$00000000 + DC.L $3FFE0000,$9CE1C8E6,$A0B8CDBA,$00000000 + DC.L $3FFE0000,$A22832DB,$CADAAE09,$00000000 + DC.L $3FFE0000,$A746F2DD,$B7602294,$00000000 + DC.L $3FFE0000,$AC3EC0FB,$997DD6A2,$00000000 + DC.L $3FFE0000,$B110688A,$EBDC6F6A,$00000000 + DC.L $3FFE0000,$B5BCC490,$59ECC4B0,$00000000 + DC.L $3FFE0000,$BA44BC7D,$D470782F,$00000000 + DC.L $3FFE0000,$BEA94144,$FD049AAC,$00000000 + DC.L $3FFE0000,$C2EB4ABB,$661628B6,$00000000 + DC.L $3FFE0000,$C70BD54C,$E602EE14,$00000000 + DC.L $3FFE0000,$CD000549,$ADEC7159,$00000000 + DC.L 
$3FFE0000,$D48457D2,$D8EA4EA3,$00000000 + DC.L $3FFE0000,$DB948DA7,$12DECE3B,$00000000 + DC.L $3FFE0000,$E23855F9,$69E8096A,$00000000 + DC.L $3FFE0000,$E8771129,$C4353259,$00000000 + DC.L $3FFE0000,$EE57C16E,$0D379C0D,$00000000 + DC.L $3FFE0000,$F3E10211,$A87C3779,$00000000 + DC.L $3FFE0000,$F919039D,$758B8D41,$00000000 + DC.L $3FFE0000,$FE058B8F,$64935FB3,$00000000 + DC.L $3FFF0000,$8155FB49,$7B685D04,$00000000 + DC.L $3FFF0000,$83889E35,$49D108E1,$00000000 + DC.L $3FFF0000,$859CFA76,$511D724B,$00000000 + DC.L $3FFF0000,$87952ECF,$FF8131E7,$00000000 + DC.L $3FFF0000,$89732FD1,$9557641B,$00000000 + DC.L $3FFF0000,$8B38CAD1,$01932A35,$00000000 + DC.L $3FFF0000,$8CE7A8D8,$301EE6B5,$00000000 + DC.L $3FFF0000,$8F46A39E,$2EAE5281,$00000000 + DC.L $3FFF0000,$922DA7D7,$91888487,$00000000 + DC.L $3FFF0000,$94D19FCB,$DEDF5241,$00000000 + DC.L $3FFF0000,$973AB944,$19D2A08B,$00000000 + DC.L $3FFF0000,$996FF00E,$08E10B96,$00000000 + DC.L $3FFF0000,$9B773F95,$12321DA7,$00000000 + DC.L $3FFF0000,$9D55CC32,$0F935624,$00000000 + DC.L $3FFF0000,$9F100575,$006CC571,$00000000 + DC.L $3FFF0000,$A0A9C290,$D97CC06C,$00000000 + DC.L $3FFF0000,$A22659EB,$EBC0630A,$00000000 + DC.L $3FFF0000,$A388B4AF,$F6EF0EC9,$00000000 + DC.L $3FFF0000,$A4D35F10,$61D292C4,$00000000 + DC.L $3FFF0000,$A60895DC,$FBE3187E,$00000000 + DC.L $3FFF0000,$A72A51DC,$7367BEAC,$00000000 + DC.L $3FFF0000,$A83A5153,$0956168F,$00000000 + DC.L $3FFF0000,$A93A2007,$7539546E,$00000000 + DC.L $3FFF0000,$AA9E7245,$023B2605,$00000000 + DC.L $3FFF0000,$AC4C84BA,$6FE4D58F,$00000000 + DC.L $3FFF0000,$ADCE4A4A,$606B9712,$00000000 + DC.L $3FFF0000,$AF2A2DCD,$8D263C9C,$00000000 + DC.L $3FFF0000,$B0656F81,$F22265C7,$00000000 + DC.L $3FFF0000,$B1846515,$0F71496A,$00000000 + DC.L $3FFF0000,$B28AAA15,$6F9ADA35,$00000000 + DC.L $3FFF0000,$B37B44FF,$3766B895,$00000000 + DC.L $3FFF0000,$B458C3DC,$E9630433,$00000000 + DC.L $3FFF0000,$B525529D,$562246BD,$00000000 + DC.L $3FFF0000,$B5E2CCA9,$5F9D88CC,$00000000 + DC.L 
$3FFF0000,$B692CADA,$7ACA1ADA,$00000000 + DC.L $3FFF0000,$B736AEA7,$A6925838,$00000000 + DC.L $3FFF0000,$B7CFAB28,$7E9F7B36,$00000000 + DC.L $3FFF0000,$B85ECC66,$CB219835,$00000000 + DC.L $3FFF0000,$B8E4FD5A,$20A593DA,$00000000 + DC.L $3FFF0000,$B99F41F6,$4AFF9BB5,$00000000 + DC.L $3FFF0000,$BA7F1E17,$842BBE7B,$00000000 + DC.L $3FFF0000,$BB471285,$7637E17D,$00000000 + DC.L $3FFF0000,$BBFABE8A,$4788DF6F,$00000000 + DC.L $3FFF0000,$BC9D0FAD,$2B689D79,$00000000 + DC.L $3FFF0000,$BD306A39,$471ECD86,$00000000 + DC.L $3FFF0000,$BDB6C731,$856AF18A,$00000000 + DC.L $3FFF0000,$BE31CAC5,$02E80D70,$00000000 + DC.L $3FFF0000,$BEA2D55C,$E33194E2,$00000000 + DC.L $3FFF0000,$BF0B10B7,$C03128F0,$00000000 + DC.L $3FFF0000,$BF6B7A18,$DACB778D,$00000000 + DC.L $3FFF0000,$BFC4EA46,$63FA18F6,$00000000 + DC.L $3FFF0000,$C0181BDE,$8B89A454,$00000000 + DC.L $3FFF0000,$C065B066,$CFBF6439,$00000000 + DC.L $3FFF0000,$C0AE345F,$56340AE6,$00000000 + DC.L $3FFF0000,$C0F22291,$9CB9E6A7,$00000000 + +X equ FP_SCR1 +XDCARE equ X+2 +XFRAC equ X+4 +XFRACLO equ X+8 + +ATANF equ FP_SCR2 +ATANFHI equ ATANF+4 +ATANFLO equ ATANF+8 + + + xref t_frcinx + xref t_extdnrm + + xdef satand +satand: +*--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT + + bra t_extdnrm + + xdef satan +satan: +*--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT A NAN + + FMOVE.X (A0),FP0 ...LOAD INPUT + + MOVE.L (A0),D0 ...D0 UPPER WORD = SIGN AND EXPONENT OF X + MOVE.W 4(A0),D0 ...D0 LOWER WORD = TOP 16 FRACTION BITS + FMOVE.X FP0,X(a6) + ANDI.L #$7FFFFFFF,D0 ...DROP SIGN: D0 IS |X| IN COMPACT FORM + + CMPI.L #$3FFB8000,D0 ...|X| >= 1/16? + BGE.B ATANOK1 + BRA.W ATANSM + +ATANOK1: + CMPI.L #$4002FFFF,D0 ...|X| < 16 ? + BLE.B ATANMAIN + BRA.W ATANBIG + + +*--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE A TABLE TECHNIQUE. +*--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). +*--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN +*--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE +*--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X).
IT IS +*--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR +*--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO +*--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE +*--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL +*--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES +*--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION +*--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION +*--WILL INVOLVE A VERY LONG POLYNOMIAL. + +*--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS +*--WE CHOOSE F TO BE +-2^K * 1.BBBB1 +*--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE +*--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE +*--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS +*-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). + +ATANMAIN: + + CLR.W XDCARE(a6) ...CLEAN UP X JUST IN CASE + ANDI.L #$F8000000,XFRAC(a6) ...FIRST 5 BITS + ORI.L #$04000000,XFRAC(a6) ...SET 6-TH BIT TO 1 + CLR.L XFRACLO(a6) ...LOCATION OF X IS NOW F + + FMOVE.X FP0,FP1 ...FP1 IS X + FMUL.X X(a6),FP1 ...FP1 IS X*F, NOTE THAT X*F > 0 + FSUB.X X(a6),FP0 ...FP0 IS X-F + FADD.S #:3F800000,FP1 ...FP1 IS 1 + X*F + FDIV.X FP1,FP0 ...FP0 IS U = (X-F)/(1+X*F) + +*--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|), +*--CREATE ATAN(F) AND STORE IT IN ATANF. D2 IS SAVED ON THE STACK +*--MEANWHILE; NO FP REGISTER IS SAVED HERE. + + MOVE.L d2,-(a7) ...SAVE d2 TEMPORARILY + MOVE.L d0,d2 ...THE EXPO AND 16 BITS OF X + ANDI.L #$00007800,d0 ...4 VARYING BITS OF F'S FRACTION + ANDI.L #$7FFF0000,d2 ...EXPONENT OF F + SUBI.L #$3FFB0000,d2 ...K+4 + ASR.L #1,d2 ...PLACE K+4 JUST ABOVE THE 4 FRACTION BITS + ADD.L d2,d0 ...THE 7 BITS IDENTIFYING F + ASR.L #7,d0 ...INDEX INTO TBL OF ATAN(|F|) + LEA ATANTBL,a1 + ADDA.L d0,a1 ...ADDRESS OF ATAN(|F|) + MOVE.L (a1)+,ATANF(a6) + MOVE.L (a1)+,ATANFHI(a6) + MOVE.L (a1)+,ATANFLO(a6) ...ATANF IS NOW ATAN(|F|) + MOVE.L X(a6),d0 ...LOAD SIGN AND EXPO.
AGAIN + ANDI.L #$80000000,d0 ...SIGN(F) + OR.L d0,ATANF(a6) ...ATANF IS NOW SIGN(F)*ATAN(|F|) + MOVE.L (a7)+,d2 ...RESTORE d2 + +*--THAT'S ALL I HAVE TO DO FOR NOW, +*--BUT ALAS, THE DIVIDE IS STILL CRANKING! + +*--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS +*--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U +*--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. +*--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) +*--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. +*--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT +*--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED. + + + FMOVE.X FP0,FP1 ...FP1 IS U + FMUL.X FP1,FP1 ...FP1 IS V = U*U + FMOVE.D ATANA3,FP2 + FADD.X FP1,FP2 ...A3+V + FMUL.X FP1,FP2 ...V*(A3+V) + FMUL.X FP0,FP1 ...U*V + FADD.D ATANA2,FP2 ...A2+V*(A3+V) + FMUL.D ATANA1,FP1 ...A1*U*V + FMUL.X FP2,FP1 ...A1*U*V*(A2+V*(A3+V)) + + FADD.X FP1,FP0 ...ATAN(U), FP1 RELEASED + FMOVE.L d1,FPCR ;restore users exceptions + FADD.X ATANF(a6),FP0 ...ATAN(X) + bra t_frcinx + +ATANBORS: +*--NOTE(REVIEW): NO BRANCH TO ATANBORS APPEARS IN THE VISIBLE CODE, AND +*--THE PRECEDING BRA PREVENTS FALL-THROUGH; IT LOOKS UNUSED -- CONFIRM. +*--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED. +*--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. + CMPI.L #$3FFF8000,d0 + BGT.W ATANBIG ...I.E. |X| >= 16 + +ATANSM: +*--|X| <= 1/16 +*--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE +*--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) +*--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] ) +*--WHERE Y = X*X, AND Z = Y*Y.
+ + CMPI.L #$3FD78000,d0 ...|X| < 2^(-40)? + BLT.W ATANTINY +*--COMPUTE POLYNOMIAL + FMUL.X FP0,FP0 ...FP0 IS Y = X*X + + + CLR.W XDCARE(a6) + + FMOVE.X FP0,FP1 + FMUL.X FP1,FP1 ...FP1 IS Z = Y*Y + + FMOVE.D ATANB6,FP2 + FMOVE.D ATANB5,FP3 + + FMUL.X FP1,FP2 ...Z*B6 + FMUL.X FP1,FP3 ...Z*B5 + + FADD.D ATANB4,FP2 ...B4+Z*B6 + FADD.D ATANB3,FP3 ...B3+Z*B5 + + FMUL.X FP1,FP2 ...Z*(B4+Z*B6) + FMUL.X FP3,FP1 ...Z*(B3+Z*B5) + + FADD.D ATANB2,FP2 ...B2+Z*(B4+Z*B6) + FADD.D ATANB1,FP1 ...B1+Z*(B3+Z*B5) + + FMUL.X FP0,FP2 ...Y*(B2+Z*(B4+Z*B6)) + FMUL.X X(a6),FP0 ...X*Y + + FADD.X FP2,FP1 ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] + + + FMUL.X FP1,FP0 ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) + + FMOVE.L d1,FPCR ;restore users exceptions + FADD.X X(a6),FP0 + + bra t_frcinx + +ATANTINY: +*--|X| < 2^(-40), ATAN(X) = X + CLR.W XDCARE(a6) + + FMOVE.L d1,FPCR ;restore users exceptions + FMOVE.X X(a6),FP0 ;last inst - possible exception set + + bra t_frcinx + +ATANBIG: +*--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, +*--RETURN SIGN(X)*PI/2 + ATAN(-1/X). + CMPI.L #$40638000,d0 ...|X| > 2^(100)? + BGT.W ATANHUGE + +*--APPROXIMATE ATAN(-1/X) BY +*--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' +*--THIS CAN BE RE-WRITTEN AS +*--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. + + FMOVE.S #:BF800000,FP1 ...LOAD -1 + FDIV.X FP0,FP1 ...FP1 IS -1/X + + +*--DIVIDE IS STILL CRANKING + + FMOVE.X FP1,FP0 ...FP0 IS X' + FMUL.X FP0,FP0 ...FP0 IS Y = X'*X' + FMOVE.X FP1,X(a6) ...X IS REALLY X' + + FMOVE.X FP0,FP1 + FMUL.X FP1,FP1 ...FP1 IS Z = Y*Y + + FMOVE.D ATANC5,FP3 + FMOVE.D ATANC4,FP2 + + FMUL.X FP1,FP3 ...Z*C5 + FMUL.X FP1,FP2 ...Z*C4 + + FADD.D ATANC3,FP3 ...C3+Z*C5 + FADD.D ATANC2,FP2 ...C2+Z*C4 + + FMUL.X FP3,FP1 ...Z*(C3+Z*C5), FP3 RELEASED + FMUL.X FP0,FP2 ...Y*(C2+Z*C4) + + FADD.D ATANC1,FP1 ...C1+Z*(C3+Z*C5) + FMUL.X X(a6),FP0 ...X'*Y + + FADD.X FP2,FP1 ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] + + + FMUL.X FP1,FP0 ...X'*Y*([C1+Z*(C3+Z*C5)] +* ...
+[Y*(C2+Z*C4)]) + FADD.X X(a6),FP0 + + FMOVE.L d1,FPCR ;restore users exceptions + + btst.b #7,(a0) ;sign bit of the input + beq.b pos_big + +neg_big: + FADD.X NPIBY2,FP0 + bra t_frcinx + +pos_big: + FADD.X PPIBY2,FP0 + bra t_frcinx + +ATANHUGE: +*--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY + btst.b #7,(a0) ;sign bit of the input + beq.b pos_huge + +neg_huge: + FMOVE.X NPIBY2,fp0 + fmove.l d1,fpcr + fsub.x NTINY,fp0 + bra t_frcinx + +pos_huge: + FMOVE.X PPIBY2,fp0 + fmove.l d1,fpcr + fsub.x PTINY,fp0 + bra t_frcinx + + end diff --git a/sys/arch/m68k/fpsp/satanh.sa b/sys/arch/m68k/fpsp/satanh.sa new file mode 100644 index 00000000000..06362c78d8a --- /dev/null +++ b/sys/arch/m68k/fpsp/satanh.sa @@ -0,0 +1,129 @@ +* $NetBSD: satanh.sa,v 1.2 1994/10/26 07:49:33 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE.
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* satanh.sa 3.3 12/19/90 +* +* The entry point satanh computes the inverse +* hyperbolic tangent of +* an input argument; satanhd does the same except for denormalized +* input. +* +* Input: Double-extended number X in location pointed to +* by address register a0. +* +* Output: The value arctanh(X) returned in floating-point register Fp0. +* +* Accuracy and Monotonicity: The returned result is within 3 ulps in +* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the +* result is subsequently rounded to double precision. The +* result is provably monotonic in double precision. +* +* Speed: The program satanh takes approximately 270 cycles. +* +* Algorithm: +* +* ATANH +* 1. If |X| >= 1, go to 3. +* +* 2. (|X| < 1) Calculate atanh(X) by +* sgn := sign(X) +* y := |X| +* z := 2y/(1-y) +* atanh(X) := sgn * (1/2) * logp1(z) +* Exit. +* +* 3. If |X| > 1, go to 5. +* +* 4. (|X| = 1) Generate infinity with an appropriate sign and +* divide-by-zero by +* sgn := sign(X) +* atanh(X) := sgn / (+0). +* Exit. +* +* 5. (|X| > 1) Generate an invalid operation by 0 * infinity. +* Exit. +* + +satanh IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + xref t_dz + xref t_operr + xref t_frcinx + xref t_extdnrm + xref slognp1 + + xdef satanhd +satanhd: +*--ATANH(X) = X FOR DENORMALIZED X + + bra t_extdnrm + + xdef satanh +satanh: + move.l (a0),d0 + move.w 4(a0),d0 + ANDI.L #$7FFFFFFF,D0 ...D0 IS |X| IN COMPACT FORM + CMPI.L #$3FFF8000,D0 ...|X| >= 1? + BGE.B ATANHBIG + +*--THIS IS THE USUAL CASE, |X| < 1 +*--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
+ + FABS.X (a0),FP0 ...Y = |X| + FMOVE.X FP0,FP1 + FNEG.X FP1 ...-Y + FADD.X FP0,FP0 ...2Y + FADD.S #:3F800000,FP1 ...1-Y + FDIV.X FP1,FP0 ...2Y/(1-Y) + move.l (a0),d0 + ANDI.L #$80000000,D0 + ORI.L #$3F000000,D0 ...SIGN(X)*HALF + move.l d0,-(sp) ...PUSH SINGLE-PRECISION SIGN(X)*HALF + + fmovem.x fp0,(a0) ...overwrite input + move.l d1,-(sp) ...SAVE d1; IT IS POPPED INTO FPCR BELOW + clr.l d1 + bsr slognp1 ...LOG1P(Z) + fmove.l (sp)+,fpcr ...FPCR := SAVED d1 + FMUL.S (sp)+,FP0 ...SIGN(X)*HALF*LOG1P(Z) + bra t_frcinx + +ATANHBIG: + FABS.X (a0),FP0 ...|X| + FCMP.S #:3F800000,FP0 ...COMPARE |X| WITH 1 + fbgt t_operr ...|X| > 1 + bra t_dz ...|X| = 1 + + end diff --git a/sys/arch/m68k/fpsp/scale.sa b/sys/arch/m68k/fpsp/scale.sa new file mode 100644 index 00000000000..e94fded546d --- /dev/null +++ b/sys/arch/m68k/fpsp/scale.sa @@ -0,0 +1,397 @@ +* $NetBSD: scale.sa,v 1.3 1994/10/26 07:49:34 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE.
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* scale.sa 3.3 7/30/91 +* +* The entry point sSCALE computes the destination operand +* scaled by the source operand. If the absolute value of +* the source operand is (>= 2^14) an overflow or underflow +* is returned. +* +* The entry point sscale is called from do_func to emulate +* the fscale unimplemented instruction. +* +* Input: Double-extended destination operand in FPTEMP, +* double-extended source operand in ETEMP. +* +* Output: The function returns scale(X,Y) to fp0. +* +* Modifies: fp0. +* +* Algorithm: +* + +SCALE IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref t_ovfl2 + xref t_unfl + xref round + xref t_resdnrm + +SRC_BNDS dc.w $3fff,$400c + +* +* This entry point is used by the unimplemented instruction exception +* handler. +* +* +* +* FSCALE +* + xdef sscale +sscale: + fmove.l #0,fpcr ;clr user enabled exc + clr.l d1 + move.w FPTEMP(a6),d1 ;get dest exponent + smi L_SCR1(a6) ;use L_SCR1 to hold sign + andi.l #$7fff,d1 ;strip sign + move.w ETEMP(a6),d0 ;check src bounds + andi.w #$7fff,d0 ;clr sign bit + cmp2.w SRC_BNDS,d0 ;bounds are $3fff..$400c + bcc.b src_in ;within bounds + cmpi.w #$400c,d0 ;test for too large + bge.w src_out +* +* The source input is below 1, so we check for denormalized numbers +* and set unfl.
+* +src_small: + move.b DTAG(a6),d0 + andi.b #$e0,d0 + tst.b d0 + beq.b no_denorm + st STORE_FLG(a6) ;dest already contains result + or.l #unfl_mask,USER_FPSR(a6) ;set UNFL +den_done: + lea.l FPTEMP(a6),a0 + bra t_resdnrm +no_denorm: + fmove.l USER_FPCR(a6),FPCR + fmove.x FPTEMP(a6),fp0 ;simply return dest + rts + + +* +* Source is within 2^14 range. To perform the int operation, +* move it to d0. +* +src_in: + fmove.x ETEMP(a6),fp0 ;move in src for int + fmove.l #rz_mode,fpcr ;force rz for src conversion + fmove.l fp0,d0 ;int src to d0 + fmove.l #0,FPSR ;clr status from above + tst.w ETEMP(a6) ;check src sign + blt.w src_neg +* +* Source is positive. Add the src to the dest exponent. +* The result can be denormalized, if src = 0, or overflow, +* if the result of the add sets a bit in the upper word. +* +src_pos: + tst.w d1 ;check for denorm + beq.w dst_dnrm + add.l d0,d1 ;add src to dest exp + beq.b denorm ;if zero, result is denorm + cmpi.l #$7fff,d1 ;test for overflow + bge.b ovfl + tst.b L_SCR1(a6) + beq.b spos_pos + or.w #$8000,d1 +spos_pos: + move.w d1,FPTEMP(a6) ;result in FPTEMP + fmove.l USER_FPCR(a6),FPCR + fmove.x FPTEMP(a6),fp0 ;write result to fp0 + rts +ovfl: + tst.b L_SCR1(a6) + beq.b sovl_pos + or.w #$8000,d1 +sovl_pos: + move.w FPTEMP(a6),ETEMP(a6) ;copies dest exponent word (not d1) to ETEMP + move.l FPTEMP_HI(a6),ETEMP_HI(a6) + move.l FPTEMP_LO(a6),ETEMP_LO(a6) + bra t_ovfl2 + +denorm: + tst.b L_SCR1(a6) + beq.b den_pos + or.w #$8000,d1 +den_pos: + tst.l FPTEMP_HI(a6) ;check j bit + blt.b nden_exit ;if set, not denorm + move.w d1,ETEMP(a6) ;input expected in ETEMP + move.l FPTEMP_HI(a6),ETEMP_HI(a6) + move.l FPTEMP_LO(a6),ETEMP_LO(a6) + or.l #unfl_bit,USER_FPSR(a6) ;set unfl -- NOTE(review): other paths here OR in unfl_mask; confirm + lea.l ETEMP(a6),a0 + bra t_resdnrm +nden_exit: + move.w d1,FPTEMP(a6) ;result in FPTEMP + fmove.l USER_FPCR(a6),FPCR + fmove.x FPTEMP(a6),fp0 ;write result to fp0 + rts + +* +* Source is negative. Add the src to the dest exponent. +* (The result exponent will be reduced).
The result can be +* denormalized. +* +src_neg: + add.l d0,d1 ;add src to dest + beq.b denorm ;if zero, result is denorm + blt.b fix_dnrm ;if negative, result is +* ;needing denormalization + tst.b L_SCR1(a6) + beq.b sneg_pos + or.w #$8000,d1 +sneg_pos: + move.w d1,FPTEMP(a6) ;result in FPTEMP + fmove.l USER_FPCR(a6),FPCR + fmove.x FPTEMP(a6),fp0 ;write result to fp0 + rts + + +* +* The result exponent is below denorm value. Test for catastrophic +* underflow and force zero if true. If not, try to shift the +* mantissa right until a zero exponent exists. +* +fix_dnrm: + cmpi.w #$ffc0,d1 ;lower bound for normalization (-64) + blt.w fix_unfl ;if lower, catastrophic unfl + move.w d1,d0 ;use d0 for exp + move.l d2,-(a7) ;free d2 for norm + move.l FPTEMP_HI(a6),d1 + move.l FPTEMP_LO(a6),d2 + clr.l L_SCR2(a6) +fix_loop: + add.w #1,d0 ;drive d0 to 0 + lsr.l #1,d1 ;while shifting the + roxr.l #1,d2 ;mantissa to the right + bcc.b no_carry + st L_SCR2(a6) ;use L_SCR2 to capture inex +no_carry: + tst.w d0 ;it is finished when + blt.b fix_loop ;d0 is no longer negative + tst.b L_SCR2(a6) + beq.b tst_zero + or.l #unfl_inx_mask,USER_FPSR(a6) +* ;set unfl, aunfl, ainex +* +* Test for zero. If zero, simply use fmove to return +/- zero +* to the fpu. +* +tst_zero: + clr.w FPTEMP_EX(a6) + tst.b L_SCR1(a6) ;test for sign + beq.b tst_con + or.w #$8000,FPTEMP_EX(a6) ;set sign bit +tst_con: + move.l d1,FPTEMP_HI(a6) + move.l d2,FPTEMP_LO(a6) + move.l (a7)+,d2 + tst.l d1 + bne.b not_zero + tst.l FPTEMP_LO(a6) + bne.b not_zero +* +* Result is zero. Check for rounding mode to set lsb. If the +* mode is rp, and the zero is positive, return smallest denorm. +* If the mode is rm, and the zero is negative, return smallest +* negative denorm.
+* + btst.b #5,FPCR_MODE(a6) ;test if rm or rp + beq.b no_dir + btst.b #4,FPCR_MODE(a6) ;check which one + beq.b zer_rm +zer_rp: + tst.b L_SCR1(a6) ;check sign + bne.b no_dir ;if set, neg op, no inc + move.l #1,FPTEMP_LO(a6) ;set lsb + bra.b sm_dnrm +zer_rm: + tst.b L_SCR1(a6) ;check sign + beq.b no_dir ;if clr, pos op, no inc + move.l #1,FPTEMP_LO(a6) ;set lsb + or.l #neg_mask,USER_FPSR(a6) ;set N + bra.b sm_dnrm +no_dir: + fmove.l USER_FPCR(a6),FPCR + fmove.x FPTEMP(a6),fp0 ;use fmove to set cc's + rts + +* +* The rounding mode changed the zero to a smallest denorm. Call +* t_resdnrm with exceptional operand in ETEMP. +* +sm_dnrm: + move.l FPTEMP_EX(a6),ETEMP_EX(a6) + move.l FPTEMP_HI(a6),ETEMP_HI(a6) + move.l FPTEMP_LO(a6),ETEMP_LO(a6) + lea.l ETEMP(a6),a0 + bra t_resdnrm + +* +* Result is still denormalized. +* +not_zero: + or.l #unfl_mask,USER_FPSR(a6) ;set unfl + tst.b L_SCR1(a6) ;check for sign + beq.b fix_exit + or.l #neg_mask,USER_FPSR(a6) ;set N +fix_exit: + bra.b sm_dnrm + + +* +* The result has underflowed to zero. Return zero and set +* unfl, aunfl, and ainex. +* +fix_unfl: + or.l #unfl_inx_mask,USER_FPSR(a6) + btst.b #5,FPCR_MODE(a6) ;test if rm or rp + beq.b no_dir2 + btst.b #4,FPCR_MODE(a6) ;check which one + beq.b zer_rm2 +zer_rp2: + tst.b L_SCR1(a6) ;check sign + bne.b no_dir2 ;if set, neg op, no inc + clr.l FPTEMP_EX(a6) + clr.l FPTEMP_HI(a6) + move.l #1,FPTEMP_LO(a6) ;set lsb + bra.b sm_dnrm ;return smallest denorm +zer_rm2: + tst.b L_SCR1(a6) ;check sign + beq.b no_dir2 ;if clr, pos op, no inc + move.w #$8000,FPTEMP_EX(a6) + clr.l FPTEMP_HI(a6) + move.l #1,FPTEMP_LO(a6) ;set lsb + or.l #neg_mask,USER_FPSR(a6) ;set N + bra.w sm_dnrm ;return smallest denorm + +no_dir2: + tst.b L_SCR1(a6) + bge.b pos_zero +neg_zero: + clr.l FP_SCR1(a6) ;clear the exceptional operand + clr.l FP_SCR1+4(a6) ;for gen_except.
+ clr.l FP_SCR1+8(a6) + fmove.s #:80000000,fp0 + rts +pos_zero: + clr.l FP_SCR1(a6) ;clear the exceptional operand + clr.l FP_SCR1+4(a6) ;for gen_except. + clr.l FP_SCR1+8(a6) + fmove.s #:00000000,fp0 + rts + +* +* The destination is a denormalized number. It must be handled +* by first shifting the bits in the mantissa until it is normalized, +* then adding the remainder of the source to the exponent. +* +dst_dnrm: + movem.l d2/d3,-(a7) + move.w FPTEMP_EX(a6),d1 + move.l FPTEMP_HI(a6),d2 + move.l FPTEMP_LO(a6),d3 +dst_loop: + tst.l d2 ;test for normalized result + blt.b dst_norm ;exit loop if so + tst.l d0 ;otherwise, test shift count + beq.b dst_fin ;if zero, shifting is done + subq.l #1,d0 ;dec src + add.l d3,d3 ;shift the 64-bit mantissa d2:d3 + addx.l d2,d2 ;left by one bit + bra.b dst_loop +* +* Destination became normalized. Simply add the remaining +* portion of the src to the exponent. +* +dst_norm: + add.w d0,d1 ;dst is normalized; add src + tst.b L_SCR1(a6) + beq.b dnrm_pos + or.w #$8000,d1 +dnrm_pos: + movem.w d1,FPTEMP_EX(a6) + movem.l d2,FPTEMP_HI(a6) + movem.l d3,FPTEMP_LO(a6) + fmove.l USER_FPCR(a6),FPCR + fmove.x FPTEMP(a6),fp0 + movem.l (a7)+,d2/d3 + rts + +* +* Destination remained denormalized. Call t_resdnrm with +* exceptional operand in ETEMP. +* +dst_fin: + tst.b L_SCR1(a6) ;check for sign + beq.b dst_exit + or.l #neg_mask,USER_FPSR(a6) ;set N + or.w #$8000,d1 +dst_exit: + movem.w d1,ETEMP_EX(a6) + movem.l d2,ETEMP_HI(a6) + movem.l d3,ETEMP_LO(a6) + or.l #unfl_mask,USER_FPSR(a6) ;set unfl + movem.l (a7)+,d2/d3 + lea.l ETEMP(a6),a0 + bra t_resdnrm + +* +* Source is outside of 2^14 range. Test the sign and branch +* to the appropriate exception handler.
+* +src_out: + tst.b L_SCR1(a6) + beq.b scro_pos + or.w #$8000,d1 +scro_pos: + move.l FPTEMP_HI(a6),ETEMP_HI(a6) + move.l FPTEMP_LO(a6),ETEMP_LO(a6) + tst.w ETEMP(a6) ;check src sign + blt.b res_neg ;negative src goes to t_unfl +res_pos: + move.w d1,ETEMP(a6) ;result in ETEMP + bra t_ovfl2 +res_neg: + move.w d1,ETEMP(a6) ;result in ETEMP + lea.l ETEMP(a6),a0 + bra t_unfl + end diff --git a/sys/arch/m68k/fpsp/scosh.sa b/sys/arch/m68k/fpsp/scosh.sa new file mode 100644 index 00000000000..93fffc268aa --- /dev/null +++ b/sys/arch/m68k/fpsp/scosh.sa @@ -0,0 +1,156 @@ +* $NetBSD: scosh.sa,v 1.2 1994/10/26 07:49:39 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* scosh.sa 3.1 12/10/90 +* +* The entry point sCosh computes the hyperbolic cosine of +* an input argument; sCoshd does the same except for denormalized +* input. +* +* Input: Double-extended number X in location pointed to +* by address register a0. +* +* Output: The value cosh(X) returned in floating-point register Fp0. +* +* Accuracy and Monotonicity: The returned result is within 3 ulps in +* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the +* result is subsequently rounded to double precision. The +* result is provably monotonic in double precision. +* +* Speed: The program sCOSH takes approximately 250 cycles. +* +* Algorithm: +* +* COSH +* 1. If |X| > 16380 log2, go to 3. +* +* 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae +* y = |X|, z = exp(y), and +* cosh(X) = (1/2)*( z + 1/z ). +* Exit. +* +* 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. +* +* 4. (16380 log2 < |X| <= 16480 log2) +* cosh(X) = exp(|X|)/2. +* However, invoking exp(|X|) may cause premature overflow. +* Thus, we calculate cosh(X) as follows: +* Y := |X| +* Fact := 2**(16380) +* Y' := Y - 16381 log2 +* cosh(X) := Fact * exp(Y'). +* Exit. +* +* 5. (|X| > 16480 log2) cosh(X) must overflow. Return +* Huge*Huge to generate overflow and a positive +* infinity. Huge is the largest finite number in +* extended format. Exit. +* + +SCOSH IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + xref t_ovfl + xref t_frcinx + xref setox + +T1 DC.L $40C62D38,$D3D64634 ... 16381 LOG2 LEAD +T2 DC.L $3D6F90AE,$B1E75CC7 ...
16381 LOG2 TRAIL + +TWO16380 DC.L $7FFB0000,$80000000,$00000000,$00000000 + + xdef scoshd +scoshd: +*--COSH(X) = 1 FOR DENORMALIZED X + + FMOVE.S #:3F800000,FP0 ...1.0 + + FMOVE.L d1,FPCR + FADD.S #:00800000,FP0 ...1 + 2^(-126) + bra t_frcinx + + xdef scosh +scosh: + FMOVE.X (a0),FP0 ...LOAD INPUT + + move.l (a0),d0 + move.w 4(a0),d0 + ANDI.L #$7FFFFFFF,d0 ...d0 IS |X| IN COMPACT FORM + CMPI.L #$400CB167,d0 ...|X| > 16380 LOG2? + BGT.B COSHBIG + +*--THIS IS THE USUAL CASE, |X| < 16380 LOG2 +*--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) ) + + FABS.X FP0 ...|X| + + move.l d1,-(sp) ...SAVE d1 ACROSS THE CALL + clr.l d1 + fmovem.x fp0,(a0) ;pass parameter to setox + bsr setox ...FP0 IS EXP(|X|) + FMUL.S #:3F000000,FP0 ...(1/2)EXP(|X|) + move.l (sp)+,d1 ...RESTORE d1 + + FMOVE.S #:3E800000,FP1 ...(1/4) + FDIV.X FP0,FP1 ...1/(2 EXP(|X|)) + + FMOVE.L d1,FPCR + FADD.X fp1,FP0 ...(1/2)EXP(|X|) + 1/(2 EXP(|X|)) + + bra t_frcinx + +COSHBIG: + CMPI.L #$400CB2B3,d0 ...|X| > 16480 LOG2? + BGT.B COSHHUGE + + FABS.X FP0 + FSUB.D T1(pc),FP0 ...(|X|-16381LOG2_LEAD) + FSUB.D T2(pc),FP0 ...|X| - 16381 LOG2, ACCURATE + + move.l d1,-(sp) ...SAVE d1; IT IS POPPED INTO FPCR BELOW + clr.l d1 + fmovem.x fp0,(a0) + bsr setox + fmove.l (sp)+,fpcr ...FPCR := SAVED d1 + + FMUL.X TWO16380(pc),FP0 ...2^(16380) * EXP(Y') + bra t_frcinx + +COSHHUGE: + fmove.l #0,fpsr ;clr N bit if set by source + bclr.b #7,(a0) ;always return positive value + fmovem.x (a0),fp0 + bra t_ovfl + + end diff --git a/sys/arch/m68k/fpsp/setox.sa b/sys/arch/m68k/fpsp/setox.sa new file mode 100644 index 00000000000..7627b746bdd --- /dev/null +++ b/sys/arch/m68k/fpsp/setox.sa @@ -0,0 +1,889 @@ +* $NetBSD: setox.sa,v 1.3 1994/10/26 07:49:42 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* setox.sa 3.1 12/10/90 +* +* The entry point setox computes the exponential of a value. +* setoxd does the same except the input value is a denormalized +* number. setoxm1 computes exp(X)-1, and setoxm1d computes +* exp(X)-1 for denormalized X. +* +* INPUT +* ----- +* Double-extended value in memory location pointed to by address +* register a0. +* +* OUTPUT +* ------ +* exp(X) or exp(X)-1 returned in floating-point register fp0. +* +* ACCURACY and MONOTONICITY +* ------------------------- +* The returned result is within 0.85 ulps in 64 significant bit, i.e. +* within 0.5001 ulp to 53 bits if the result is subsequently rounded +* to double precision. The result is provably monotonic in double +* precision. +* +* SPEED +* ----- +* Two timings are measured, both in the copy-back mode. 
The +* first one is measured when the function is invoked the first time +* (so the instructions and data are not in cache), and the +* second one is measured when the function is reinvoked at the same +* input argument. +* +* The program setox takes approximately 210/190 cycles for input +* argument X whose magnitude is less than 16380 log2, which +* is the usual situation. For the less common arguments, +* depending on their values, the program may run faster or slower -- +* but no worse than 10% slower even in the extreme cases. +* +* The program setoxm1 takes approximately ???/??? cycles for input +* argument X, 0.25 <= |X| < 70log2. For |X| < 0.25, it takes +* approximately ???/??? cycles. For the less common arguments, +* depending on their values, the program may run faster or slower -- +* but no worse than 10% slower even in the extreme cases. +* +* ALGORITHM and IMPLEMENTATION NOTES +* ---------------------------------- +* +* setoxd +* ------ +* Step 1. Set ans := 1.0 +* +* Step 2. Return ans := ans + sign(X)*2^(-126). Exit. +* Notes: This will always generate one exception -- inexact. +* +* +* setox +* ----- +* +* Step 1. Filter out extreme cases of input argument. +* 1.1 If |X| >= 2^(-65), go to Step 1.3. +* 1.2 Go to Step 7. +* 1.3 If |X| < 16380 log(2), go to Step 2. +* 1.4 Go to Step 8. +* Notes: The usual case should take the branches 1.1 -> 1.3 -> 2. +* To avoid the use of floating-point comparisons, a +* compact representation of |X| is used. This format is a +* 32-bit integer, the upper (more significant) 16 bits are +* the sign and biased exponent field of |X|; the lower 16 +* bits are the 16 most significant fraction (including the +* explicit bit) bits of |X|. Consequently, the comparisons +* in Steps 1.1 and 1.3 can be performed by integer comparison. +* Note also that the constant 16380 log(2) used in Step 1.3 +* is also in the compact form. Thus taking the branch +* to Step 2 guarantees |X| < 16380 log(2). 
There is no harm +* in having a small number of cases where |X| is less than, +* but close to, 16380 log(2) and the branch to Step 8 is +* taken. +* +* Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). +* 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 was taken) +* 2.2 N := round-to-nearest-integer( X * 64/log2 ). +* 2.3 Calculate J = N mod 64; so J = 0,1,2,..., or 63. +* 2.4 Calculate M = (N - J)/64; so N = 64M + J. +* 2.5 Calculate the address of the stored value of 2^(J/64). +* 2.6 Create the value Scale = 2^M. +* Notes: The calculation in 2.2 is really performed by +* +* Z := X * constant +* N := round-to-nearest-integer(Z) +* +* where +* +* constant := single-precision( 64/log 2 ). +* +* Using a single-precision constant avoids memory access. +* Another effect of using a single-precision "constant" is +* that the calculated value Z is +* +* Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). +* +* This error has to be considered later in Steps 3 and 4. +* +* Step 3. Calculate X - N*log2/64. +* 3.1 R := X + N*L1, where L1 := single-precision(-log2/64). +* 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1). +* Notes: a) The way L1 and L2 are chosen ensures L1+L2 approximates +* the value -log2/64 to 88 bits of accuracy. +* b) N*L1 is exact because N is no longer than 22 bits and +* L1 is no longer than 24 bits. +* c) The calculation X+N*L1 is also exact due to cancellation. +* Thus, R is practically X+N(L1+L2) to full 64 bits. +* d) It is important to estimate how large |R| can be after +* Step 3.2. +* +* N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) +* X*64/log2 (1+eps) = N + f, |f| <= 0.5 +* X*64/log2 - N = f - eps*X 64/log2 +* X - N*log2/64 = f*log2/64 - eps*X +* +* +* Now |X| <= 16446 log2, thus +* +* |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 +* <= 0.57 log2/64. +* This bound will be used in Step 4. +* +* Step 4.
Approximate exp(R)-1 by a polynomial +* p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) +* Notes: a) In order to reduce memory access, the coefficients are +* made as "short" as possible: A1 (which is 1/2), A4 and A5 +* are single precision; A2 and A3 are double precision. +* b) Even with the restrictions above, +* |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. +* Note that 0.0062 is slightly bigger than 0.57 log2/64. +* c) To fully utilize the pipeline, p is separated into +* two independent pieces of roughly equal complexities +* p = [ R + R*S*(A2 + S*A4) ] + +* [ S*(A1 + S*(A3 + S*A5)) ] +* where S = R*R. +* +* Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by +* ans := T + ( T*p + t) +* where T and t are the stored values for 2^(J/64). +* Notes: 2^(J/64) is stored as T and t where T+t approximates +* 2^(J/64) to roughly 85 bits; T is in extended precision +* and t is in single precision. Note also that T is rounded +* to 62 bits so that the last two bits of T are zero. The +* reason for such a special form is that T-1, T-2, and T-8 +* will all be exact --- a property that will give much +* more accurate computation of the function EXPM1. +* +* Step 6. Reconstruction of exp(X) +* exp(X) = 2^M * 2^(J/64) * exp(R). +* 6.1 If AdjFlag = 0, go to 6.3 +* 6.2 ans := ans * AdjScale +* 6.3 Restore the user FPCR +* 6.4 Return ans := ans * Scale. Exit. +* Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, +* |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will +* neither overflow nor underflow. If AdjFlag = 1, that +* means that +* X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. +* Hence, exp(X) may overflow or underflow or neither. +* When that is the case, AdjScale = 2^(M1) where M1 is +* approximately M. Thus 6.2 will never cause over/underflow. +* Possible exception in 6.4 is overflow or underflow. +* The inexact exception is not generated in 6.4. 
Although +* one can argue that the inexact flag should always be +* raised, to simulate that exception costs more than the +* flag is worth in practical uses. +* +* Step 7. Return 1 + X. +* 7.1 ans := X +* 7.2 Restore user FPCR. +* 7.3 Return ans := 1 + ans. Exit +* Notes: For non-zero X, the inexact exception will always be +* raised by 7.3. That is the only exception raised by 7.3. +* Note also that we use the FMOVEM instruction to move X +* in Step 7.1 to avoid unnecessary trapping. (Although +* the FMOVEM may not seem relevant since X is normalized, +* the precaution will be useful in the library version of +* this code where the separate entry for denormalized inputs +* will be done away with.) +* +* Step 8. Handle exp(X) where |X| >= 16380log2. +* 8.1 If |X| > 16480 log2, go to Step 9. +* (mimic 2.2 - 2.6) +* 8.2 N := round-to-integer( X * 64/log2 ) +* 8.3 Calculate J = N mod 64, J = 0,1,...,63 +* 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, AdjFlag := 1. +* 8.5 Calculate the address of the stored value 2^(J/64). +* 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. +* 8.7 Go to Step 3. +* Notes: Refer to notes for 2.2 - 2.6. +* +* Step 9. Handle exp(X), |X| > 16480 log2. +* 9.1 If X < 0, go to 9.3 +* 9.2 ans := Huge, go to 9.4 +* 9.3 ans := Tiny. +* 9.4 Restore user FPCR. +* 9.5 Return ans := ans * ans. Exit. +* Notes: Exp(X) will surely overflow or underflow, depending on +* X's sign. "Huge" and "Tiny" are respectively large/tiny +* extended-precision numbers whose square over/underflow +* with an inexact result. Thus, 9.5 always raises the +* inexact together with either overflow or underflow. +* +* +* setoxm1d +* -------- +* +* Step 1. Set ans := 0 +* +* Step 2. Return ans := X + ans. Exit. +* Notes: This will return X with the appropriate rounding +* precision prescribed by the user FPCR. +* +* setoxm1 +* ------- +* +* Step 1. Check |X| +* 1.1 If |X| >= 1/4, go to Step 1.3. +* 1.2 Go to Step 7. +* 1.3 If |X| < 70 log(2), go to Step 2. 
+* 1.4 Go to Step 10. +* Notes: The usual case should take the branches 1.1 -> 1.3 -> 2. +* However, it is conceivable |X| can be small very often +* because EXPM1 is intended to evaluate exp(X)-1 accurately +* when |X| is small. For further details on the comparisons, +* see the notes on Step 1 of setox. +* +* Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). +* 2.1 N := round-to-nearest-integer( X * 64/log2 ). +* 2.2 Calculate J = N mod 64; so J = 0,1,2,..., or 63. +* 2.3 Calculate M = (N - J)/64; so N = 64M + J. +* 2.4 Calculate the address of the stored value of 2^(J/64). +* 2.5 Create the values Sc = 2^M and OnebySc := -2^(-M). +* Notes: See the notes on Step 2 of setox. +* +* Step 3. Calculate X - N*log2/64. +* 3.1 R := X + N*L1, where L1 := single-precision(-log2/64). +* 3.2 R := R + N*L2, L2 := extended-precision(-log2/64 - L1). +* Notes: Applying the analysis of Step 3 of setox in this case +* shows that |R| <= 0.0055 (note that |X| <= 70 log2 in +* this case). +* +* Step 4. Approximate exp(R)-1 by a polynomial +* p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) +* Notes: a) In order to reduce memory access, the coefficients are +* made as "short" as possible: A1 (which is 1/2), A5 and A6 +* are single precision; A2, A3 and A4 are double precision. +* b) Even with the restriction above, +* |p - (exp(R)-1)| < |R| * 2^(-72.7) +* for all |R| <= 0.0055. +* c) To fully utilize the pipeline, p is separated into +* two independent pieces of roughly equal complexity +* p = [ R*S*(A2 + S*(A4 + S*A6)) ] + +* [ R + S*(A1 + S*(A3 + S*A5)) ] +* where S = R*R. +* +* Step 5. Compute 2^(J/64)*p by +* p := T*p +* where T and t are the stored values for 2^(J/64). +* Notes: 2^(J/64) is stored as T and t where T+t approximates +* 2^(J/64) to roughly 85 bits; T is in extended precision +* and t is in single precision. Note also that T is rounded +* to 62 bits so that the last two bits of T are zero. 
The +* reason for such a special form is that T-1, T-2, and T-8 +* will all be exact --- a property that will be exploited +* in Step 6 below. The total relative error in p is no +* bigger than 2^(-67.7) compared to the final result. +* +* Step 6. Reconstruction of exp(X)-1 +* exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). +* 6.1 If M <= 63, go to Step 6.3. +* 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 +* 6.3 If M >= -3, go to 6.5. +* 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 +* 6.5 ans := (T + OnebySc) + (p + t). +* 6.6 Restore user FPCR. +* 6.7 Return ans := Sc * ans. Exit. +* Notes: The various arrangements of the expressions give accurate +* evaluations. +* +* Step 7. exp(X)-1 for |X| < 1/4. +* 7.1 If |X| >= 2^(-65), go to Step 9. +* 7.2 Go to Step 8. +* +* Step 8. Calculate exp(X)-1, |X| < 2^(-65). +* 8.1 If |X| < 2^(-16312), goto 8.3 +* 8.2 Restore FPCR; return ans := X - 2^(-16382). Exit. +* 8.3 X := X * 2^(140). +* 8.4 Restore FPCR; ans := ans - 2^(-16382). +* Return ans := ans*2^(140). Exit +* Notes: The idea is to return "X - tiny" under the user +* precision and rounding modes. To avoid unnecessary +* inefficiency, we stay away from denormalized numbers the +* best we can. For |X| >= 2^(-16312), the straightforward +* 8.2 generates the inexact exception as the case warrants. +* +* Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial +* p = X + X*X*(B1 + X*(B2 + ... + X*B12)) +* Notes: a) In order to reduce memory access, the coefficients are +* made as "short" as possible: B1 (which is 1/2), B9 to B12 +* are single precision; B3 to B8 are double precision; and +* B2 is double extended. +* b) Even with the restriction above, +* |p - (exp(X)-1)| < |X| 2^(-70.6) +* for all |X| <= 0.251. +* Note that 0.251 is slightly bigger than 1/4. +* c) To fully preserve accuracy, the polynomial is computed +* as X + ( S*B1 + Q ) where S = X*X and +* Q = X*S*(B2 + X*(B3 + ... 
+ X*B12)) +* d) To fully utilize the pipeline, Q is separated into +* two independent pieces of roughly equal complexity +* Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + +* [ S*S*(B3 + S*(B5 + ... + S*B11)) ] +* +* Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. +* 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all practical +* purposes. Therefore, go to Step 1 of setox. +* 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical purposes. +* ans := -1 +* Restore user FPCR +* Return ans := ans + 2^(-126). Exit. +* Notes: 10.2 will always create an inexact and return -1 + tiny +* in the user rounding precision and mode. +* + +setox IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + +L2 DC.L $3FDC0000,$82E30865,$4361C4C6,$00000000 + +EXPA3 DC.L $3FA55555,$55554431 +EXPA2 DC.L $3FC55555,$55554018 + +HUGE DC.L $7FFE0000,$FFFFFFFF,$FFFFFFFF,$00000000 +TINY DC.L $00010000,$FFFFFFFF,$FFFFFFFF,$00000000 + +EM1A4 DC.L $3F811111,$11174385 +EM1A3 DC.L $3FA55555,$55554F5A + +EM1A2 DC.L $3FC55555,$55555555,$00000000,$00000000 + +EM1B8 DC.L $3EC71DE3,$A5774682 +EM1B7 DC.L $3EFA01A0,$19D7CB68 + +EM1B6 DC.L $3F2A01A0,$1A019DF3 +EM1B5 DC.L $3F56C16C,$16C170E2 + +EM1B4 DC.L $3F811111,$11111111 +EM1B3 DC.L $3FA55555,$55555555 + +EM1B2 DC.L $3FFC0000,$AAAAAAAA,$AAAAAAAB + DC.L $00000000 + +TWO140 DC.L $48B00000,$00000000 +TWON140 DC.L $37300000,$00000000 + +EXPTBL + DC.L $3FFF0000,$80000000,$00000000,$00000000 + DC.L $3FFF0000,$8164D1F3,$BC030774,$9F841A9B + DC.L $3FFF0000,$82CD8698,$AC2BA1D8,$9FC1D5B9 + DC.L $3FFF0000,$843A28C3,$ACDE4048,$A0728369 + DC.L $3FFF0000,$85AAC367,$CC487B14,$1FC5C95C + DC.L $3FFF0000,$871F6196,$9E8D1010,$1EE85C9F + DC.L $3FFF0000,$88980E80,$92DA8528,$9FA20729 + DC.L $3FFF0000,$8A14D575,$496EFD9C,$A07BF9AF + DC.L $3FFF0000,$8B95C1E3,$EA8BD6E8,$A0020DCF + DC.L $3FFF0000,$8D1ADF5B,$7E5BA9E4,$205A63DA + DC.L $3FFF0000,$8EA4398B,$45CD53C0,$1EB70051 + DC.L $3FFF0000,$9031DC43,$1466B1DC,$1F6EB029 + DC.L 
$3FFF0000,$91C3D373,$AB11C338,$A0781494 + DC.L $3FFF0000,$935A2B2F,$13E6E92C,$9EB319B0 + DC.L $3FFF0000,$94F4EFA8,$FEF70960,$2017457D + DC.L $3FFF0000,$96942D37,$20185A00,$1F11D537 + DC.L $3FFF0000,$9837F051,$8DB8A970,$9FB952DD + DC.L $3FFF0000,$99E04593,$20B7FA64,$1FE43087 + DC.L $3FFF0000,$9B8D39B9,$D54E5538,$1FA2A818 + DC.L $3FFF0000,$9D3ED9A7,$2CFFB750,$1FDE494D + DC.L $3FFF0000,$9EF53260,$91A111AC,$20504890 + DC.L $3FFF0000,$A0B0510F,$B9714FC4,$A073691C + DC.L $3FFF0000,$A2704303,$0C496818,$1F9B7A05 + DC.L $3FFF0000,$A43515AE,$09E680A0,$A0797126 + DC.L $3FFF0000,$A5FED6A9,$B15138EC,$A071A140 + DC.L $3FFF0000,$A7CD93B4,$E9653568,$204F62DA + DC.L $3FFF0000,$A9A15AB4,$EA7C0EF8,$1F283C4A + DC.L $3FFF0000,$AB7A39B5,$A93ED338,$9F9A7FDC + DC.L $3FFF0000,$AD583EEA,$42A14AC8,$A05B3FAC + DC.L $3FFF0000,$AF3B78AD,$690A4374,$1FDF2610 + DC.L $3FFF0000,$B123F581,$D2AC2590,$9F705F90 + DC.L $3FFF0000,$B311C412,$A9112488,$201F678A + DC.L $3FFF0000,$B504F333,$F9DE6484,$1F32FB13 + DC.L $3FFF0000,$B6FD91E3,$28D17790,$20038B30 + DC.L $3FFF0000,$B8FBAF47,$62FB9EE8,$200DC3CC + DC.L $3FFF0000,$BAFF5AB2,$133E45FC,$9F8B2AE6 + DC.L $3FFF0000,$BD08A39F,$580C36C0,$A02BBF70 + DC.L $3FFF0000,$BF1799B6,$7A731084,$A00BF518 + DC.L $3FFF0000,$C12C4CCA,$66709458,$A041DD41 + DC.L $3FFF0000,$C346CCDA,$24976408,$9FDF137B + DC.L $3FFF0000,$C5672A11,$5506DADC,$201F1568 + DC.L $3FFF0000,$C78D74C8,$ABB9B15C,$1FC13A2E + DC.L $3FFF0000,$C9B9BD86,$6E2F27A4,$A03F8F03 + DC.L $3FFF0000,$CBEC14FE,$F2727C5C,$1FF4907D + DC.L $3FFF0000,$CE248C15,$1F8480E4,$9E6E53E4 + DC.L $3FFF0000,$D06333DA,$EF2B2594,$1FD6D45C + DC.L $3FFF0000,$D2A81D91,$F12AE45C,$A076EDB9 + DC.L $3FFF0000,$D4F35AAB,$CFEDFA20,$9FA6DE21 + DC.L $3FFF0000,$D744FCCA,$D69D6AF4,$1EE69A2F + DC.L $3FFF0000,$D99D15C2,$78AFD7B4,$207F439F + DC.L $3FFF0000,$DBFBB797,$DAF23754,$201EC207 + DC.L $3FFF0000,$DE60F482,$5E0E9124,$9E8BE175 + DC.L $3FFF0000,$E0CCDEEC,$2A94E110,$20032C4B + DC.L $3FFF0000,$E33F8972,$BE8A5A50,$2004DFF5 + DC.L 
$3FFF0000,$E5B906E7,$7C8348A8,$1E72F47A + DC.L $3FFF0000,$E8396A50,$3C4BDC68,$1F722F22 + DC.L $3FFF0000,$EAC0C6E7,$DD243930,$A017E945 + DC.L $3FFF0000,$ED4F301E,$D9942B84,$1F401A5B + DC.L $3FFF0000,$EFE4B99B,$DCDAF5CC,$9FB9A9E3 + DC.L $3FFF0000,$F281773C,$59FFB138,$20744C05 + DC.L $3FFF0000,$F5257D15,$2486CC2C,$1F773A19 + DC.L $3FFF0000,$F7D0DF73,$0AD13BB8,$1FFE90D5 + DC.L $3FFF0000,$FA83B2DB,$722A033C,$A041ED22 + DC.L $3FFF0000,$FD3E0C0C,$F486C174,$1F853F3A + +ADJFLAG equ L_SCR2 +SCALE equ FP_SCR1 +ADJSCALE equ FP_SCR2 +SC equ FP_SCR3 +ONEBYSC equ FP_SCR4 + + xref t_frcinx + xref t_extdnrm + xref t_unfl + xref t_ovfl + + xdef setoxd +setoxd: +*--entry point for EXP(X), X is denormalized + MOVE.L (a0),d0 + ANDI.L #$80000000,d0 + ORI.L #$00800000,d0 ...sign(X)*2^(-126) + MOVE.L d0,-(sp) + FMOVE.S #:3F800000,fp0 + fmove.l d1,fpcr + FADD.S (sp)+,fp0 + bra t_frcinx + + xdef setox +setox: +*--entry point for EXP(X), here X is finite, non-zero, and not NaN's + +*--Step 1. + MOVE.L (a0),d0 ...load part of input X + ANDI.L #$7FFF0000,d0 ...biased expo. of X + CMPI.L #$3FBE0000,d0 ...2^(-65) + BGE.B EXPC1 ...normal case + BRA.W EXPSM + +EXPC1: +*--The case |X| >= 2^(-65) + MOVE.W 4(a0),d0 ...expo. and partial sig. of |X| + CMPI.L #$400CB167,d0 ...16380 log2 trunc. 16 bits + BLT.B EXPMAIN ...normal case + BRA.W EXPBIG + +EXPMAIN: +*--Step 2. +*--This is the normal branch: 2^(-65) <= |X| < 16380 log2. + FMOVE.X (a0),fp0 ...load input from (a0) + + FMOVE.X fp0,fp1 + FMUL.S #:42B8AA3B,fp0 ...64/log2 * X + fmovem.x fp2/fp3,-(a7) ...save fp2 + CLR.L ADJFLAG(a6) + FMOVE.L fp0,d0 ...N = int( X * 64/log2 ) + LEA EXPTBL,a1 + FMOVE.L d0,fp0 ...convert to floating-format + + MOVE.L d0,L_SCR1(a6) ...save N temporarily + ANDI.L #$3F,d0 ...D0 is J = N mod 64 + LSL.L #4,d0 + ADDA.L d0,a1 ...address of 2^(J/64) + MOVE.L L_SCR1(a6),d0 + ASR.L #6,d0 ...D0 is M + ADDI.W #$3FFF,d0 ...biased expo. of 2^(M) + MOVE.W L2,L_SCR1(a6) ...prefetch L2, no need in CB + +EXPCONT1: +*--Step 3. 
+*--fp2,fp3 saved on the stack. fp0 is N, fp1 is X, +*--a1 points to 2^(J/64), D0 is biased expo. of 2^(M) + FMOVE.X fp0,fp2 + FMUL.S #:BC317218,fp0 ...N * L1, L1 = lead(-log2/64) + FMUL.X L2,fp2 ...N * L2, L1+L2 = -log2/64 + FADD.X fp1,fp0 ...X + N*L1 + FADD.X fp2,fp0 ...fp0 is R, reduced arg. +* MOVE.W #$3FA5,EXPA3 ...load EXPA3 in cache + +*--Step 4. +*--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL +*-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) +*--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R +*--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))] + + FMOVE.X fp0,fp1 + FMUL.X fp1,fp1 ...fp1 IS S = R*R + + FMOVE.S #:3AB60B70,fp2 ...fp2 IS A5 +* CLR.W 2(a1) ...load 2^(J/64) in cache + + FMUL.X fp1,fp2 ...fp2 IS S*A5 + FMOVE.X fp1,fp3 + FMUL.S #:3C088895,fp3 ...fp3 IS S*A4 + + FADD.D EXPA3,fp2 ...fp2 IS A3+S*A5 + FADD.D EXPA2,fp3 ...fp3 IS A2+S*A4 + + FMUL.X fp1,fp2 ...fp2 IS S*(A3+S*A5) + MOVE.W d0,SCALE(a6) ...SCALE is 2^(M) in extended + clr.w SCALE+2(a6) + move.l #$80000000,SCALE+4(a6) + clr.l SCALE+8(a6) + + FMUL.X fp1,fp3 ...fp3 IS S*(A2+S*A4) + + FADD.S #:3F000000,fp2 ...fp2 IS A1+S*(A3+S*A5) + FMUL.X fp0,fp3 ...fp3 IS R*S*(A2+S*A4) + + FMUL.X fp1,fp2 ...fp2 IS S*(A1+S*(A3+S*A5)) + FADD.X fp3,fp0 ...fp0 IS R+R*S*(A2+S*A4), +* ...fp3 released + + FMOVE.X (a1)+,fp1 ...fp1 is lead. pt. of 2^(J/64) + FADD.X fp2,fp0 ...fp0 is EXP(R) - 1 +* ...fp2 released + +*--Step 5 +*--final reconstruction process +*--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) ) + + FMUL.X fp1,fp0 ...2^(J/64)*(Exp(R)-1) + fmovem.x (a7)+,fp2/fp3 ...fp2/fp3 restored + FADD.S (a1),fp0 ...accurate 2^(J/64) + + FADD.X fp1,fp0 ...2^(J/64) + 2^(J/64)*... 
+ MOVE.L ADJFLAG(a6),d0 + +*--Step 6 + TST.L D0 + BEQ.B NORMAL +ADJUST: + FMUL.X ADJSCALE(a6),fp0 +NORMAL: + FMOVE.L d1,FPCR ...restore user FPCR + FMUL.X SCALE(a6),fp0 ...multiply 2^(M) + bra t_frcinx + +EXPSM: +*--Step 7 + FMOVEM.X (a0),fp0 ...in case X is denormalized + FMOVE.L d1,FPCR + FADD.S #:3F800000,fp0 ...1+X in user mode + bra t_frcinx + +EXPBIG: +*--Step 8 + CMPI.L #$400CB27C,d0 ...16480 log2 + BGT.B EXP2BIG +*--Steps 8.2 -- 8.6 + FMOVE.X (a0),fp0 ...load input from (a0) + + FMOVE.X fp0,fp1 + FMUL.S #:42B8AA3B,fp0 ...64/log2 * X + fmovem.x fp2/fp3,-(a7) ...save fp2/fp3 + MOVE.L #1,ADJFLAG(a6) + FMOVE.L fp0,d0 ...N = int( X * 64/log2 ) + LEA EXPTBL,a1 + FMOVE.L d0,fp0 ...convert to floating-format + MOVE.L d0,L_SCR1(a6) ...save N temporarily + ANDI.L #$3F,d0 ...D0 is J = N mod 64 + LSL.L #4,d0 + ADDA.L d0,a1 ...address of 2^(J/64) + MOVE.L L_SCR1(a6),d0 + ASR.L #6,d0 ...D0 is K + MOVE.L d0,L_SCR1(a6) ...save K temporarily + ASR.L #1,d0 ...D0 is M1 + SUB.L d0,L_SCR1(a6) ...L_SCR1 is M = K - M1 + ADDI.W #$3FFF,d0 ...biased expo. of 2^(M1) + MOVE.W d0,ADJSCALE(a6) ...ADJSCALE := 2^(M1) + clr.w ADJSCALE+2(a6) + move.l #$80000000,ADJSCALE+4(a6) + clr.l ADJSCALE+8(a6) + MOVE.L L_SCR1(a6),d0 ...D0 is M + ADDI.W #$3FFF,d0 ...biased expo. of 2^(M) + BRA.W EXPCONT1 ...go back to Step 3 + +EXP2BIG: +*--Step 9 + FMOVE.L d1,FPCR + MOVE.L (a0),d0 + bclr.b #sign_bit,(a0) ...setox always returns positive + TST.L d0 + BLT t_unfl + BRA t_ovfl + + xdef setoxm1d +setoxm1d: +*--entry point for EXPM1(X), here X is denormalized +*--Step 0. + bra t_extdnrm + + + xdef setoxm1 +setoxm1: +*--entry point for EXPM1(X), here X is finite, non-zero, non-NaN + +*--Step 1. +*--Step 1.1 + MOVE.L (a0),d0 ...load part of input X + ANDI.L #$7FFF0000,d0 ...biased expo. of X + CMPI.L #$3FFD0000,d0 ...1/4 + BGE.B EM1CON1 ...|X| >= 1/4 + BRA.W EM1SM + +EM1CON1: +*--Step 1.3 +*--The case |X| >= 1/4 + MOVE.W 4(a0),d0 ...expo. and partial sig. 
of |X| + CMPI.L #$4004C215,d0 ...70log2 rounded up to 16 bits + BLE.B EM1MAIN ...1/4 <= |X| <= 70log2 + BRA.W EM1BIG + +EM1MAIN: +*--Step 2. +*--This is the case: 1/4 <= |X| <= 70 log2. + FMOVE.X (a0),fp0 ...load input from (a0) + + FMOVE.X fp0,fp1 + FMUL.S #:42B8AA3B,fp0 ...64/log2 * X + fmovem.x fp2/fp3,-(a7) ...save fp2/fp3 +* MOVE.W #$3F81,EM1A4 ...prefetch in CB mode + FMOVE.L fp0,d0 ...N = int( X * 64/log2 ) + LEA EXPTBL,a1 + FMOVE.L d0,fp0 ...convert to floating-format + + MOVE.L d0,L_SCR1(a6) ...save N temporarily + ANDI.L #$3F,d0 ...D0 is J = N mod 64 + LSL.L #4,d0 + ADDA.L d0,a1 ...address of 2^(J/64) + MOVE.L L_SCR1(a6),d0 + ASR.L #6,d0 ...D0 is M + MOVE.L d0,L_SCR1(a6) ...save a copy of M +* MOVE.W #$3FDC,L2 ...prefetch L2 in CB mode + +*--Step 3. +*--fp2,fp3 saved on the stack. fp0 is N, fp1 is X, +*--a1 points to 2^(J/64), D0 and L_SCR1 both contain M + FMOVE.X fp0,fp2 + FMUL.S #:BC317218,fp0 ...N * L1, L1 = lead(-log2/64) + FMUL.X L2,fp2 ...N * L2, L1+L2 = -log2/64 + FADD.X fp1,fp0 ...X + N*L1 + FADD.X fp2,fp0 ...fp0 is R, reduced arg. +* MOVE.W #$3FC5,EM1A2 ...load EM1A2 in cache + ADDI.W #$3FFF,d0 ...D0 is biased expo. of 2^M + +*--Step 4. +*--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL +*-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6))))) +*--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R +*--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))] + + FMOVE.X fp0,fp1 + FMUL.X fp1,fp1 ...fp1 IS S = R*R + + FMOVE.S #:3950097B,fp2 ...fp2 IS A6 +* CLR.W 2(a1) ...load 2^(J/64) in cache + + FMUL.X fp1,fp2 ...fp2 IS S*A6 + FMOVE.X fp1,fp3 + FMUL.S #:3AB60B6A,fp3 ...fp3 IS S*A5 + + FADD.D EM1A4,fp2 ...fp2 IS A4+S*A6 + FADD.D EM1A3,fp3 ...fp3 IS A3+S*A5 + MOVE.W d0,SC(a6) ...SC is 2^(M) in extended + clr.w SC+2(a6) + move.l #$80000000,SC+4(a6) + clr.l SC+8(a6) + + FMUL.X fp1,fp2 ...fp2 IS S*(A4+S*A6) + MOVE.L L_SCR1(a6),d0 ...D0 is M + NEG.W D0 ...D0 is -M + FMUL.X fp1,fp3 ...fp3 IS S*(A3+S*A5) + ADDI.W #$3FFF,d0 ...biased expo. 
of 2^(-M) + FADD.D EM1A2,fp2 ...fp2 IS A2+S*(A4+S*A6) + FADD.S #:3F000000,fp3 ...fp3 IS A1+S*(A3+S*A5) + + FMUL.X fp1,fp2 ...fp2 IS S*(A2+S*(A4+S*A6)) + ORI.W #$8000,d0 ...signed/expo. of -2^(-M) + MOVE.W d0,ONEBYSC(a6) ...OnebySc is -2^(-M) + clr.w ONEBYSC+2(a6) + move.l #$80000000,ONEBYSC+4(a6) + clr.l ONEBYSC+8(a6) + FMUL.X fp3,fp1 ...fp1 IS S*(A1+S*(A3+S*A5)) +* ...fp3 released + + FMUL.X fp0,fp2 ...fp2 IS R*S*(A2+S*(A4+S*A6)) + FADD.X fp1,fp0 ...fp0 IS R+S*(A1+S*(A3+S*A5)) +* ...fp1 released + + FADD.X fp2,fp0 ...fp0 IS EXP(R)-1 +* ...fp2 released + fmovem.x (a7)+,fp2/fp3 ...fp2 restored + +*--Step 5 +*--Compute 2^(J/64)*p + + FMUL.X (a1),fp0 ...2^(J/64)*(Exp(R)-1) + +*--Step 6 +*--Step 6.1 + MOVE.L L_SCR1(a6),d0 ...retrieve M + CMPI.L #63,d0 + BLE.B MLE63 +*--Step 6.2 M >= 64 + FMOVE.S 12(a1),fp1 ...fp1 is t + FADD.X ONEBYSC(a6),fp1 ...fp1 is t+OnebySc + FADD.X fp1,fp0 ...p+(t+OnebySc), fp1 released + FADD.X (a1),fp0 ...T+(p+(t+OnebySc)) + BRA.B EM1SCALE +MLE63: +*--Step 6.3 M <= 63 + CMPI.L #-3,d0 + BGE.B MGEN3 +MLTN3: +*--Step 6.4 M <= -4 + FADD.S 12(a1),fp0 ...p+t + FADD.X (a1),fp0 ...T+(p+t) + FADD.X ONEBYSC(a6),fp0 ...OnebySc + (T+(p+t)) + BRA.B EM1SCALE +MGEN3: +*--Step 6.5 -3 <= M <= 63 + FMOVE.X (a1)+,fp1 ...fp1 is T + FADD.S (a1),fp0 ...fp0 is p+t + FADD.X ONEBYSC(a6),fp1 ...fp1 is T+OnebySc + FADD.X fp1,fp0 ...(T+OnebySc)+(p+t) + +EM1SCALE: +*--Step 6.6 + FMOVE.L d1,FPCR + FMUL.X SC(a6),fp0 + + bra t_frcinx + +EM1SM: +*--Step 7 |X| < 1/4. 
+ CMPI.L #$3FBE0000,d0 ...2^(-65) + BGE.B EM1POLY + +EM1TINY: +*--Step 8 |X| < 2^(-65) + CMPI.L #$00330000,d0 ...2^(-16312) + BLT.B EM12TINY +*--Step 8.2 + MOVE.L #$80010000,SC(a6) ...SC is -2^(-16382) + move.l #$80000000,SC+4(a6) + clr.l SC+8(a6) + FMOVE.X (a0),fp0 + FMOVE.L d1,FPCR + FADD.X SC(a6),fp0 + + bra t_frcinx + +EM12TINY: +*--Step 8.3 + FMOVE.X (a0),fp0 + FMUL.D TWO140,fp0 + MOVE.L #$80010000,SC(a6) + move.l #$80000000,SC+4(a6) + clr.l SC+8(a6) + FADD.X SC(a6),fp0 + FMOVE.L d1,FPCR + FMUL.D TWON140,fp0 + + bra t_frcinx + +EM1POLY: +*--Step 9 exp(X)-1 by a simple polynomial + FMOVE.X (a0),fp0 ...fp0 is X + FMUL.X fp0,fp0 ...fp0 is S := X*X + fmovem.x fp2/fp3,-(a7) ...save fp2 + FMOVE.S #:2F30CAA8,fp1 ...fp1 is B12 + FMUL.X fp0,fp1 ...fp1 is S*B12 + FMOVE.S #:310F8290,fp2 ...fp2 is B11 + FADD.S #:32D73220,fp1 ...fp1 is B10+S*B12 + + FMUL.X fp0,fp2 ...fp2 is S*B11 + FMUL.X fp0,fp1 ...fp1 is S*(B10 + ... + + FADD.S #:3493F281,fp2 ...fp2 is B9+S*... + FADD.D EM1B8,fp1 ...fp1 is B8+S*... + + FMUL.X fp0,fp2 ...fp2 is S*(B9+... + FMUL.X fp0,fp1 ...fp1 is S*(B8+... + + FADD.D EM1B7,fp2 ...fp2 is B7+S*... + FADD.D EM1B6,fp1 ...fp1 is B6+S*... + + FMUL.X fp0,fp2 ...fp2 is S*(B7+... + FMUL.X fp0,fp1 ...fp1 is S*(B6+... + + FADD.D EM1B5,fp2 ...fp2 is B5+S*... + FADD.D EM1B4,fp1 ...fp1 is B4+S*... + + FMUL.X fp0,fp2 ...fp2 is S*(B5+... + FMUL.X fp0,fp1 ...fp1 is S*(B4+... + + FADD.D EM1B3,fp2 ...fp2 is B3+S*... + FADD.X EM1B2,fp1 ...fp1 is B2+S*... + + FMUL.X fp0,fp2 ...fp2 is S*(B3+... + FMUL.X fp0,fp1 ...fp1 is S*(B2+... + + FMUL.X fp0,fp2 ...fp2 is S*S*(B3+...) + FMUL.X (a0),fp1 ...fp1 is X*S*(B2... 
+ + FMUL.S #:3F000000,fp0 ...fp0 is S*B1 + FADD.X fp2,fp1 ...fp1 is Q +* ...fp2 released + + fmovem.x (a7)+,fp2/fp3 ...fp2 restored + + FADD.X fp1,fp0 ...fp0 is S*B1+Q +* ...fp1 released + + FMOVE.L d1,FPCR + FADD.X (a0),fp0 + + bra t_frcinx + +EM1BIG: +*--Step 10 |X| > 70 log2 + MOVE.L (a0),d0 + TST.L d0 + BGT.W EXPC1 +*--Step 10.2 + FMOVE.S #:BF800000,fp0 ...fp0 is -1 + FMOVE.L d1,FPCR + FADD.S #:00800000,fp0 ...-1 + 2^(-126) + + bra t_frcinx + + end diff --git a/sys/arch/m68k/fpsp/sgetem.sa b/sys/arch/m68k/fpsp/sgetem.sa new file mode 100644 index 00000000000..2a4f28f612d --- /dev/null +++ b/sys/arch/m68k/fpsp/sgetem.sa @@ -0,0 +1,166 @@ +* $NetBSD: sgetem.sa,v 1.2 1994/10/26 07:49:45 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. 
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* sgetem.sa 3.1 12/10/90 +* +* The entry point sGETEXP returns the exponent portion +* of the input argument. The exponent bias is removed +* and the exponent value is returned as an extended +* precision number in fp0. sGETEXPD handles denormalized +* numbers. +* +* The entry point sGETMAN extracts the mantissa of the +* input argument. The mantissa is converted to an +* extended precision number and returned in fp0. The +* range of the result is [1.0 - 2.0). +* +* +* Input: Double-extended number X in the ETEMP space in +* the floating-point save stack. +* +* Output: The functions return exp(X) or man(X) in fp0. +* +* Modified: fp0. +* + +SGETEM IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref nrm_set + +* +* This entry point is used by the unimplemented instruction exception +* handler. It points a0 to the input operand. +* +* +* +* SGETEXP +* + + xdef sgetexp +sgetexp: + move.w LOCAL_EX(a0),d0 ;get the exponent + bclr.l #15,d0 ;clear the sign bit + sub.w #$3fff,d0 ;subtract off the bias + fmove.w d0,fp0 ;move the exp to fp0 + rts + + xdef sgetexpd +sgetexpd: + bclr.b #sign_bit,LOCAL_EX(a0) + bsr nrm_set ;normalize (exp will go negative) + move.w LOCAL_EX(a0),d0 ;load resulting exponent into d0 + sub.w #$3fff,d0 ;subtract off the bias + fmove.w d0,fp0 ;move the exp to fp0 + rts +* +* +* This entry point is used by the unimplemented instruction exception +* handler. It points a0 to the input operand. +* +* +* +* SGETMAN +* +* +* For normalized numbers, leave the mantissa alone, simply load +* with an exponent of +/- $3fff. 
+* + xdef sgetman +sgetman: + move.l USER_FPCR(a6),d0 + andi.l #$ffffff00,d0 ;clear rounding precision and mode + fmove.l d0,fpcr ;this fpcr setting is used by the 882 + move.w LOCAL_EX(a0),d0 ;get the exp (really just want sign bit) + or.w #$7fff,d0 ;set all exp bits, sign bit is kept + bclr.l #14,d0 ;make it the new exp +-3fff + move.w d0,LOCAL_EX(a0) ;move the sign & exp back to fsave stack + fmove.x (a0),fp0 ;put new value back in fp0 + rts + +* +* For denormalized numbers, shift the mantissa until the j-bit = 1, +* then load the exponent with +/- $3fff. +* + xdef sgetmand +sgetmand: + move.l LOCAL_HI(a0),d0 ;load ms mant in d0 + move.l LOCAL_LO(a0),d1 ;load ls mant in d1 + bsr shft ;shift mantissa bits till msbit is set + move.l d0,LOCAL_HI(a0) ;put ms mant back on stack + move.l d1,LOCAL_LO(a0) ;put ls mant back on stack + bra.b sgetman + +* +* SHFT +* +* Shifts the mantissa bits until msbit is set. +* input: +* ms mantissa part in d0 +* ls mantissa part in d1 +* output: +* shifted bits in d0 and d1 +shft: + tst.l d0 ;if any bits set in ms mant + bne.b upper ;then branch +* ;else no bits set in ms mant + tst.l d1 ;test if any bits set in ls mant + bne.b cont ;if set then continue + bra.b shft_end ;else return +cont: + move.l d3,-(a7) ;save d3 + exg d0,d1 ;shift ls mant to ms mant + bfffo d0{0:32},d3 ;find first 1 in old ls mant (now d0), offset to d3 + lsl.l d3,d0 ;shift first 1 to integer bit in ms mant + move.l (a7)+,d3 ;restore d3 + bra.b shft_end +upper: + + movem.l d3/d5/d6,-(a7) ;save registers + bfffo d0{0:32},d3 ;find first 1 in ms mant, offset to d3 + lsl.l d3,d0 ;shift ms mant until j-bit is set + move.l d1,d6 ;save ls mant in d6 + lsl.l d3,d1 ;shift ls mant by count + move.l #32,d5 + sub.l d3,d5 ;d5 = 32 - shift count + lsr.l d5,d6 ;shift off all bits but those that will +* ;be shifted into ms mant + or.l d6,d0 ;shift the ls mant bits into the ms mant + movem.l (a7)+,d3/d5/d6 ;restore registers +shft_end: + rts + + end diff --git a/sys/arch/m68k/fpsp/sint.sa b/sys/arch/m68k/fpsp/sint.sa 
new file mode 100644 index 00000000000..d300a65dcf0 --- /dev/null +++ b/sys/arch/m68k/fpsp/sint.sa @@ -0,0 +1,272 @@ +* $NetBSD: sint.sa,v 1.2 1994/10/26 07:49:48 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* sint.sa 3.1 12/10/90 +* +* The entry point sINT computes the rounded integer +* equivalent of the input argument, sINTRZ computes +* the integer rounded to zero of the input argument. +* +* Entry points sint and sintrz are called from do_func +* to emulate the fint and fintrz unimplemented instructions, +* respectively. 
Entry point sintdo is used by bindec. +* +* Input: (Entry points sint and sintrz) Double-extended +* number X in the ETEMP space in the floating-point +* save stack. +* (Entry point sintdo) Double-extended number X in +* location pointed to by the address register a0. +* (Entry point sintd) Double-extended denormalized +* number X in the ETEMP space in the floating-point +* save stack. +* +* Output: The function returns int(X) or intrz(X) in fp0. +* +* Modifies: fp0. +* +* Algorithm: (sint and sintrz) +* +* 1. If exp(X) >= 63, return X. +* If exp(X) < 0, return +/- 0 or +/- 1, according to +* the rounding mode. +* +* 2. (X is in range) set rsc = 63 - exp(X). Unnormalize the +* result to the exponent $403e. +* +* 3. Round the result in the mode given in USER_FPCR. For +* sintrz, force round-to-zero mode. +* +* 4. Normalize the rounded result; store in fp0. +* +* For the denormalized cases, force the correct result +* for the given sign and rounding mode. +* +* Sign(X) +* RMODE + - +* ----- -------- +* RN +0 -0 +* RZ +0 -0 +* RM +0 -1 +* RP +1 -0 +* + +SINT IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref dnrm_lp + xref nrm_set + xref round + xref t_inx2 + xref ld_pone + xref ld_mone + xref ld_pzero + xref ld_mzero + xref snzrinx + +* +* FINT +* + xdef sint +sint: + bfextu FPCR_MODE(a6){2:2},d1 ;use user's mode for rounding +* ;implicity has extend precision +* ;in upper word. + move.l d1,L_SCR1(a6) ;save mode bits + bra.b sintexc + +* +* FINT with extended denorm inputs. 
+* + xdef sintd +sintd: + btst.b #5,FPCR_MODE(a6) + beq snzrinx ;if round nearest or round zero, +/- 0 + btst.b #4,FPCR_MODE(a6) + beq.b rnd_mns +rnd_pls: + btst.b #sign_bit,LOCAL_EX(a0) + bne.b sintmz + bsr ld_pone ;if round plus inf and pos, answer is +1 + bra t_inx2 +rnd_mns: + btst.b #sign_bit,LOCAL_EX(a0) + beq.b sintpz + bsr ld_mone ;if round mns inf and neg, answer is -1 + bra t_inx2 +sintpz: + bsr ld_pzero + bra t_inx2 +sintmz: + bsr ld_mzero + bra t_inx2 + +* +* FINTRZ +* + xdef sintrz +sintrz: + move.l #1,L_SCR1(a6) ;use rz mode for rounding +* ;implicitly has extended precision +* ;in upper word. + bra.b sintexc +* +* SINTDO +* +* Input: a0 points to an IEEE extended format operand +* Output: fp0 has the result +* +* Exceptions: +* +* If the subroutine results in an inexact operation, the inx2 and +* ainx bits in the USER_FPSR are set. +* +* + xdef sintdo +sintdo: + bfextu FPCR_MODE(a6){2:2},d1 ;use user's mode for rounding +* ;implicitly has ext precision +* ;in upper word. + move.l d1,L_SCR1(a6) ;save mode bits +* +* Real work of sint is in sintexc +* +sintexc: + bclr.b #sign_bit,LOCAL_EX(a0) ;convert to internal extended +* ;format + sne LOCAL_SGN(a0) + cmp.w #$403e,LOCAL_EX(a0) ;check if (unbiased) exp > 63 + bgt.b out_rnge ;branch if exp > 63 + cmp.w #$3ffd,LOCAL_EX(a0) ;check if (unbiased) exp < -1 + bgt.w in_rnge ;if 63 >= exp >= -1, do calc +* +* Input magnitude is less than 1/2 (biased exp <= $3ffd). Restore sign, +* and check for directed +* rounding modes. L_SCR1 contains the rmode in the lower byte. +* +un_rnge: + btst.b #1,L_SCR1+3(a6) ;check for rn and rz + beq.b un_rnrz + tst.b LOCAL_SGN(a0) ;check for sign + bne.b un_rmrp_neg +* +* Sign is +. If rp, load +1.0, if rm, load +0.0 +* + cmpi.b #3,L_SCR1+3(a6) ;check for rp + beq.b un_ldpone ;if rp, load +1.0 + bsr ld_pzero ;if rm, load +0.0 + bra t_inx2 +un_ldpone: + bsr ld_pone + bra t_inx2 +* +* Sign is -. 
If rm, load -1.0, if rp, load -0.0 +* +un_rmrp_neg: + cmpi.b #2,L_SCR1+3(a6) ;check for rm + beq.b un_ldmone ;if rm, load -1.0 + bsr ld_mzero ;if rp, load -0.0 + bra t_inx2 +un_ldmone: + bsr ld_mone + bra t_inx2 +* +* Rmode is rn or rz; return signed zero +* +un_rnrz: + tst.b LOCAL_SGN(a0) ;check for sign + bne.b un_rnrz_neg + bsr ld_pzero + bra t_inx2 +un_rnrz_neg: + bsr ld_mzero + bra t_inx2 + +* +* Input is greater than 2^63. All bits are significant. Return +* the input. +* +out_rnge: + bfclr LOCAL_SGN(a0){0:8} ;change back to IEEE ext format + beq.b intps + bset.b #sign_bit,LOCAL_EX(a0) +intps: + fmove.l fpcr,-(sp) + fmove.l #0,fpcr + fmove.x LOCAL_EX(a0),fp0 ;if exp > 63 +* ;then return X to the user +* ;there are no fraction bits + fmove.l (sp)+,fpcr + rts + +in_rnge: +* ;shift off fraction bits + clr.l d0 ;clear d0 - initial g,r,s for +* ;dnrm_lp + move.l #$403e,d1 ;set threshold for dnrm_lp +* ;assumes a0 points to operand + bsr dnrm_lp +* ;returns unnormalized number +* ;pointed by a0 +* ;output d0 supplies g,r,s +* ;used by round + move.l L_SCR1(a6),d1 ;use selected rounding mode +* +* + bsr round ;round the unnorm based on users +* ;input a0 ptr to ext X +* ; d0 g,r,s bits +* ; d1 PREC/MODE info +* ;output a0 ptr to rounded result +* ;inexact flag set in USER_FPSR +* ;if initial grs set +* +* normalize the rounded result and store value in fp0 +* + bsr nrm_set ;normalize the unnorm +* ;Input: a0 points to operand to +* ;be normalized +* ;Output: a0 points to normalized +* ;result + bfclr LOCAL_SGN(a0){0:8} + beq.b nrmrndp + bset.b #sign_bit,LOCAL_EX(a0) ;return to IEEE extended format +nrmrndp: + fmove.l fpcr,-(sp) + fmove.l #0,fpcr + fmove.x LOCAL_EX(a0),fp0 ;move result to fp0 + fmove.l (sp)+,fpcr + rts + + end diff --git a/sys/arch/m68k/fpsp/skeleton.sa b/sys/arch/m68k/fpsp/skeleton.sa new file mode 100644 index 00000000000..4ed506d77c7 --- /dev/null +++ b/sys/arch/m68k/fpsp/skeleton.sa @@ -0,0 +1,482 @@ +* $NetBSD: skeleton.sa,v 1.3 1994/10/26 
07:49:50 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* skeleton.sa 3.2 4/26/91 +* +* This file contains code that is system dependent and will +* need to be modified to install the FPSP. +* +* Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'. +* Put any target system specific handling that must be done immediately +* before the jump instruction. If there no handling necessary, then +* the 'fpsp_xxxx' handler entry point should be placed in the exception +* table so that the 'jmp' can be eliminated. 
If the FPSP determines that the
+*	exception is one that must be reported then there will be a
+*	return from the package by a 'jmp real_xxxx'.  At that point
+*	the machine state will be identical to the state before
+*	the FPSP was entered.  In particular, whatever condition
+*	that caused the exception will still be pending when the FPSP
+*	package returns.  Thus, there will be system specific code
+*	to handle the exception.
+*
+*	If the exception was completely handled by the package, then
+*	the return will be via a 'jmp fpsp_done'.  Unless there is
+*	OS specific work to be done (such as handling a context switch or
+*	interrupt) the user program can be resumed via 'rte'.
+*
+*	In the following skeleton code, some typical 'real_xxxx' handling
+*	code is shown.  This code may need to be moved to an appropriate
+*	place in the target system, or rewritten.
+*
+
+SKELETON	IDNT    2,1 Motorola 040 Floating Point Software Package
+
+	section 15
+*
+*	The following counters are used for standalone testing
+*
+sigunimp	dc.l	0
+sigbsun		dc.l	0
+siginex		dc.l	0
+sigdz		dc.l	0
+sigunfl		dc.l	0
+sigovfl		dc.l	0
+sigoperr	dc.l	0
+sigsnan		dc.l	0
+sigunsupp	dc.l	0
+
+	section 8
+
+	include	fpsp.h
+
+	xref	b1238_fix
+
+*
+*	Divide by Zero exception
+*
+*	Every dz exception is reported ('real'), so the package has no
+*	fpsp_dz entry point and both labels share this handler.  The
+*	handler counts the event, clears the pending E1 flag through an
+*	fsave/frestore pair and returns from the exception.
+*
+	xdef	dz
+	xdef	real_dz
+dz:
+real_dz:
+	addq.l	#1,sigdz		;standalone-test event counter
+
+	link		a6,#-LOCAL_SIZE
+	fsave		-(sp)
+	bclr.b		#E1,E_BYTE(a6)	;drop the pending E1 exception
+	frestore	(sp)+
+	unlk		a6
+	rte
+*
+*	Inexact exception
+*
+*	All inexact exceptions are real, but the 'real' handler
+*	will probably want to clear the pending exception.
+*	The provided code will clear the E3 exception (if pending),
+*	otherwise clear the E1 exception.  The frestore is not really
+*	necessary for E1 exceptions.
+*
+*	Code following the 'inex' label is to handle bug #1232.
In this
+*	bug, if an E1 snan, ovfl, or unfl occurred, and the process was
+*	swapped out before taking the exception, the exception taken on
+*	return was inex, rather than the correct exception.  The snan, ovfl,
+*	and unfl exception to be taken must not have been enabled.  The
+*	fix is to check for E1, and the existence of one of snan, ovfl,
+*	or unfl bits set in the fpsr.  If any of these are set, branch
+*	to the appropriate handler for the exception in the fpsr.  Note
+*	that this fix is only for d43b parts, and is skipped if the
+*	version number is not $40.
+*
+*	Stack discipline: every exit from the bug-1232 test pops the
+*	4-byte fpsr copy, frestores the saved frame and unlinks a6, so
+*	the target handler is entered exactly as if it had been
+*	vectored to directly.
+*
+	xdef	real_inex
+	xdef	inex
+inex:
+	link		a6,#-LOCAL_SIZE
+	fsave		-(sp)
+	cmpi.b		#VER_40,(sp)		;test version number
+	bne.b		not_fmt40		;frame and link stay in place
+	fmove.l		fpsr,-(sp)		;working copy of the fpsr
+	btst.b		#E1,E_BYTE(a6)		;test for E1 set
+	beq.b		not_b1232
+	btst.b		#snan_bit,2(sp) 	;test for snan
+	beq		inex_ckofl
+	add.l		#4,sp			;discard the fpsr copy
+	frestore	(sp)+
+	unlk		a6
+	bra		snan
+inex_ckofl:
+	btst.b		#ovfl_bit,2(sp) 	;test for ovfl
+	beq		inex_ckufl
+	add.l		#4,sp			;discard the fpsr copy
+	frestore	(sp)+
+	unlk		a6
+	bra		ovfl
+inex_ckufl:
+	btst.b		#unfl_bit,2(sp) 	;test for unfl
+	beq		not_b1232
+	add.l		#4,sp			;discard the fpsr copy
+	frestore	(sp)+
+	unlk		a6
+	bra		unfl
+
+*
+* We do not have the bug 1232 case.  Clean up the stack and fall
+* through into real_inex.
+*
+not_b1232:
+	add.l		#4,sp			;discard the fpsr copy
+	frestore	(sp)+
+	unlk		a6
+
+real_inex:
+
+	add.l	#1,siginex		;for standalone testing
+
+	link		a6,#-LOCAL_SIZE
+	fsave		-(sp)
+*
+* not_fmt40 is also entered directly from inex, with the link and
+* fsave done there still in effect; that path bypasses the siginex
+* increment above.
+*
+not_fmt40:
+	bclr.b		#E3,E_BYTE(a6)		;clear and test E3 flag
+	beq.b		inex_cke1
+*
+* Clear dirty bit on dest register in the frame before branching
+* to b1238_fix.
+*
+	movem.l		d0/d1,USER_DA(a6)
+	bfextu		CMDREG1B(a6){6:3},d0		;get dest reg no
+	bclr.b		d0,FPR_DIRTY_BITS(a6)	;clr dest dirty bit
+	bsr.l		b1238_fix		;test for bug1238 case
+	movem.l		USER_DA(a6),d0/d1
+	bra.b		inex_done
+inex_cke1:
+	bclr.b		#E1,E_BYTE(a6)		;E3 was not set: clear E1 instead
+inex_done:
+	frestore	(sp)+
+	unlk		a6
+	rte
+
+*
+*	Overflow exception
+*
+*	ovfl enters the package; real_ovfl is where the package comes
+*	back when the exception must be reported.  It clears E3 if it
+*	was set, otherwise E1.
+*
+	xref	fpsp_ovfl
+	xdef	real_ovfl
+	xdef	ovfl
+ovfl:
+	jmp	fpsp_ovfl
+real_ovfl:
+
+	add.l	#1,sigovfl		;for standalone testing
+
+	link		a6,#-LOCAL_SIZE
+	fsave		-(sp)
+	bclr.b		#E3,E_BYTE(a6)		;clear and test E3 flag
+	bne.b		ovfl_done		;E3 was set: leave E1 alone
+	bclr.b		#E1,E_BYTE(a6)
+ovfl_done:
+	frestore	(sp)+
+	unlk		a6
+	rte
+
+*
+*	Underflow exception
+*
+*	Same shape as the overflow handler above.
+*
+	xref	fpsp_unfl
+	xdef	real_unfl
+	xdef	unfl
+unfl:
+	jmp	fpsp_unfl
+real_unfl:
+
+	add.l	#1,sigunfl		;for standalone testing
+
+	link		a6,#-LOCAL_SIZE
+	fsave		-(sp)
+	bclr.b		#E3,E_BYTE(a6)		;clear and test E3 flag
+	bne.b		unfl_done		;E3 was set: leave E1 alone
+	bclr.b		#E1,E_BYTE(a6)
+unfl_done:
+	frestore	(sp)+
+	unlk		a6
+	rte
+
+*
+*	Signalling NAN exception
+*
+	xref	fpsp_snan
+	xdef	real_snan
+	xdef	snan
+snan:
+	jmp	fpsp_snan
+real_snan:
+	link		a6,#-LOCAL_SIZE
+	fsave		-(sp)
+	bclr.b		#E1,E_BYTE(a6)	;snan is always an E1 exception
+	frestore	(sp)+
+	unlk		a6
+
+	add.l	#1,sigsnan		;for standalone testing
+	rte
+
+*
+*	Operand Error exception
+*
+	xref	fpsp_operr
+	xdef	real_operr
+	xdef	operr
+operr:
+	jmp	fpsp_operr
+real_operr:
+	link		a6,#-LOCAL_SIZE
+	fsave		-(sp)
+	bclr.b		#E1,E_BYTE(a6)	;operr is always an E1 exception
+	frestore	(sp)+
+	unlk		a6
+
+	add.l	#1,sigoperr		;for standalone testing
+
+	rte
+
+*
+*	BSUN exception
+*
+*	This sample handler simply clears the nan bit in the FPSR.
+*
+	xref	fpsp_bsun
+	xdef	real_bsun
+	xdef	bsun
+bsun:
+	jmp	fpsp_bsun
+*
+*	Reported BSUN: count it, clear E1, then clear the nan bit in a
+*	stacked copy of the FPSR and write that copy back.
+*
+real_bsun:
+	addq.l	#1,sigbsun		;standalone-test event counter
+
+	link		a6,#-LOCAL_SIZE
+	fsave		-(sp)
+	bclr.b		#E1,E_BYTE(a6)	;bsun is always an E1 exception
+	fmove.l		FPSR,-(sp)	;stacked working copy
+	bclr.b		#nan_bit,(sp)
+	fmove.l		(sp)+,FPSR
+	frestore	(sp)+
+	unlk		a6
+	rte
+
+*
+*	F-line exception
+*
+*	A 'real' F-line exception is one that the FPSP isn't supposed to
+*	handle. E.g. an instruction with a co-processor ID that is not 1.
+*	The sample handler only counts the event.
+*
+	xref	fpsp_fline
+	xdef	real_fline
+	xdef	fline
+fline:
+	jmp	fpsp_fline
+real_fline:
+	addq.l	#1,sigunimp		;standalone-test event counter
+	rte
+
+*
+*	Unsupported data type exception
+*
+	xref	fpsp_unsupp
+	xdef	real_unsupp
+	xdef	unsupp
+unsupp:
+	jmp	fpsp_unsupp
+real_unsupp:
+	addq.l	#1,sigunsupp		;standalone-test event counter
+
+	link		a6,#-LOCAL_SIZE
+	fsave		-(sp)
+	bclr.b		#E1,E_BYTE(a6)	;unsupp is always an E1 exception
+	frestore	(sp)+
+	unlk		a6
+	rte
+
+*
+*	Trace exception
+*
+	xdef	real_trace
+real_trace:
+	rte
+
+*
+*	fpsp_fmt_error --- exit point for frame format error
+*
+*	The fpu stack frame does not match the frames existing
+*	or planned at the time of this writing.  The fpsp is
+*	unable to handle frame sizes not in the following
+*	version:size pairs:
+*
+*	{4060, 4160} - busy frame
+*	{4028, 4130} - unimp frame
+*	{4000, 4100} - idle frame
+*
+*	This entry point simply holds an f-line illegal value.
+*	Replace this with a call to your kernel panic code or
+*	code to handle future revisions of the fpu.
+*
+	xdef	fpsp_fmt_error
+fpsp_fmt_error:
+
+	dc.l	$f27f0000	;f-line illegal
+
+*
+*	fpsp_done --- FPSP exit point
+*
+*	The exception has been handled by the package and we are ready
+*	to return to user mode, but there may be OS specific code
+*	to execute before we do.  If there is, do it now.
+*
+*
+	xdef	fpsp_done
+fpsp_done:
+	rte
+
+*
+*	mem_write --- write to user or supervisor address space
+*
+* Writes to memory while in supervisor mode.
copyout accomplishes
+* this via a 'moves' instruction. copyout is a UNIX SVR3 (and later) function.
+* If you don't have copyout, use the local copy of the function below.
+*
+*	a0 - supervisor source address
+*	a1 - user destination address
+*	d0 - number of bytes to write (maximum count is 12)
+*
+* The supervisor source address is guaranteed to point into the supervisor
+* stack.  The result is that a UNIX
+* process is allowed to sleep as a consequence of a page fault during
+* copyout.  The probability of a page fault is exceedingly small because
+* the 68040 always reads the destination address and thus the page
+* faults should have already been handled.
+*
+* If the EXC_SR shows that the exception was from supervisor space,
+* then just do a dumb (and slow) memory move.  In a UNIX environment
+* there shouldn't be any supervisor mode floating point exceptions.
+*
+* A count of zero returns at once without touching memory.  Without
+* that check the supervisor loop would copy a byte, take d0 to -1 and
+* keep going until it wrapped back to zero.
+*
+* On return from the supervisor path a0/a1 point past the copied bytes
+* and d0 is zero; the user path preserves d1 and leaves d0/a0/a1 as
+* copyout left them.
+*
+	xdef	mem_write
+mem_write:
+	tst.l	d0		;anything to copy?
+	beq.b	mem_wr_done	;no: leave source and destination alone
+	btst.b	#5,EXC_SR(a6)	;check for supervisor state
+	beq.b	user_write
+super_write:
+	move.b	(a0)+,(a1)+
+	subq.l	#1,d0
+	bne.b	super_write
+mem_wr_done:
+	rts
+user_write:
+	move.l	d1,-(sp)	;preserve d1 just in case
+	move.l	d0,-(sp)	;copyout(source, destination, count)
+	move.l	a1,-(sp)
+	move.l	a0,-(sp)
+	jsr		copyout
+	add.l	#12,sp		;drop the three arguments
+	move.l	(sp)+,d1
+	rts
+*
+* mem_read --- read from user or supervisor address space
+*
+* Reads from memory while in supervisor mode.  copyin accomplishes
+* this via a 'moves' instruction.  copyin is a UNIX SVR3 (and later) function.
+* If you don't have copyin, use the local copy of the function below.
+*
+* The FPSP calls mem_read to read the original F-line instruction in order
+* to extract the data register number when the 'Dn' addressing mode is
+* used.
+*
+*Input:
+*	a0 - user source address
+*	a1 - supervisor destination address
+*	d0 - number of bytes to read (maximum count is 12)
+*
+* Like mem_write, mem_read always reads with a supervisor
+* destination address on the supervisor stack.
Also like mem_write,
+* the EXC_SR is checked and a simple memory copy is done if reading
+* from supervisor space is indicated.
+*
+* A count of zero returns at once without touching memory; without
+* that check the supervisor loop would run until d0 wrapped to zero.
+*
+	xdef	mem_read
+mem_read:
+	tst.l	d0		;anything to copy?
+	beq.b	mem_rd_done	;no: leave source and destination alone
+	btst.b	#5,EXC_SR(a6)	;check for supervisor state
+	beq.b	user_read
+super_read:
+	move.b	(a0)+,(a1)+
+	subq.l	#1,d0
+	bne.b	super_read
+mem_rd_done:
+	rts
+user_read:
+	move.l	d1,-(sp)	;preserve d1 just in case
+	move.l	d0,-(sp)	;copyin(source, destination, count)
+	move.l	a1,-(sp)
+	move.l	a0,-(sp)
+	jsr	copyin
+	add.l	#12,sp		;drop the three arguments
+	move.l	(sp)+,d1
+	rts
+
+*
+* Use these routines if your kernel doesn't have copyout/copyin equivalents.
+* Assumes that D0/D1/A0/A1 are scratch registers. copyout overwrites DFC,
+* and copyin overwrites SFC.
+*
+* Arguments are on the stack: 4(sp) source, 8(sp) destination,
+* 12(sp) byte count.  The loop counter is the low word of count-1
+* (dbf), so a zero count is rejected up front: it would otherwise
+* become $FFFF and copy 65536 bytes.  In that case DFC/SFC are left
+* unchanged.
+*
+copyout:
+	move.l	4(sp),a0	; source
+	move.l	8(sp),a1	; destination
+	move.l	12(sp),d0	; count
+	beq.b	outdone		; zero count: nothing to copy
+	sub.l	#1,d0		; dec count by 1 for dbra
+	move.l	#1,d1
+	movec	d1,DFC		; set dfc for user data space
+moreout:
+	move.b	(a0)+,d1	; fetch supervisor byte
+	moves.b	d1,(a1)+	; write user byte
+	dbf.w	d0,moreout
+outdone:
+	rts
+
+copyin:
+	move.l	4(sp),a0	; source
+	move.l	8(sp),a1	; destination
+	move.l	12(sp),d0	; count
+	beq.b	indone		; zero count: nothing to copy
+	sub.l	#1,d0		; dec count by 1 for dbra
+	move.l	#1,d1
+	movec	d1,SFC		; set sfc for user space
+morein:
+	moves.b	(a0)+,d1	; fetch user byte
+	move.b	d1,(a1)+	; write supervisor byte
+	dbf.w	d0,morein
+indone:
+	rts
+
+	end
diff --git a/sys/arch/m68k/fpsp/slog2.sa b/sys/arch/m68k/fpsp/slog2.sa
new file mode 100644
index 00000000000..197beb498fb
--- /dev/null
+++ b/sys/arch/m68k/fpsp/slog2.sa
@@ -0,0 +1,213 @@
+*	$NetBSD: slog2.sa,v 1.2 1994/10/26 07:49:52 cgd Exp $
+
+*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+*	M68000 Hi-Performance Microprocessor Division
+*	M68040 Software Package
+*
+*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+*	All rights reserved.
+*
+*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* slog2.sa 3.1 12/10/90 +* +* The entry point slog10 computes the base-10 +* logarithm of an input argument X. +* slog10d does the same except the input value is a +* denormalized number. +* sLog2 and sLog2d are the base-2 analogues. +* +* INPUT: Double-extended value in memory location pointed to +* by address register a0. +* +* OUTPUT: log_10(X) or log_2(X) returned in floating-point +* register fp0. +* +* ACCURACY and MONOTONICITY: The returned result is within 1.7 +* ulps in 64 significant bit, i.e. within 0.5003 ulp +* to 53 bits if the result is subsequently rounded +* to double precision. The result is provably monotonic +* in double precision. +* +* SPEED: Two timings are measured, both in the copy-back mode. 
+*		The first one is measured when the function is invoked
+*		the first time (so the instructions and data are not
+*		in cache), and the second one is measured when the
+*		function is reinvoked at the same input argument.
+*
+*	ALGORITHM and IMPLEMENTATION NOTES:
+*
+*       slog10d:
+*
+*       Step 0.   If X < 0, create a NaN and raise the invalid operation
+*                 flag. Otherwise, save FPCR in D1; set FpCR to default.
+*       Notes:    Default means round-to-nearest mode, no floating-point
+*                 traps, and precision control = double extended.
+*
+*       Step 1.   Call slognd to obtain Y = log(X), the natural log of X.
+*       Notes:    Even if X is denormalized, log(X) is always normalized.
+*
+*       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).
+*            2.1  Restore the user FPCR
+*            2.2  Return ans := Y * INV_L10.
+*
+*
+*       slog10:
+*
+*       Step 0.   If X < 0, create a NaN and raise the invalid operation
+*                 flag. Otherwise, save FPCR in D1; set FpCR to default.
+*       Notes:    Default means round-to-nearest mode, no floating-point
+*                 traps, and precision control = double extended.
+*
+*       Step 1.   Call sLogN to obtain Y = log(X), the natural log of X.
+*
+*       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).
+*            2.1  Restore the user FPCR
+*            2.2  Return ans := Y * INV_L10.
+*
+*
+*       sLog2d:
+*
+*       Step 0.   If X < 0, create a NaN and raise the invalid operation
+*                 flag. Otherwise, save FPCR in D1; set FpCR to default.
+*       Notes:    Default means round-to-nearest mode, no floating-point
+*                 traps, and precision control = double extended.
+*
+*       Step 1.   Call slognd to obtain Y = log(X), the natural log of X.
+*       Notes:    Even if X is denormalized, log(X) is always normalized.
+*
+*       Step 2.   Compute log_2(X) = log(X) * (1/log(2)).
+*            2.1  Restore the user FPCR
+*            2.2  Return ans := Y * INV_L2.
+*
+*
+*       sLog2:
+*
+*       Step 0.   If X < 0, create a NaN and raise the invalid operation
+*                 flag. Otherwise, save FPCR in D1; set FpCR to default.
+*       Notes:    Default means round-to-nearest mode, no floating-point
+*                 traps, and precision control = double extended.
+*
+*       Step 1.   If X is not an integer power of two, i.e., X != 2^k,
+*                 go to Step 3.
+*
+*       Step 2.   Return k.
+*            2.1  Get integer k, X = 2^k.
+*            2.2  Restore the user FPCR.
+*            2.3  Return ans := convert-to-double-extended(k).
+*
+*       Step 3.   Call sLogN to obtain Y = log(X), the natural log of X.
+*
+*       Step 4.   Compute log_2(X) = log(X) * (1/log(2)).
+*            4.1  Restore the user FPCR
+*            4.2  Return ans := Y * INV_L2.
+*
+
+SLOG2    IDNT    2,1 Motorola 040 Floating Point Software Package
+
+	section	8
+
+	xref	t_frcinx
+	xref	t_operr
+	xref	slogn
+	xref	slognd
+
+INV_L10  DC.L $3FFD0000,$DE5BD8A9,$37287195,$00000000
+
+INV_L2   DC.L $3FFF0000,$B8AA3B29,$5C17F0BC,$00000000
+
+*
+*	All four entry points follow one pattern: reject a negative
+*	operand, push d1 (the value later reloaded into fpcr), call the
+*	natural-log routine with d1 cleared, reload fpcr from the stack
+*	and scale the result by INV_L10 or INV_L2.
+*
+	xdef	slog10d
+slog10d:
+*--Log10(X), denormalized X
+	move.l		(a0),d0
+	blt.w		slog_inv		...sign bit set
+	move.l		d1,-(sp)
+	moveq		#0,d1
+	bsr		slognd			...fp0 = log(X)
+	fmove.l		(sp)+,fpcr
+	fmul.x		INV_L10,fp0
+	bra		t_frcinx
+
+	xdef	slog10
+slog10:
+*--Log10(X), normalized X
+	move.l		(a0),d0
+	blt.w		slog_inv		...sign bit set
+	move.l		d1,-(sp)
+	moveq		#0,d1
+	bsr		slogn			...fp0 = log(X)
+	fmove.l		(sp)+,fpcr
+	fmul.x		INV_L10,fp0
+	bra		t_frcinx
+
+	xdef	slog2d
+slog2d:
+*--Log2(X), denormalized X
+	move.l		(a0),d0
+	blt.w		slog_inv		...sign bit set
+	move.l		d1,-(sp)
+	moveq		#0,d1
+	bsr		slognd			...fp0 = log(X)
+	fmove.l		(sp)+,fpcr
+	fmul.x		INV_L2,fp0
+	bra		t_frcinx
+
+	xdef	slog2
+slog2:
+*--Log2(X), normalized X
+	move.l		(a0),d0
+	blt.w		slog_inv		...sign bit set
+
+*--X is 2^k only if every mantissa bit below the top one is zero
+	move.l		8(a0),d0
+	bne.b		slog2_gen		...low mantissa word non-zero
+	move.l		4(a0),d0
+	and.l		#$7FFFFFFF,d0		...Z reflects the masked value
+	bne.b		slog2_gen
+
+*--X = 2^k: the answer is the unbiased exponent
+	bfextu		(a0){1:15},d0		...biased exponent
+	sub.l		#$3FFF,d0
+	fmove.l		d1,fpcr
+	fmove.l		d0,fp0
+	bra		t_frcinx
+
+slog2_gen:
+	move.l		d1,-(sp)
+	moveq		#0,d1
+	bsr		slogn			...fp0 = log(X)
+	fmove.l		(sp)+,fpcr
+	fmul.x		INV_L2,fp0
+	bra		t_frcinx
+
+slog_inv:
+	bra		t_operr
+
+	end
diff --git a/sys/arch/m68k/fpsp/slogn.sa b/sys/arch/m68k/fpsp/slogn.sa
new file mode 100644
index 00000000000..26afe941940
--- /dev/null
+++ b/sys/arch/m68k/fpsp/slogn.sa
@@ -0,0 +1,617 @@
+*	$NetBSD: slogn.sa,v 1.3 1994/10/26 07:49:54 cgd Exp $
+
+*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+*	M68000 Hi-Performance Microprocessor Division
+*	M68040 Software Package
+*
+*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+*	All rights reserved.
+*
+*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+*	To the maximum extent permitted by applicable law,
+*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+*	PARTICULAR PURPOSE and any warranty against infringement with
+*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+*	and any accompanying written materials.
+*
+*	To the maximum extent permitted by applicable law,
+*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+*	SOFTWARE.  Motorola assumes no responsibility for the maintenance
+*	and support of the SOFTWARE.
+*
+*	You are hereby granted a copyright license to use, modify, and
+*	distribute the SOFTWARE so long as this entire notice is retained
+*	without alteration in any modified and/or redistributed versions,
+*	and that such modified versions are clearly identified as such.
+*	No licenses are granted by implication, estoppel or otherwise
+*	under any patents or trademarks of Motorola, Inc.
+
+*
+*	slogn.sa 3.1 12/10/90
+*
+*	slogn computes the natural logarithm of an
+*	input value. slognd does the same except the input value is a
+*	denormalized number.
slognp1 computes log(1+X), and slognp1d +* computes log(1+X) for denormalized X. +* +* Input: Double-extended value in memory location pointed to by address +* register a0. +* +* Output: log(X) or log(1+X) returned in floating-point register Fp0. +* +* Accuracy and Monotonicity: The returned result is within 2 ulps in +* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the +* result is subsequently rounded to double precision. The +* result is provably monotonic in double precision. +* +* Speed: The program slogn takes approximately 190 cycles for input +* argument X such that |X-1| >= 1/16, which is the the usual +* situation. For those arguments, slognp1 takes approximately +* 210 cycles. For the less common arguments, the program will +* run no worse than 10% slower. +* +* Algorithm: +* LOGN: +* Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in +* u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2. +* +* Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven +* significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base +* 2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7). +* +* Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u, +* log(1+u) = poly. +* +* Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) +* by k*log(2) + (log(F) + poly). The values of log(F) are calculated +* beforehand and stored in the program. +* +* lognp1: +* Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in +* u where u = 2X/(2+X). Otherwise, move on to Step 2. +* +* Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2 +* of the algorithm for LOGN and compute log(1+X) as +* k*log(2) + log(F) + poly where poly approximates log(1+u), +* u = (Y-F)/F. +* +* Implementation Notes: +* Note 1. There are 64 different possible values for F, thus 64 log(F)'s +* need to be tabulated. 
Moreover, the values of 1/F are also +* tabulated so that the division in (Y-F)/F can be performed by a +* multiplication. +* +* Note 2. In Step 2 of lognp1, in order to preserved accuracy, the value +* Y-F has to be calculated carefully when 1/2 <= X < 3/2. +* +* Note 3. To fully exploit the pipeline, polynomials are usually separated +* into two parts evaluated independently before being added up. +* + +slogn IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + +BOUNDS1 DC.L $3FFEF07D,$3FFF8841 +BOUNDS2 DC.L $3FFE8000,$3FFFC000 + +LOGOF2 DC.L $3FFE0000,$B17217F7,$D1CF79AC,$00000000 + +one DC.L $3F800000 +zero DC.L $00000000 +infty DC.L $7F800000 +negone DC.L $BF800000 + +LOGA6 DC.L $3FC2499A,$B5E4040B +LOGA5 DC.L $BFC555B5,$848CB7DB + +LOGA4 DC.L $3FC99999,$987D8730 +LOGA3 DC.L $BFCFFFFF,$FF6F7E97 + +LOGA2 DC.L $3FD55555,$555555A4 +LOGA1 DC.L $BFE00000,$00000008 + +LOGB5 DC.L $3F175496,$ADD7DAD6 +LOGB4 DC.L $3F3C71C2,$FE80C7E0 + +LOGB3 DC.L $3F624924,$928BCCFF +LOGB2 DC.L $3F899999,$999995EC + +LOGB1 DC.L $3FB55555,$55555555 +TWO DC.L $40000000,$00000000 + +LTHOLD DC.L $3f990000,$80000000,$00000000,$00000000 + +LOGTBL: + DC.L $3FFE0000,$FE03F80F,$E03F80FE,$00000000 + DC.L $3FF70000,$FF015358,$833C47E2,$00000000 + DC.L $3FFE0000,$FA232CF2,$52138AC0,$00000000 + DC.L $3FF90000,$BDC8D83E,$AD88D549,$00000000 + DC.L $3FFE0000,$F6603D98,$0F6603DA,$00000000 + DC.L $3FFA0000,$9CF43DCF,$F5EAFD48,$00000000 + DC.L $3FFE0000,$F2B9D648,$0F2B9D65,$00000000 + DC.L $3FFA0000,$DA16EB88,$CB8DF614,$00000000 + DC.L $3FFE0000,$EF2EB71F,$C4345238,$00000000 + DC.L $3FFB0000,$8B29B775,$1BD70743,$00000000 + DC.L $3FFE0000,$EBBDB2A5,$C1619C8C,$00000000 + DC.L $3FFB0000,$A8D839F8,$30C1FB49,$00000000 + DC.L $3FFE0000,$E865AC7B,$7603A197,$00000000 + DC.L $3FFB0000,$C61A2EB1,$8CD907AD,$00000000 + DC.L $3FFE0000,$E525982A,$F70C880E,$00000000 + DC.L $3FFB0000,$E2F2A47A,$DE3A18AF,$00000000 + DC.L $3FFE0000,$E1FC780E,$1FC780E2,$00000000 + DC.L 
$3FFB0000,$FF64898E,$DF55D551,$00000000 + DC.L $3FFE0000,$DEE95C4C,$A037BA57,$00000000 + DC.L $3FFC0000,$8DB956A9,$7B3D0148,$00000000 + DC.L $3FFE0000,$DBEB61EE,$D19C5958,$00000000 + DC.L $3FFC0000,$9B8FE100,$F47BA1DE,$00000000 + DC.L $3FFE0000,$D901B203,$6406C80E,$00000000 + DC.L $3FFC0000,$A9372F1D,$0DA1BD17,$00000000 + DC.L $3FFE0000,$D62B80D6,$2B80D62C,$00000000 + DC.L $3FFC0000,$B6B07F38,$CE90E46B,$00000000 + DC.L $3FFE0000,$D3680D36,$80D3680D,$00000000 + DC.L $3FFC0000,$C3FD0329,$06488481,$00000000 + DC.L $3FFE0000,$D0B69FCB,$D2580D0B,$00000000 + DC.L $3FFC0000,$D11DE0FF,$15AB18CA,$00000000 + DC.L $3FFE0000,$CE168A77,$25080CE1,$00000000 + DC.L $3FFC0000,$DE1433A1,$6C66B150,$00000000 + DC.L $3FFE0000,$CB8727C0,$65C393E0,$00000000 + DC.L $3FFC0000,$EAE10B5A,$7DDC8ADD,$00000000 + DC.L $3FFE0000,$C907DA4E,$871146AD,$00000000 + DC.L $3FFC0000,$F7856E5E,$E2C9B291,$00000000 + DC.L $3FFE0000,$C6980C69,$80C6980C,$00000000 + DC.L $3FFD0000,$82012CA5,$A68206D7,$00000000 + DC.L $3FFE0000,$C4372F85,$5D824CA6,$00000000 + DC.L $3FFD0000,$882C5FCD,$7256A8C5,$00000000 + DC.L $3FFE0000,$C1E4BBD5,$95F6E947,$00000000 + DC.L $3FFD0000,$8E44C60B,$4CCFD7DE,$00000000 + DC.L $3FFE0000,$BFA02FE8,$0BFA02FF,$00000000 + DC.L $3FFD0000,$944AD09E,$F4351AF6,$00000000 + DC.L $3FFE0000,$BD691047,$07661AA3,$00000000 + DC.L $3FFD0000,$9A3EECD4,$C3EAA6B2,$00000000 + DC.L $3FFE0000,$BB3EE721,$A54D880C,$00000000 + DC.L $3FFD0000,$A0218434,$353F1DE8,$00000000 + DC.L $3FFE0000,$B92143FA,$36F5E02E,$00000000 + DC.L $3FFD0000,$A5F2FCAB,$BBC506DA,$00000000 + DC.L $3FFE0000,$B70FBB5A,$19BE3659,$00000000 + DC.L $3FFD0000,$ABB3B8BA,$2AD362A5,$00000000 + DC.L $3FFE0000,$B509E68A,$9B94821F,$00000000 + DC.L $3FFD0000,$B1641795,$CE3CA97B,$00000000 + DC.L $3FFE0000,$B30F6352,$8917C80B,$00000000 + DC.L $3FFD0000,$B7047551,$5D0F1C61,$00000000 + DC.L $3FFE0000,$B11FD3B8,$0B11FD3C,$00000000 + DC.L $3FFD0000,$BC952AFE,$EA3D13E1,$00000000 + DC.L $3FFE0000,$AF3ADDC6,$80AF3ADE,$00000000 + DC.L 
$3FFD0000,$C2168ED0,$F458BA4A,$00000000 + DC.L $3FFE0000,$AD602B58,$0AD602B6,$00000000 + DC.L $3FFD0000,$C788F439,$B3163BF1,$00000000 + DC.L $3FFE0000,$AB8F69E2,$8359CD11,$00000000 + DC.L $3FFD0000,$CCECAC08,$BF04565D,$00000000 + DC.L $3FFE0000,$A9C84A47,$A07F5638,$00000000 + DC.L $3FFD0000,$D2420487,$2DD85160,$00000000 + DC.L $3FFE0000,$A80A80A8,$0A80A80B,$00000000 + DC.L $3FFD0000,$D7894992,$3BC3588A,$00000000 + DC.L $3FFE0000,$A655C439,$2D7B73A8,$00000000 + DC.L $3FFD0000,$DCC2C4B4,$9887DACC,$00000000 + DC.L $3FFE0000,$A4A9CF1D,$96833751,$00000000 + DC.L $3FFD0000,$E1EEBD3E,$6D6A6B9E,$00000000 + DC.L $3FFE0000,$A3065E3F,$AE7CD0E0,$00000000 + DC.L $3FFD0000,$E70D785C,$2F9F5BDC,$00000000 + DC.L $3FFE0000,$A16B312E,$A8FC377D,$00000000 + DC.L $3FFD0000,$EC1F392C,$5179F283,$00000000 + DC.L $3FFE0000,$9FD809FD,$809FD80A,$00000000 + DC.L $3FFD0000,$F12440D3,$E36130E6,$00000000 + DC.L $3FFE0000,$9E4CAD23,$DD5F3A20,$00000000 + DC.L $3FFD0000,$F61CCE92,$346600BB,$00000000 + DC.L $3FFE0000,$9CC8E160,$C3FB19B9,$00000000 + DC.L $3FFD0000,$FB091FD3,$8145630A,$00000000 + DC.L $3FFE0000,$9B4C6F9E,$F03A3CAA,$00000000 + DC.L $3FFD0000,$FFE97042,$BFA4C2AD,$00000000 + DC.L $3FFE0000,$99D722DA,$BDE58F06,$00000000 + DC.L $3FFE0000,$825EFCED,$49369330,$00000000 + DC.L $3FFE0000,$9868C809,$868C8098,$00000000 + DC.L $3FFE0000,$84C37A7A,$B9A905C9,$00000000 + DC.L $3FFE0000,$97012E02,$5C04B809,$00000000 + DC.L $3FFE0000,$87224C2E,$8E645FB7,$00000000 + DC.L $3FFE0000,$95A02568,$095A0257,$00000000 + DC.L $3FFE0000,$897B8CAC,$9F7DE298,$00000000 + DC.L $3FFE0000,$94458094,$45809446,$00000000 + DC.L $3FFE0000,$8BCF55DE,$C4CD05FE,$00000000 + DC.L $3FFE0000,$92F11384,$0497889C,$00000000 + DC.L $3FFE0000,$8E1DC0FB,$89E125E5,$00000000 + DC.L $3FFE0000,$91A2B3C4,$D5E6F809,$00000000 + DC.L $3FFE0000,$9066E68C,$955B6C9B,$00000000 + DC.L $3FFE0000,$905A3863,$3E06C43B,$00000000 + DC.L $3FFE0000,$92AADE74,$C7BE59E0,$00000000 + DC.L $3FFE0000,$8F1779D9,$FDC3A219,$00000000 + DC.L 
$3FFE0000,$94E9BFF6,$15845643,$00000000 + DC.L $3FFE0000,$8DDA5202,$37694809,$00000000 + DC.L $3FFE0000,$9723A1B7,$20134203,$00000000 + DC.L $3FFE0000,$8CA29C04,$6514E023,$00000000 + DC.L $3FFE0000,$995899C8,$90EB8990,$00000000 + DC.L $3FFE0000,$8B70344A,$139BC75A,$00000000 + DC.L $3FFE0000,$9B88BDAA,$3A3DAE2F,$00000000 + DC.L $3FFE0000,$8A42F870,$5669DB46,$00000000 + DC.L $3FFE0000,$9DB4224F,$FFE1157C,$00000000 + DC.L $3FFE0000,$891AC73A,$E9819B50,$00000000 + DC.L $3FFE0000,$9FDADC26,$8B7A12DA,$00000000 + DC.L $3FFE0000,$87F78087,$F78087F8,$00000000 + DC.L $3FFE0000,$A1FCFF17,$CE733BD4,$00000000 + DC.L $3FFE0000,$86D90544,$7A34ACC6,$00000000 + DC.L $3FFE0000,$A41A9E8F,$5446FB9F,$00000000 + DC.L $3FFE0000,$85BF3761,$2CEE3C9B,$00000000 + DC.L $3FFE0000,$A633CD7E,$6771CD8B,$00000000 + DC.L $3FFE0000,$84A9F9C8,$084A9F9D,$00000000 + DC.L $3FFE0000,$A8489E60,$0B435A5E,$00000000 + DC.L $3FFE0000,$83993052,$3FBE3368,$00000000 + DC.L $3FFE0000,$AA59233C,$CCA4BD49,$00000000 + DC.L $3FFE0000,$828CBFBE,$B9A020A3,$00000000 + DC.L $3FFE0000,$AC656DAE,$6BCC4985,$00000000 + DC.L $3FFE0000,$81848DA8,$FAF0D277,$00000000 + DC.L $3FFE0000,$AE6D8EE3,$60BB2468,$00000000 + DC.L $3FFE0000,$80808080,$80808081,$00000000 + DC.L $3FFE0000,$B07197A2,$3C46C654,$00000000 + +ADJK equ L_SCR1 + +X equ FP_SCR1 +XDCARE equ X+2 +XFRAC equ X+4 + +F equ FP_SCR2 +FFRAC equ F+4 + +KLOG2 equ FP_SCR3 + +SAVEU equ FP_SCR4 + + xref t_frcinx + xref t_extdnrm + xref t_operr + xref t_dz + + xdef slognd +slognd: +*--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT + + MOVE.L #-100,ADJK(a6) ...INPUT = 2^(ADJK) * FP0 + +*----normalize the input value by left shifting k bits (k to be determined +*----below), adjusting exponent and storing -k to ADJK +*----the value TWOTO100 is no longer needed. +*----Note that this code assumes the denormalized input is NON-ZERO. + + MoveM.L D2-D7,-(A7) ...save some registers + Clr.L D3 ...D3 is exponent of smallest norm. 
# + Move.L 4(A0),D4 + Move.L 8(A0),D5 ...(D4,D5) is (Hi_X,Lo_X) + Clr.L D2 ...D2 used for holding K + + Tst.L D4 + BNE.B HiX_not0 + +HiX_0: + Move.L D5,D4 + Clr.L D5 + Move.L #32,D2 + Clr.L D6 + BFFFO D4{0:32},D6 + LSL.L D6,D4 + Add.L D6,D2 ...(D3,D4,D5) is normalized + + Move.L D3,X(a6) + Move.L D4,XFRAC(a6) + Move.L D5,XFRAC+4(a6) + Neg.L D2 + Move.L D2,ADJK(a6) + FMove.X X(a6),FP0 + MoveM.L (A7)+,D2-D7 ...restore registers + LEA X(a6),A0 + Bra.B LOGBGN ...begin regular log(X) + + +HiX_not0: + Clr.L D6 + BFFFO D4{0:32},D6 ...find first 1 + Move.L D6,D2 ...get k + LSL.L D6,D4 + Move.L D5,D7 ...a copy of D5 + LSL.L D6,D5 + Neg.L D6 + AddI.L #32,D6 + LSR.L D6,D7 + Or.L D7,D4 ...(D3,D4,D5) normalized + + Move.L D3,X(a6) + Move.L D4,XFRAC(a6) + Move.L D5,XFRAC+4(a6) + Neg.L D2 + Move.L D2,ADJK(a6) + FMove.X X(a6),FP0 + MoveM.L (A7)+,D2-D7 ...restore registers + LEA X(a6),A0 + Bra.B LOGBGN ...begin regular log(X) + + + xdef slogn +slogn: +*--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S + + FMOVE.X (A0),FP0 ...LOAD INPUT + CLR.L ADJK(a6) + +LOGBGN: +*--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS +*--A FINITE, NON-ZERO, NORMALIZED NUMBER. + + move.l (a0),d0 + move.w 4(a0),d0 + + move.l (a0),X(a6) + move.l 4(a0),X+4(a6) + move.l 8(a0),X+8(a6) + + TST.L D0 ...CHECK IF X IS NEGATIVE + BLT.W LOGNEG ...LOG OF NEGATIVE ARGUMENT IS INVALID + CMP2.L BOUNDS1,D0 ...X IS POSITIVE, CHECK IF X IS NEAR 1 + BCC.W LOGNEAR1 ...BOUNDS IS ROUGHLY [15/16, 17/16] + +LOGMAIN: +*--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 + +*--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. +*--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. +*--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) +*-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). +*--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING +*--LOG(1+U) CAN BE VERY EFFICIENT. +*--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO +*--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. 
+ +*--GET K, Y, F, AND ADDRESS OF 1/F. + ASR.L #8,D0 + ASR.L #8,D0 ...SHIFTED 16 BITS, BIASED EXPO. OF X + SUBI.L #$3FFF,D0 ...THIS IS K + ADD.L ADJK(a6),D0 ...ADJUST K, ORIGINAL INPUT MAY BE DENORM. + LEA LOGTBL,A0 ...BASE ADDRESS OF 1/F AND LOG(F) + FMOVE.L D0,FP1 ...CONVERT K TO FLOATING-POINT FORMAT + +*--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F + MOVE.L #$3FFF0000,X(a6) ...X IS NOW Y, I.E. 2^(-K)*X + MOVE.L XFRAC(a6),FFRAC(a6) + ANDI.L #$FE000000,FFRAC(a6) ...FIRST 7 BITS OF Y + ORI.L #$01000000,FFRAC(a6) ...GET F: ATTACH A 1 AT THE EIGHTH BIT + MOVE.L FFRAC(a6),D0 ...READY TO GET ADDRESS OF 1/F + ANDI.L #$7E000000,D0 + ASR.L #8,D0 + ASR.L #8,D0 + ASR.L #4,D0 ...SHIFTED 20, D0 IS THE DISPLACEMENT + ADDA.L D0,A0 ...A0 IS THE ADDRESS FOR 1/F + + FMOVE.X X(a6),FP0 + move.l #$3fff0000,F(a6) + clr.l F+8(a6) + FSUB.X F(a6),FP0 ...Y-F + FMOVEm.X FP2/fp3,-(sp) ...SAVE FP2/FP3 WHILE FP0 IS NOT READY +*--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K +*--REGISTERS SAVED: FPCR, FP1, FP2, FP3 + +LP1CONT1: +*--AN RE-ENTRY POINT FOR LOGNP1 + FMUL.X (A0),FP0 ...FP0 IS U = (Y-F)/F + FMUL.X LOGOF2,FP1 ...GET K*LOG2 WHILE FP0 IS NOT READY + FMOVE.X FP0,FP2 + FMUL.X FP2,FP2 ...FP2 IS V=U*U + FMOVE.X FP1,KLOG2(a6) ...PUT K*LOG2 IN MEMORY, FREE FP1 + +*--LOG(1+U) IS APPROXIMATED BY +*--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS +*--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] + + FMOVE.X FP2,FP3 + FMOVE.X FP2,FP1 + + FMUL.D LOGA6,FP1 ...V*A6 + FMUL.D LOGA5,FP2 ...V*A5 + + FADD.D LOGA4,FP1 ...A4+V*A6 + FADD.D LOGA3,FP2 ...A3+V*A5 + + FMUL.X FP3,FP1 ...V*(A4+V*A6) + FMUL.X FP3,FP2 ...V*(A3+V*A5) + + FADD.D LOGA2,FP1 ...A2+V*(A4+V*A6) + FADD.D LOGA1,FP2 ...A1+V*(A3+V*A5) + + FMUL.X FP3,FP1 ...V*(A2+V*(A4+V*A6)) + ADDA.L #16,A0 ...ADDRESS OF LOG(F) + FMUL.X FP3,FP2 ...V*(A1+V*(A3+V*A5)), FP3 RELEASED + + FMUL.X FP0,FP1 ...U*V*(A2+V*(A4+V*A6)) + FADD.X FP2,FP0 ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED + + FADD.X (A0),FP1 ...LOG(F)+U*V*(A2+V*(A4+V*A6)) + 
FMOVEm.X (sp)+,FP2/fp3 ...RESTORE FP2 + FADD.X FP1,FP0 ...FP0 IS LOG(F) + LOG(1+U) + + fmove.l d1,fpcr + FADD.X KLOG2(a6),FP0 ...FINAL ADD + bra t_frcinx + + +LOGNEAR1: +*--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. + FMOVE.X FP0,FP1 + FSUB.S one,FP1 ...FP1 IS X-1 + FADD.S one,FP0 ...FP0 IS X+1 + FADD.X FP1,FP1 ...FP1 IS 2(X-1) +*--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL +*--IN U, U = 2(X-1)/(X+1) = FP1/FP0 + +LP1CONT2: +*--THIS IS AN RE-ENTRY POINT FOR LOGNP1 + FDIV.X FP0,FP1 ...FP1 IS U + FMOVEm.X FP2/fp3,-(sp) ...SAVE FP2 +*--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3 +*--LET V=U*U, W=V*V, CALCULATE +*--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY +*--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) + FMOVE.X FP1,FP0 + FMUL.X FP0,FP0 ...FP0 IS V + FMOVE.X FP1,SAVEU(a6) ...STORE U IN MEMORY, FREE FP1 + FMOVE.X FP0,FP1 + FMUL.X FP1,FP1 ...FP1 IS W + + FMOVE.D LOGB5,FP3 + FMOVE.D LOGB4,FP2 + + FMUL.X FP1,FP3 ...W*B5 + FMUL.X FP1,FP2 ...W*B4 + + FADD.D LOGB3,FP3 ...B3+W*B5 + FADD.D LOGB2,FP2 ...B2+W*B4 + + FMUL.X FP3,FP1 ...W*(B3+W*B5), FP3 RELEASED + + FMUL.X FP0,FP2 ...V*(B2+W*B4) + + FADD.D LOGB1,FP1 ...B1+W*(B3+W*B5) + FMUL.X SAVEU(a6),FP0 ...FP0 IS U*V + + FADD.X FP2,FP1 ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED + FMOVEm.X (sp)+,FP2/fp3 ...FP2 RESTORED + + FMUL.X FP1,FP0 ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) + + fmove.l d1,fpcr + FADD.X SAVEU(a6),FP0 + bra t_frcinx + rts + +LOGNEG: +*--REGISTERS SAVED FPCR. 
LOG(-VE) IS INVALID + bra t_operr + + xdef slognp1d +slognp1d: +*--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT +* Simply return the denorm + + bra t_extdnrm + + xdef slognp1 +slognp1: +*--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S + + FMOVE.X (A0),FP0 ...LOAD INPUT + fabs.x fp0 ;test magnitude + fcmp.x LTHOLD,fp0 ;compare with min threshold + fbgt.w LP1REAL ;if greater, continue + fmove.l #0,fpsr ;clr N flag from compare + fmove.l d1,fpcr + fmove.x (a0),fp0 ;return signed argument + bra t_frcinx + +LP1REAL: + FMOVE.X (A0),FP0 ...LOAD INPUT + CLR.L ADJK(a6) + FMOVE.X FP0,FP1 ...FP1 IS INPUT Z + FADD.S one,FP0 ...X := ROUND(1+Z) + FMOVE.X FP0,X(a6) + MOVE.W XFRAC(a6),XDCARE(a6) + MOVE.L X(a6),D0 + TST.L D0 + BLE.W LP1NEG0 ...LOG OF ZERO OR -VE + CMP2.L BOUNDS2,D0 + BCS.W LOGMAIN ...BOUNDS2 IS [1/2,3/2] +*--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, +*--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, +*--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). + +LP1NEAR1: +*--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) + CMP2.L BOUNDS1,D0 + BCS.B LP1CARE + +LP1ONE16: +*--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) +*--WHERE U = 2Z/(2+Z) = 2Z/(1+X). + FADD.X FP1,FP1 ...FP1 IS 2Z + FADD.S one,FP0 ...FP0 IS 1+X +*--U = FP1/FP0 + BRA.W LP1CONT2 + +LP1CARE: +*--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE +*--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST +*--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], +*--THERE ARE ONLY TWO CASES. +*--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z +*--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z +*--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF +*--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. 
+ + MOVE.L XFRAC(a6),FFRAC(a6) + ANDI.L #$FE000000,FFRAC(a6) + ORI.L #$01000000,FFRAC(a6) ...F OBTAINED + CMPI.L #$3FFF8000,D0 ...SEE IF 1+Z > 1 + BGE.B KISZERO + +KISNEG1: + FMOVE.S TWO,FP0 + move.l #$3fff0000,F(a6) + clr.l F+8(a6) + FSUB.X F(a6),FP0 ...2-F + MOVE.L FFRAC(a6),D0 + ANDI.L #$7E000000,D0 + ASR.L #8,D0 + ASR.L #8,D0 + ASR.L #4,D0 ...D0 CONTAINS DISPLACEMENT FOR 1/F + FADD.X FP1,FP1 ...GET 2Z + FMOVEm.X FP2/fp3,-(sp) ...SAVE FP2 + FADD.X FP1,FP0 ...FP0 IS Y-F = (2-F)+2Z + LEA LOGTBL,A0 ...A0 IS ADDRESS OF 1/F + ADDA.L D0,A0 + FMOVE.S negone,FP1 ...FP1 IS K = -1 + BRA.W LP1CONT1 + +KISZERO: + FMOVE.S one,FP0 + move.l #$3fff0000,F(a6) + clr.l F+8(a6) + FSUB.X F(a6),FP0 ...1-F + MOVE.L FFRAC(a6),D0 + ANDI.L #$7E000000,D0 + ASR.L #8,D0 + ASR.L #8,D0 + ASR.L #4,D0 + FADD.X FP1,FP0 ...FP0 IS Y-F + FMOVEm.X FP2/fp3,-(sp) ...FP2 SAVED + LEA LOGTBL,A0 + ADDA.L D0,A0 ...A0 IS ADDRESS OF 1/F + FMOVE.S zero,FP1 ...FP1 IS K = 0 + BRA.W LP1CONT1 + +LP1NEG0: +*--FPCR SAVED. D0 IS X IN COMPACT FORM. + TST.L D0 + BLT.B LP1NEG +LP1ZERO: + FMOVE.S negone,FP0 + + fmove.l d1,fpcr + bra t_dz + +LP1NEG: + FMOVE.S zero,FP0 + + fmove.l d1,fpcr + bra t_operr + + end diff --git a/sys/arch/m68k/fpsp/smovecr.sa b/sys/arch/m68k/fpsp/smovecr.sa new file mode 100644 index 00000000000..9e13b64b1c4 --- /dev/null +++ b/sys/arch/m68k/fpsp/smovecr.sa @@ -0,0 +1,187 @@ +* $NetBSD: smovecr.sa,v 1.2 1994/10/26 07:49:57 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. 
+* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* smovecr.sa 3.1 12/10/90 +* +* The entry point sMOVECR returns the constant at the +* offset given in the instruction field. +* +* Input: An offset in the instruction word. +* +* Output: The constant rounded to the user's rounding +* mode unchecked for overflow. +* +* Modified: fp0. 
+* + +SMOVECR IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref nrm_set + xref round + xref PIRN + xref PIRZRM + xref PIRP + xref SMALRN + xref SMALRZRM + xref SMALRP + xref BIGRN + xref BIGRZRM + xref BIGRP + +FZERO dc.l 00000000 +* +* FMOVECR +* + xdef smovcr +smovcr: + bfextu CMDREG1B(a6){9:7},d0 ;get offset + bfextu USER_FPCR(a6){26:2},d1 ;get rmode +* +* check range of offset +* + tst.b d0 ;if zero, offset is to pi + beq.b PI_TBL ;it is pi + cmpi.b #$0a,d0 ;check range $01 - $0a + ble.b Z_VAL ;if in this range, return zero + cmpi.b #$0e,d0 ;check range $0b - $0e + ble.b SM_TBL ;valid constants in this range + cmpi.b #$2f,d0 ;check range $0f - $2f + ble.b Z_VAL ;if in this range, return zero + cmpi.b #$3f,d0 ;check range $30 - $3f + ble BG_TBL ;valid constants in this range +Z_VAL: + fmove.s FZERO,fp0 + rts +PI_TBL: + tst.b d1 ;offset is zero, check for rmode + beq.b PI_RN ;if zero, rn mode + cmpi.b #$3,d1 ;check for rp + beq.b PI_RP ;if 3, rp mode +PI_RZRM: + lea.l PIRZRM,a0 ;rmode is rz or rm, load PIRZRM in a0 + bra set_finx +PI_RN: + lea.l PIRN,a0 ;rmode is rn, load PIRN in a0 + bra set_finx +PI_RP: + lea.l PIRP,a0 ;rmode is rp, load PIRP in a0 + bra set_finx +SM_TBL: + subi.l #$b,d0 ;make offset in 0 - 3 range + tst.b d1 ;check for rmode + beq.b SM_RN ;if zero, rn mode + cmpi.b #$3,d1 ;check for rp + beq.b SM_RP ;if 3, rp mode +SM_RZRM: + lea.l SMALRZRM,a0 ;rmode is rz or rm, load SMALRZRM in a0 + cmpi.b #$2,d0 ;check if result is inex + ble set_finx ;if 0 - 2, it is inexact + bra no_finx ;if 3, it is exact +SM_RN: + lea.l SMALRN,a0 ;rmode is rn, load SMALRN in a0 + cmpi.b #$2,d0 ;check if result is inex + ble set_finx ;if 0 - 2, it is inexact + bra no_finx ;if 3, it is exact +SM_RP: + lea.l SMALRP,a0 ;rmode is rp, load SMALRP in a0 + cmpi.b #$2,d0 ;check if result is inex + ble set_finx ;if 0 - 2, it is inexact + bra no_finx ;if 3, it is exact +BG_TBL: + subi.l #$30,d0 ;make offset in 0 - f range + tst.b d1 ;check 
for rmode + beq.b BG_RN ;if zero, rn mode + cmpi.b #$3,d1 ;check for rp + beq.b BG_RP ;if 3, rp mode +BG_RZRM: + lea.l BIGRZRM,a0 ;rmode is rz or rm, load BIGRZRM in a0 + cmpi.b #$1,d0 ;check if result is inex + ble set_finx ;if 0 - 1, it is inexact + cmpi.b #$7,d0 ;second check + ble no_finx ;if 2 - 7, it is exact + bra set_finx ;if 8 - f, it is inexact +BG_RN: + lea.l BIGRN,a0 ;rmode is rn, load BIGRN in a0 + cmpi.b #$1,d0 ;check if result is inex + ble set_finx ;if 0 - 1, it is inexact + cmpi.b #$7,d0 ;second check + ble no_finx ;if 2 - 7, it is exact + bra set_finx ;if 8 - f, it is inexact +BG_RP: + lea.l BIGRP,a0 ;rmode is rp, load BIGRP in a0 + cmpi.b #$1,d0 ;check if result is inex + ble set_finx ;if 0 - 1, it is inexact + cmpi.b #$7,d0 ;second check + ble no_finx ;if 2 - 7, it is exact +* bra set_finx ;if 8 - f, it is inexact +set_finx: + or.l #inx2a_mask,USER_FPSR(a6) ;set inex2/ainex +no_finx: + mulu.l #12,d0 ;use offset to point into tables + move.l d1,L_SCR1(a6) ;load mode for round call + bfextu USER_FPCR(a6){24:2},d1 ;get precision + tst.l d1 ;check if extended precision +* +* Precision is extended +* + bne.b not_ext ;if not extended, go call round + fmovem.x (a0,d0),fp0 ;return result in fp0 + rts +* +* Precision is single or double +* +not_ext: + swap d1 ;rnd prec in upper word of d1 + add.l L_SCR1(a6),d1 ;merge rmode in low word of d1 + move.l (a0,d0),FP_SCR1(a6) ;load first longword to temp storage + move.l 4(a0,d0),FP_SCR1+4(a6) ;load second longword + move.l 8(a0,d0),FP_SCR1+8(a6) ;load third longword + clr.l d0 ;clear g,r,s + lea FP_SCR1(a6),a0 + btst.b #sign_bit,LOCAL_EX(a0) + sne LOCAL_SGN(a0) ;convert to internal ext. 
format + + bsr round ;go round the mantissa + + bfclr LOCAL_SGN(a0){0:8} ;convert back to IEEE ext format + beq.b fin_fcr + bset.b #sign_bit,LOCAL_EX(a0) +fin_fcr: + fmovem.x (a0),fp0 + rts + + end diff --git a/sys/arch/m68k/fpsp/srem_mod.sa b/sys/arch/m68k/fpsp/srem_mod.sa new file mode 100644 index 00000000000..822097985ee --- /dev/null +++ b/sys/arch/m68k/fpsp/srem_mod.sa @@ -0,0 +1,446 @@ +* $NetBSD: srem_mod.sa,v 1.3 1994/10/26 07:49:58 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. 
+ +* +* srem_mod.sa 3.1 12/10/90 +* +* The entry point sMOD computes the floating point MOD of the +* input values X and Y. The entry point sREM computes the floating +* point (IEEE) REM of the input values X and Y. +* +* INPUT +* ----- +* Double-extended value Y is pointed to by address in register +* A0. Double-extended value X is located in -12(A0). The values +* of X and Y are both nonzero and finite; although either or both +* of them can be denormalized. The special cases of zeros, NaNs, +* and infinities are handled elsewhere. +* +* OUTPUT +* ------ +* FREM(X,Y) or FMOD(X,Y), depending on entry point. +* +* ALGORITHM +* --------- +* +* Step 1. Save and strip signs of X and Y: signX := sign(X), +* signY := sign(Y), X := |X|, Y := |Y|, +* signQ := signX EOR signY. Record whether MOD or REM +* is requested. +* +* Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. +* If (L < 0) then +* R := X, go to Step 4. +* else +* R := 2^(-L)X, j := L. +* endif +* +* Step 3. Perform MOD(X,Y) +* 3.1 If R = Y, go to Step 9. +* 3.2 If R > Y, then { R := R - Y, Q := Q + 1} +* 3.3 If j = 0, go to Step 4. +* 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to +* Step 3.1. +* +* Step 4. At this point, R = X - QY = MOD(X,Y). Set +* Last_Subtract := false (used in Step 7 below). If +* MOD is requested, go to Step 6. +* +* Step 5. R = MOD(X,Y), but REM(X,Y) is requested. +* 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to +* Step 6. +* 5.2 If R > Y/2, then { set Last_Subtract := true, +* Q := Q + 1, Y := signY*Y }. Go to Step 6. +* 5.3 This is the tricky case of R = Y/2. If Q is odd, +* then { Q := Q + 1, signX := -signX }. +* +* Step 6. R := signX*R. +* +* Step 7. If Last_Subtract = true, R := R - Y. +* +* Step 8. Return signQ, last 7 bits of Q, and R as required. +* +* Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, +* X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), +* R := 0. Return signQ, last 7 bits of Q, and R. 
+* + +SREM_MOD IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + +Mod_Flag equ L_SCR3 +SignY equ FP_SCR3+4 +SignX equ FP_SCR3+8 +SignQ equ FP_SCR3+12 +Sc_Flag equ FP_SCR4 + +Y equ FP_SCR1 +Y_Hi equ Y+4 +Y_Lo equ Y+8 + +R equ FP_SCR2 +R_Hi equ R+4 +R_Lo equ R+8 + + +Scale DC.L $00010000,$80000000,$00000000,$00000000 + + xref t_avoid_unsupp + + xdef smod +smod: + + Clr.L Mod_Flag(a6) + BRA.B Mod_Rem + + xdef srem +srem: + + Move.L #1,Mod_Flag(a6) + +Mod_Rem: +*..Save sign of X and Y + MoveM.L D2-D7,-(A7) ...save data registers + Move.W (A0),D3 + Move.W D3,SignY(a6) + AndI.L #$00007FFF,D3 ...Y := |Y| + +* + Move.L 4(A0),D4 + Move.L 8(A0),D5 ...(D3,D4,D5) is |Y| + + Tst.L D3 + BNE.B Y_Normal + + Move.L #$00003FFE,D3 ...$3FFD + 1 + Tst.L D4 + BNE.B HiY_not0 + +HiY_0: + Move.L D5,D4 + CLR.L D5 + SubI.L #32,D3 + CLR.L D6 + BFFFO D4{0:32},D6 + LSL.L D6,D4 + Sub.L D6,D3 ...(D3,D4,D5) is normalized +* ...with bias $7FFD + BRA.B Chk_X + +HiY_not0: + CLR.L D6 + BFFFO D4{0:32},D6 + Sub.L D6,D3 + LSL.L D6,D4 + Move.L D5,D7 ...a copy of D5 + LSL.L D6,D5 + Neg.L D6 + AddI.L #32,D6 + LSR.L D6,D7 + Or.L D7,D4 ...(D3,D4,D5) normalized +* ...with bias $7FFD + BRA.B Chk_X + +Y_Normal: + AddI.L #$00003FFE,D3 ...(D3,D4,D5) normalized +* ...with bias $7FFD + +Chk_X: + Move.W -12(A0),D0 + Move.W D0,SignX(a6) + Move.W SignY(a6),D1 + EOr.L D0,D1 + AndI.L #$00008000,D1 + Move.W D1,SignQ(a6) ...sign(Q) obtained + AndI.L #$00007FFF,D0 + Move.L -8(A0),D1 + Move.L -4(A0),D2 ...(D0,D1,D2) is |X| + Tst.L D0 + BNE.B X_Normal + Move.L #$00003FFE,D0 + Tst.L D1 + BNE.B HiX_not0 + +HiX_0: + Move.L D2,D1 + CLR.L D2 + SubI.L #32,D0 + CLR.L D6 + BFFFO D1{0:32},D6 + LSL.L D6,D1 + Sub.L D6,D0 ...(D0,D1,D2) is normalized +* ...with bias $7FFD + BRA.B Init + +HiX_not0: + CLR.L D6 + BFFFO D1{0:32},D6 + Sub.L D6,D0 + LSL.L D6,D1 + Move.L D2,D7 ...a copy of D2 + LSL.L D6,D2 + Neg.L D6 + AddI.L #32,D6 + LSR.L D6,D7 + Or.L D7,D1 ...(D0,D1,D2) normalized +* ...with bias $7FFD + 
BRA.B Init + +X_Normal: + AddI.L #$00003FFE,D0 ...(D0,D1,D2) normalized +* ...with bias $7FFD + +Init: +* + Move.L D3,L_SCR1(a6) ...save biased expo(Y) + move.l d0,L_SCR2(a6) ;save d0 + Sub.L D3,D0 ...L := expo(X)-expo(Y) +* Move.L D0,L ...D0 is j + CLR.L D6 ...D6 := carry <- 0 + CLR.L D3 ...D3 is Q + MoveA.L #0,A1 ...A1 is k; j+k=L, Q=0 + +*..(Carry,D1,D2) is R + Tst.L D0 + BGE.B Mod_Loop + +*..expo(X) < expo(Y). Thus X = mod(X,Y) +* + move.l L_SCR2(a6),d0 ;restore d0 + BRA.W Get_Mod + +*..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L + + +Mod_Loop: + Tst.L D6 ...test carry bit + BGT.B R_GT_Y + +*..At this point carry = 0, R = (D1,D2), Y = (D4,D5) + Cmp.L D4,D1 ...compare hi(R) and hi(Y) + BNE.B R_NE_Y + Cmp.L D5,D2 ...compare lo(R) and lo(Y) + BNE.B R_NE_Y + +*..At this point, R = Y + BRA.W Rem_is_0 + +R_NE_Y: +*..use the borrow of the previous compare + BCS.B R_LT_Y ...borrow is set iff R < Y + +R_GT_Y: +*..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0 +*..and Y < (D1,D2) < 2Y. Either way, perform R - Y + Sub.L D5,D2 ...lo(R) - lo(Y) + SubX.L D4,D1 ...hi(R) - hi(Y) + CLR.L D6 ...clear carry + AddQ.L #1,D3 ...Q := Q + 1 + +R_LT_Y: +*..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0. + Tst.L D0 ...see if j = 0. + BEQ.B PostLoop + + Add.L D3,D3 ...Q := 2Q + Add.L D2,D2 ...lo(R) = 2lo(R) + AddX.L D1,D1 ...hi(R) = 2hi(R) + carry + SCS D6 ...set Carry if 2(R) overflows + AddQ.L #1,A1 ...k := k+1 + SubQ.L #1,D0 ...j := j - 1 +*..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y. + + BRA.B Mod_Loop + +PostLoop: +*..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y. + +*..normalize R. 
+ Move.L L_SCR1(a6),D0 ...new biased expo of R + Tst.L D1 + BNE.B HiR_not0 + +HiR_0: + Move.L D2,D1 + CLR.L D2 + SubI.L #32,D0 + CLR.L D6 + BFFFO D1{0:32},D6 + LSL.L D6,D1 + Sub.L D6,D0 ...(D0,D1,D2) is normalized +* ...with bias $7FFD + BRA.B Get_Mod + +HiR_not0: + CLR.L D6 + BFFFO D1{0:32},D6 + BMI.B Get_Mod ...already normalized + Sub.L D6,D0 + LSL.L D6,D1 + Move.L D2,D7 ...a copy of D2 + LSL.L D6,D2 + Neg.L D6 + AddI.L #32,D6 + LSR.L D6,D7 + Or.L D7,D1 ...(D0,D1,D2) normalized + +* +Get_Mod: + CmpI.L #$000041FE,D0 + BGE.B No_Scale +Do_Scale: + Move.W D0,R(a6) + clr.w R+2(a6) + Move.L D1,R_Hi(a6) + Move.L D2,R_Lo(a6) + Move.L L_SCR1(a6),D6 + Move.W D6,Y(a6) + clr.w Y+2(a6) + Move.L D4,Y_Hi(a6) + Move.L D5,Y_Lo(a6) + FMove.X R(a6),fp0 ...no exception + Move.L #1,Sc_Flag(a6) + BRA.B ModOrRem +No_Scale: + Move.L D1,R_Hi(a6) + Move.L D2,R_Lo(a6) + SubI.L #$3FFE,D0 + Move.W D0,R(a6) + clr.w R+2(a6) + Move.L L_SCR1(a6),D6 + SubI.L #$3FFE,D6 + Move.L D6,L_SCR1(a6) + FMove.X R(a6),fp0 + Move.W D6,Y(a6) + Move.L D4,Y_Hi(a6) + Move.L D5,Y_Lo(a6) + Clr.L Sc_Flag(a6) + +* + + +ModOrRem: + Move.L Mod_Flag(a6),D6 + BEQ.B Fix_Sign + + Move.L L_SCR1(a6),D6 ...new biased expo(Y) + SubQ.L #1,D6 ...biased expo(Y/2) + Cmp.L D6,D0 + BLT.B Fix_Sign + BGT.B Last_Sub + + Cmp.L D4,D1 + BNE.B Not_EQ + Cmp.L D5,D2 + BNE.B Not_EQ + BRA.W Tie_Case + +Not_EQ: + BCS.B Fix_Sign + +Last_Sub: +* + FSub.X Y(a6),fp0 ...no exceptions + AddQ.L #1,D3 ...Q := Q + 1 + +* + +Fix_Sign: +*..Get sign of X + Move.W SignX(a6),D6 + BGE.B Get_Q + FNeg.X fp0 + +*..Get Q +* +Get_Q: + clr.l d6 + Move.W SignQ(a6),D6 ...D6 is sign(Q) + Move.L #8,D7 + LSR.L D7,D6 + AndI.L #$0000007F,D3 ...7 bits of Q + Or.L D6,D3 ...sign and bits of Q + Swap D3 + FMove.L fpsr,D6 + AndI.L #$FF00FFFF,D6 + Or.L D3,D6 + FMove.L D6,fpsr ...put Q in fpsr + +* +Restore: + MoveM.L (A7)+,D2-D7 + FMove.L USER_FPCR(a6),fpcr + Move.L Sc_Flag(a6),D0 + BEQ.B Finish + FMul.X Scale(pc),fp0 ...may cause underflow + bra t_avoid_unsupp ;check for 
denorm as a +* ;result of the scaling + +Finish: + fmove.x fp0,fp0 ;capture exceptions & round + rts + +Rem_is_0: +*..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1) + AddQ.L #1,D3 + CmpI.L #8,D0 ...D0 is j + BGE.B Q_Big + + LSL.L D0,D3 + BRA.B Set_R_0 + +Q_Big: + CLR.L D3 + +Set_R_0: + FMove.S #:00000000,fp0 + Clr.L Sc_Flag(a6) + BRA.W Fix_Sign + +Tie_Case: +*..Check parity of Q + Move.L D3,D6 + AndI.L #$00000001,D6 + Tst.L D6 + BEq.W Fix_Sign ...Q is even + +*..Q is odd, Q := Q + 1, signX := -signX + AddQ.L #1,D3 + Move.W SignX(a6),D6 + EOrI.L #$00008000,D6 + Move.W D6,SignX(a6) + BRA.W Fix_Sign + + End diff --git a/sys/arch/m68k/fpsp/ssin.sa b/sys/arch/m68k/fpsp/ssin.sa new file mode 100644 index 00000000000..672281a19ea --- /dev/null +++ b/sys/arch/m68k/fpsp/ssin.sa @@ -0,0 +1,771 @@ +* $NetBSD: ssin.sa,v 1.3 1994/10/26 07:50:01 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. 
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* ssin.sa 3.3 7/29/91 +* +* The entry point sSIN computes the sine of an input argument, +* sCOS computes the cosine, and sSINCOS computes both. The +* corresponding entry points with a "d" compute the same +* corresponding function values for denormalized inputs. +* +* Input: Double-extended number X in location pointed to +* by address register a0. +* +* Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or +* COS is requested. Otherwise, for SINCOS, sin(X) is returned +* in Fp0, and cos(X) is returned in Fp1. +* +* Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS. +* +* Accuracy and Monotonicity: The returned result is within 1 ulp in +* 64 significant bits, i.e. within 0.5001 ulp to 53 bits if the +* result is subsequently rounded to double precision. The +* result is provably monotonic in double precision. +* +* Speed: The programs sSIN and sCOS take approximately 150 cycles for +* input argument X such that |X| < 15Pi, which is the usual +* situation. The speed for sSINCOS is approximately 190 cycles. +* +* Algorithm: +* +* SIN and COS: +* 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. +* +* 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. +* +* 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let +* k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite +* k by k := k + AdjN. +* +* 4. If k is even, go to 6. +* +* 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r) +* where cos(r) is approximated by an even polynomial in r, +* 1 + r*r*(B1+s*(B2+ ... + s*B8)), s = r*r. +* Exit. +* +* 6. 
(k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) +* where sin(r) is approximated by an odd polynomial in r +* r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. +* Exit. +* +* 7. If |X| > 1, go to 9. +* +* 8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1. +* +* 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3. +* +* SINCOS: +* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. +* +* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let +* k = N mod 4, so in particular, k = 0,1,2,or 3. +* +* 3. If k is even, go to 5. +* +* 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e. +* j1 exclusive or with the l.s.b. of k. +* sgn1 := (-1)**j1, sgn2 := (-1)**j2. +* SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where +* sin(r) and cos(r) are computed as odd and even polynomials +* in r, respectively. Exit +* +* 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. +* SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where +* sin(r) and cos(r) are computed as odd and even polynomials +* in r, respectively. Exit +* +* 6. If |X| > 1, go to 8. +* +* 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. +* +* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. 
+* + +SSIN IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + +BOUNDS1 DC.L $3FD78000,$4004BC7E +TWOBYPI DC.L $3FE45F30,$6DC9C883 + +SINA7 DC.L $BD6AAA77,$CCC994F5 +SINA6 DC.L $3DE61209,$7AAE8DA1 + +SINA5 DC.L $BE5AE645,$2A118AE4 +SINA4 DC.L $3EC71DE3,$A5341531 + +SINA3 DC.L $BF2A01A0,$1A018B59,$00000000,$00000000 + +SINA2 DC.L $3FF80000,$88888888,$888859AF,$00000000 + +SINA1 DC.L $BFFC0000,$AAAAAAAA,$AAAAAA99,$00000000 + +COSB8 DC.L $3D2AC4D0,$D6011EE3 +COSB7 DC.L $BDA9396F,$9F45AC19 + +COSB6 DC.L $3E21EED9,$0612C972 +COSB5 DC.L $BE927E4F,$B79D9FCF + +COSB4 DC.L $3EFA01A0,$1A01D423,$00000000,$00000000 + +COSB3 DC.L $BFF50000,$B60B60B6,$0B61D438,$00000000 + +COSB2 DC.L $3FFA0000,$AAAAAAAA,$AAAAAB5E +COSB1 DC.L $BF000000 + +INVTWOPI DC.L $3FFC0000,$A2F9836E,$4E44152A + +TWOPI1 DC.L $40010000,$C90FDAA2,$00000000,$00000000 +TWOPI2 DC.L $3FDF0000,$85A308D4,$00000000,$00000000 + + xref PITBL + +INARG equ FP_SCR4 + +X equ FP_SCR5 +XDCARE equ X+2 +XFRAC equ X+4 + +RPRIME equ FP_SCR1 +SPRIME equ FP_SCR2 + +POSNEG1 equ L_SCR1 +TWOTO63 equ L_SCR1 + +ENDFLAG equ L_SCR2 +N equ L_SCR2 + +ADJN equ L_SCR3 + + xref t_frcinx + xref t_extdnrm + xref sto_cos + + xdef ssind +ssind: +*--SIN(X) = X FOR DENORMALIZED X + bra t_extdnrm + + xdef scosd +scosd: +*--COS(X) = 1 FOR DENORMALIZED X + + FMOVE.S #:3F800000,FP0 +* +* 9D25B Fix: Sometimes the previous fmove.s sets fpsr bits +* + fmove.l #0,fpsr +* + bra t_frcinx + + xdef ssin +ssin: +*--SET ADJN TO 0 + CLR.L ADJN(a6) + BRA.B SINBGN + + xdef scos +scos: +*--SET ADJN TO 1 + MOVE.L #1,ADJN(a6) + +SINBGN: +*--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE + + FMOVE.X (a0),FP0 ...LOAD INPUT + + MOVE.L (A0),D0 + MOVE.W 4(A0),D0 + FMOVE.X FP0,X(a6) + ANDI.L #$7FFFFFFF,D0 ...COMPACTIFY X + + CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)? + BGE.B SOK1 + BRA.W SINSM + +SOK1: + CMPI.L #$4004BC7E,D0 ...|X| < 15 PI? + BLT.B SINMAIN + BRA.W REDUCEX + +SINMAIN: +*--THIS IS THE USUAL CASE, |X| <= 15 PI. 
+*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. + FMOVE.X FP0,FP1 + FMUL.D TWOBYPI,FP1 ...X*2/PI + +*--HIDE THE NEXT THREE INSTRUCTIONS + LEA PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32 + + +*--FP1 IS NOW READY + FMOVE.L FP1,N(a6) ...CONVERT TO INTEGER + + MOVE.L N(a6),D0 + ASL.L #4,D0 + ADDA.L D0,A1 ...A1 IS THE ADDRESS OF N*PIBY2 +* ...WHICH IS IN TWO PIECES Y1 & Y2 + + FSUB.X (A1)+,FP0 ...X-Y1 +*--HIDE THE NEXT ONE + FSUB.S (A1),FP0 ...FP0 IS R = (X-Y1)-Y2 + +SINCONT: +*--continuation from REDUCEX + +*--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED + MOVE.L N(a6),D0 + ADD.L ADJN(a6),D0 ...SEE IF D0 IS ODD OR EVEN + ROR.L #1,D0 ...D0 WAS ODD IFF D0 IS NEGATIVE + TST.L D0 + BLT.W COSPOLY + +SINPOLY: +*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. +*--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY +*--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE +*--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS +*--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) +*--WHERE T=S*S. +*--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION +*--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. + FMOVE.X FP0,X(a6) ...X IS R + FMUL.X FP0,FP0 ...FP0 IS S +*---HIDE THE NEXT TWO WHILE WAITING FOR FP0 + FMOVE.D SINA7,FP3 + FMOVE.D SINA6,FP2 +*--FP0 IS NOW READY + FMOVE.X FP0,FP1 + FMUL.X FP1,FP1 ...FP1 IS T +*--HIDE THE NEXT TWO WHILE WAITING FOR FP1 + + ROR.L #1,D0 + ANDI.L #$80000000,D0 +* ...LEAST SIG. 
BIT OF D0 IN SIGN POSITION + EOR.L D0,X(a6) ...X IS NOW R'= SGN*R + + FMUL.X FP1,FP3 ...TA7 + FMUL.X FP1,FP2 ...TA6 + + FADD.D SINA5,FP3 ...A5+TA7 + FADD.D SINA4,FP2 ...A4+TA6 + + FMUL.X FP1,FP3 ...T(A5+TA7) + FMUL.X FP1,FP2 ...T(A4+TA6) + + FADD.D SINA3,FP3 ...A3+T(A5+TA7) + FADD.X SINA2,FP2 ...A2+T(A4+TA6) + + FMUL.X FP3,FP1 ...T(A3+T(A5+TA7)) + + FMUL.X FP0,FP2 ...S(A2+T(A4+TA6)) + FADD.X SINA1,FP1 ...A1+T(A3+T(A5+TA7)) + FMUL.X X(a6),FP0 ...R'*S + + FADD.X FP2,FP1 ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] +*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING +*--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING + + + FMUL.X FP1,FP0 ...SIN(R')-R' +*--FP1 RELEASED. + + FMOVE.L d1,FPCR ;restore users exceptions + FADD.X X(a6),FP0 ;last inst - possible exception set + bra t_frcinx + + +COSPOLY: +*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J. +*--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY +*--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE +*--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS +*--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) +*--WHERE T=S*S. +*--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION +*--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 +*--AND IS THEREFORE STORED AS SINGLE PRECISION. + + FMUL.X FP0,FP0 ...FP0 IS S +*---HIDE THE NEXT TWO WHILE WAITING FOR FP0 + FMOVE.D COSB8,FP2 + FMOVE.D COSB7,FP3 +*--FP0 IS NOW READY + FMOVE.X FP0,FP1 + FMUL.X FP1,FP1 ...FP1 IS T +*--HIDE THE NEXT TWO WHILE WAITING FOR FP1 + FMOVE.X FP0,X(a6) ...X IS S + ROR.L #1,D0 + ANDI.L #$80000000,D0 +* ...LEAST SIG. 
BIT OF D0 IN SIGN POSITION + + FMUL.X FP1,FP2 ...TB8 +*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU + EOR.L D0,X(a6) ...X IS NOW S'= SGN*S + ANDI.L #$80000000,D0 + + FMUL.X FP1,FP3 ...TB7 +*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU + ORI.L #$3F800000,D0 ...D0 IS SGN IN SINGLE + MOVE.L D0,POSNEG1(a6) + + FADD.D COSB6,FP2 ...B6+TB8 + FADD.D COSB5,FP3 ...B5+TB7 + + FMUL.X FP1,FP2 ...T(B6+TB8) + FMUL.X FP1,FP3 ...T(B5+TB7) + + FADD.D COSB4,FP2 ...B4+T(B6+TB8) + FADD.X COSB3,FP3 ...B3+T(B5+TB7) + + FMUL.X FP1,FP2 ...T(B4+T(B6+TB8)) + FMUL.X FP3,FP1 ...T(B3+T(B5+TB7)) + + FADD.X COSB2,FP2 ...B2+T(B4+T(B6+TB8)) + FADD.S COSB1,FP1 ...B1+T(B3+T(B5+TB7)) + + FMUL.X FP2,FP0 ...S(B2+T(B4+T(B6+TB8))) +*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING +*--FP2 RELEASED. + + + FADD.X FP1,FP0 +*--FP1 RELEASED + + FMUL.X X(a6),FP0 + + FMOVE.L d1,FPCR ;restore users exceptions + FADD.S POSNEG1(a6),FP0 ;last inst - possible exception set + bra t_frcinx + + +SINBORS: +*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. +*--IF |X| < 2**(-40), RETURN X OR 1. + CMPI.L #$3FFF8000,D0 + BGT.B REDUCEX + + +SINSM: + MOVE.L ADJN(a6),D0 + TST.L D0 + BGT.B COSTINY + +SINTINY: + CLR.W XDCARE(a6) ...JUST IN CASE + FMOVE.L d1,FPCR ;restore users exceptions + FMOVE.X X(a6),FP0 ;last inst - possible exception set + bra t_frcinx + + +COSTINY: + FMOVE.S #:3F800000,FP0 + + FMOVE.L d1,FPCR ;restore users exceptions + FSUB.S #:00800000,FP0 ;last inst - possible exception set + bra t_frcinx + + +REDUCEX: +*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. +*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING +*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. + + FMOVEM.X FP2-FP5,-(A7) ...save FP2 through FP5 + MOVE.L D2,-(A7) + FMOVE.S #:00000000,FP1 +*--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that +*--there is a danger of unwanted overflow in first LOOP iteration. 
In this +*--case, reduce argument by one remainder step to make subsequent reduction +*--safe. + cmpi.l #$7ffeffff,d0 ;is argument dangerously large? + bne.b LOOP + move.l #$7ffe0000,FP_SCR2(a6) ;yes +* ;create 2**16383*PI/2 + move.l #$c90fdaa2,FP_SCR2+4(a6) + clr.l FP_SCR2+8(a6) + ftst.x fp0 ;test sign of argument + move.l #$7fdc0000,FP_SCR3(a6) ;create low half of 2**16383* +* ;PI/2 at FP_SCR3 + move.l #$85a308d3,FP_SCR3+4(a6) + clr.l FP_SCR3+8(a6) + fblt.w red_neg + or.w #$8000,FP_SCR2(a6) ;positive arg + or.w #$8000,FP_SCR3(a6) +red_neg: + fadd.x FP_SCR2(a6),fp0 ;high part of reduction is exact + fmove.x fp0,fp1 ;save high result in fp1 + fadd.x FP_SCR3(a6),fp0 ;low part of reduction + fsub.x fp0,fp1 ;determine low component of result + fadd.x FP_SCR3(a6),fp1 ;fp0/fp1 are reduced argument. + +*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. +*--integer quotient will be stored in N +*--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) + +LOOP: + FMOVE.X FP0,INARG(a6) ...+-2**K * F, 1 <= F < 2 + MOVE.W INARG(a6),D0 + MOVE.L D0,A1 ...save a copy of D0 + ANDI.L #$00007FFF,D0 + SUBI.L #$00003FFF,D0 ...D0 IS K + CMPI.L #28,D0 + BLE.B LASTLOOP +CONTLOOP: + SUBI.L #27,D0 ...D0 IS L := K-27 + CLR.L ENDFLAG(a6) + BRA.B WORK +LASTLOOP: + CLR.L D0 ...D0 IS L := 0 + MOVE.L #1,ENDFLAG(a6) + +WORK: +*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN +*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. + +*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), +*--2**L * (PIby2_1), 2**L * (PIby2_2) + + MOVE.L #$00003FFE,D2 ...BIASED EXPO OF 2/PI + SUB.L D0,D2 ...BIASED EXPO OF 2**(-L)*(2/PI) + + MOVE.L #$A2F9836E,FP_SCR1+4(a6) + MOVE.L #$4E44152A,FP_SCR1+8(a6) + MOVE.W D2,FP_SCR1(a6) ...FP_SCR1 is 2**(-L)*(2/PI) + + FMOVE.X FP0,FP2 + FMUL.X FP_SCR1(a6),FP2 +*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN +*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N +*--WILL BE TOO INEFFICIENT. 
THE WAY AROUND IT IS THAT +*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE +*--US THE DESIRED VALUE IN FLOATING POINT. + +*--HIDE SIX CYCLES OF INSTRUCTION + MOVE.L A1,D2 + SWAP D2 + ANDI.L #$80000000,D2 + ORI.L #$5F000000,D2 ...D2 IS SIGN(INARG)*2**63 IN SGL + MOVE.L D2,TWOTO63(a6) + + MOVE.L D0,D2 + ADDI.L #$00003FFF,D2 ...BIASED EXPO OF 2**L * (PI/2) + +*--FP2 IS READY + FADD.S TWOTO63(a6),FP2 ...THE FRACTIONAL PART OF FP2 IS ROUNDED + +*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2 + MOVE.W D2,FP_SCR2(a6) + CLR.W FP_SCR2+2(a6) + MOVE.L #$C90FDAA2,FP_SCR2+4(a6) + CLR.L FP_SCR2+8(a6) ...FP_SCR2 is 2**(L) * Piby2_1 + +*--FP2 IS READY + FSUB.S TWOTO63(a6),FP2 ...FP2 is N + + ADDI.L #$00003FDD,D0 + MOVE.W D0,FP_SCR3(a6) + CLR.W FP_SCR3+2(a6) + MOVE.L #$85A308D3,FP_SCR3+4(a6) + CLR.L FP_SCR3+8(a6) ...FP_SCR3 is 2**(L) * Piby2_2 + + MOVE.L ENDFLAG(a6),D0 + +*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and +*--P2 = 2**(L) * Piby2_2 + FMOVE.X FP2,FP4 + FMul.X FP_SCR2(a6),FP4 ...W = N*P1 + FMove.X FP2,FP5 + FMul.X FP_SCR3(a6),FP5 ...w = N*P2 + FMove.X FP4,FP3 +*--we want P+p = W+w but |p| <= half ulp of P +*--Then, we need to compute A := R-P and a := r-p + FAdd.X FP5,FP3 ...FP3 is P + FSub.X FP3,FP4 ...W-P + + FSub.X FP3,FP0 ...FP0 is A := R - P + FAdd.X FP5,FP4 ...FP4 is p = (W-P)+w + + FMove.X FP0,FP3 ...FP3 A + FSub.X FP4,FP1 ...FP1 is a := r - p + +*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but +*--|r| <= half ulp of R. 
+ FAdd.X FP1,FP0 ...FP0 is R := A+a +*--No need to calculate r if this is the last loop + TST.L D0 + BGT.W RESTORE + +*--Need to calculate r + FSub.X FP0,FP3 ...A-R + FAdd.X FP3,FP1 ...FP1 is r := (A-R)+a + BRA.W LOOP + +RESTORE: + FMOVE.L FP2,N(a6) + MOVE.L (A7)+,D2 + FMOVEM.X (A7)+,FP2-FP5 + + + MOVE.L ADJN(a6),D0 + CMPI.L #4,D0 + + BLT.W SINCONT + BRA.B SCCONT + + xdef ssincosd +ssincosd: +*--SIN AND COS OF X FOR DENORMALIZED X + + FMOVE.S #:3F800000,FP1 + bsr sto_cos ;store cosine result + bra t_extdnrm + + xdef ssincos +ssincos: +*--SET ADJN TO 4 + MOVE.L #4,ADJN(a6) + + FMOVE.X (a0),FP0 ...LOAD INPUT + + MOVE.L (A0),D0 + MOVE.W 4(A0),D0 + FMOVE.X FP0,X(a6) + ANDI.L #$7FFFFFFF,D0 ...COMPACTIFY X + + CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)? + BGE.B SCOK1 + BRA.W SCSM + +SCOK1: + CMPI.L #$4004BC7E,D0 ...|X| < 15 PI? + BLT.B SCMAIN + BRA.W REDUCEX + + +SCMAIN: +*--THIS IS THE USUAL CASE, |X| <= 15 PI. +*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. + FMOVE.X FP0,FP1 + FMUL.D TWOBYPI,FP1 ...X*2/PI + +*--HIDE THE NEXT THREE INSTRUCTIONS + LEA PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32 + + +*--FP1 IS NOW READY + FMOVE.L FP1,N(a6) ...CONVERT TO INTEGER + + MOVE.L N(a6),D0 + ASL.L #4,D0 + ADDA.L D0,A1 ...ADDRESS OF N*PIBY2, IN Y1, Y2 + + FSUB.X (A1)+,FP0 ...X-Y1 + FSUB.S (A1),FP0 ...FP0 IS R = (X-Y1)-Y2 + +SCCONT: +*--continuation point from REDUCEX + +*--HIDE THE NEXT TWO + MOVE.L N(a6),D0 + ROR.L #1,D0 + + TST.L D0 ...D0 < 0 IFF N IS ODD + BGE.W NEVEN + +NODD: +*--REGISTERS SAVED SO FAR: D0, A0, FP2. 
+ + FMOVE.X FP0,RPRIME(a6) + FMUL.X FP0,FP0 ...FP0 IS S = R*R + FMOVE.D SINA7,FP1 ...A7 + FMOVE.D COSB8,FP2 ...B8 + FMUL.X FP0,FP1 ...SA7 + MOVE.L d2,-(A7) + MOVE.L D0,d2 + FMUL.X FP0,FP2 ...SB8 + ROR.L #1,d2 + ANDI.L #$80000000,d2 + + FADD.D SINA6,FP1 ...A6+SA7 + EOR.L D0,d2 + ANDI.L #$80000000,d2 + FADD.D COSB7,FP2 ...B7+SB8 + + FMUL.X FP0,FP1 ...S(A6+SA7) + EOR.L d2,RPRIME(a6) + MOVE.L (A7)+,d2 + FMUL.X FP0,FP2 ...S(B7+SB8) + ROR.L #1,D0 + ANDI.L #$80000000,D0 + + FADD.D SINA5,FP1 ...A5+S(A6+SA7) + MOVE.L #$3F800000,POSNEG1(a6) + EOR.L D0,POSNEG1(a6) + FADD.D COSB6,FP2 ...B6+S(B7+SB8) + + FMUL.X FP0,FP1 ...S(A5+S(A6+SA7)) + FMUL.X FP0,FP2 ...S(B6+S(B7+SB8)) + FMOVE.X FP0,SPRIME(a6) + + FADD.D SINA4,FP1 ...A4+S(A5+S(A6+SA7)) + EOR.L D0,SPRIME(a6) + FADD.D COSB5,FP2 ...B5+S(B6+S(B7+SB8)) + + FMUL.X FP0,FP1 ...S(A4+...) + FMUL.X FP0,FP2 ...S(B5+...) + + FADD.D SINA3,FP1 ...A3+S(A4+...) + FADD.D COSB4,FP2 ...B4+S(B5+...) + + FMUL.X FP0,FP1 ...S(A3+...) + FMUL.X FP0,FP2 ...S(B4+...) + + FADD.X SINA2,FP1 ...A2+S(A3+...) + FADD.X COSB3,FP2 ...B3+S(B4+...) + + FMUL.X FP0,FP1 ...S(A2+...) + FMUL.X FP0,FP2 ...S(B3+...) + + FADD.X SINA1,FP1 ...A1+S(A2+...) + FADD.X COSB2,FP2 ...B2+S(B3+...) + + FMUL.X FP0,FP1 ...S(A1+...) + FMUL.X FP2,FP0 ...S(B2+...) + + + + FMUL.X RPRIME(a6),FP1 ...R'S(A1+...) + FADD.S COSB1,FP0 ...B1+S(B2...) + FMUL.X SPRIME(a6),FP0 ...S'(B1+S(B2+...)) + + move.l d1,-(sp) ;save users mode & precision + andi.l #$ff,d1 ;mask off all exceptions + fmove.l d1,FPCR + FADD.X RPRIME(a6),FP1 ...COS(X) + bsr sto_cos ;store cosine result + FMOVE.L (sp)+,FPCR ;restore users exceptions + FADD.S POSNEG1(a6),FP0 ...SIN(X) + + bra t_frcinx + + +NEVEN: +*--REGISTERS SAVED SO FAR: FP2. 
+ + FMOVE.X FP0,RPRIME(a6) + FMUL.X FP0,FP0 ...FP0 IS S = R*R + FMOVE.D COSB8,FP1 ...B8 + FMOVE.D SINA7,FP2 ...A7 + FMUL.X FP0,FP1 ...SB8 + FMOVE.X FP0,SPRIME(a6) + FMUL.X FP0,FP2 ...SA7 + ROR.L #1,D0 + ANDI.L #$80000000,D0 + FADD.D COSB7,FP1 ...B7+SB8 + FADD.D SINA6,FP2 ...A6+SA7 + EOR.L D0,RPRIME(a6) + EOR.L D0,SPRIME(a6) + FMUL.X FP0,FP1 ...S(B7+SB8) + ORI.L #$3F800000,D0 + MOVE.L D0,POSNEG1(a6) + FMUL.X FP0,FP2 ...S(A6+SA7) + + FADD.D COSB6,FP1 ...B6+S(B7+SB8) + FADD.D SINA5,FP2 ...A5+S(A6+SA7) + + FMUL.X FP0,FP1 ...S(B6+S(B7+SB8)) + FMUL.X FP0,FP2 ...S(A5+S(A6+SA7)) + + FADD.D COSB5,FP1 ...B5+S(B6+S(B7+SB8)) + FADD.D SINA4,FP2 ...A4+S(A5+S(A6+SA7)) + + FMUL.X FP0,FP1 ...S(B5+...) + FMUL.X FP0,FP2 ...S(A4+...) + + FADD.D COSB4,FP1 ...B4+S(B5+...) + FADD.D SINA3,FP2 ...A3+S(A4+...) + + FMUL.X FP0,FP1 ...S(B4+...) + FMUL.X FP0,FP2 ...S(A3+...) + + FADD.X COSB3,FP1 ...B3+S(B4+...) + FADD.X SINA2,FP2 ...A2+S(A3+...) + + FMUL.X FP0,FP1 ...S(B3+...) + FMUL.X FP0,FP2 ...S(A2+...) + + FADD.X COSB2,FP1 ...B2+S(B3+...) + FADD.X SINA1,FP2 ...A1+S(A2+...) + + FMUL.X FP0,FP1 ...S(B2+...) + fmul.x fp2,fp0 ...s(a1+...) + + + + FADD.S COSB1,FP1 ...B1+S(B2...) + FMUL.X RPRIME(a6),FP0 ...R'S(A1+...) 
+ FMUL.X SPRIME(a6),FP1 ...S'(B1+S(B2+...)) + + move.l d1,-(sp) ;save users mode & precision + andi.l #$ff,d1 ;mask off all exceptions + fmove.l d1,FPCR + FADD.S POSNEG1(a6),FP1 ...COS(X) + bsr sto_cos ;store cosine result + FMOVE.L (sp)+,FPCR ;restore users exceptions + FADD.X RPRIME(a6),FP0 ...SIN(X) + + bra t_frcinx + +SCBORS: + CMPI.L #$3FFF8000,D0 + BGT.W REDUCEX + + +SCSM: + CLR.W XDCARE(a6) + FMOVE.S #:3F800000,FP1 + + move.l d1,-(sp) ;save users mode & precision + andi.l #$ff,d1 ;mask off all exceptions + fmove.l d1,FPCR + FSUB.S #:00800000,FP1 + bsr sto_cos ;store cosine result + FMOVE.L (sp)+,FPCR ;restore users exceptions + FMOVE.X X(a6),FP0 + bra t_frcinx + + end diff --git a/sys/arch/m68k/fpsp/ssinh.sa b/sys/arch/m68k/fpsp/ssinh.sa new file mode 100644 index 00000000000..8b555f076c1 --- /dev/null +++ b/sys/arch/m68k/fpsp/ssinh.sa @@ -0,0 +1,160 @@ +* $NetBSD: ssinh.sa,v 1.3 1994/10/26 07:50:05 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. 
Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* ssinh.sa 3.1 12/10/90 +* +* The entry point sSinh computes the hyperbolic sine of +* an input argument; sSinhd does the same except for denormalized +* input. +* +* Input: Double-extended number X in location pointed to +* by address register a0. +* +* Output: The value sinh(X) returned in floating-point register Fp0. +* +* Accuracy and Monotonicity: The returned result is within 3 ulps in +* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the +* result is subsequently rounded to double precision. The +* result is provably monotonic in double precision. +* +* Speed: The program sSINH takes approximately 280 cycles. +* +* Algorithm: +* +* SINH +* 1. If |X| > 16380 log2, go to 3. +* +* 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formulae +* y = |X|, sgn = sign(X), and z = expm1(Y), +* sinh(X) = sgn*(1/2)*( z + z/(1+z) ). +* Exit. +* +* 3. If |X| > 16480 log2, go to 5. +* +* 4. (16380 log2 < |X| <= 16480 log2) +* sinh(X) = sign(X) * exp(|X|)/2. +* However, invoking exp(|X|) may cause premature overflow. +* Thus, we calculate sinh(X) as follows: +* Y := |X| +* sgn := sign(X) +* sgnFact := sgn * 2**(16380) +* Y' := Y - 16381 log2 +* sinh(X) := sgnFact * exp(Y'). +* Exit. +* +* 5. (|X| > 16480 log2) sinh(X) must overflow. Return +* sign(X)*Huge*Huge to generate overflow and an infinity with +* the appropriate sign. Huge is the largest finite number in +* extended format. Exit. 
+* + +SSINH IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + +T1 DC.L $40C62D38,$D3D64634 ... 16381 LOG2 LEAD +T2 DC.L $3D6F90AE,$B1E75CC7 ... 16381 LOG2 TRAIL + + xref t_frcinx + xref t_ovfl + xref t_extdnrm + xref setox + xref setoxm1 + + xdef ssinhd +ssinhd: +*--SINH(X) = X FOR DENORMALIZED X + + bra t_extdnrm + + xdef ssinh +ssinh: + FMOVE.x (a0),FP0 ...LOAD INPUT + + move.l (a0),d0 + move.w 4(a0),d0 + move.l d0,a1 save a copy of original (compacted) operand + AND.L #$7FFFFFFF,D0 + CMP.L #$400CB167,D0 + BGT.B SINHBIG + +*--THIS IS THE USUAL CASE, |X| < 16380 LOG2 +*--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) ) + + FABS.X FP0 ...Y = |X| + + movem.l a1/d1,-(sp) + fmovem.x fp0,(a0) + clr.l d1 + bsr setoxm1 ...FP0 IS Z = EXPM1(Y) + fmove.l #0,fpcr + movem.l (sp)+,a1/d1 + + FMOVE.X FP0,FP1 + FADD.S #:3F800000,FP1 ...1+Z + FMOVE.X FP0,-(sp) + FDIV.X FP1,FP0 ...Z/(1+Z) + MOVE.L a1,d0 + AND.L #$80000000,D0 + OR.L #$3F000000,D0 + FADD.X (sp)+,FP0 + MOVE.L D0,-(sp) + + fmove.l d1,fpcr + fmul.s (sp)+,fp0 ;last fp inst - possible exceptions set + + bra t_frcinx + +SINHBIG: + cmp.l #$400CB2B3,D0 + bgt t_ovfl + FABS.X FP0 + FSUB.D T1(pc),FP0 ...(|X|-16381LOG2_LEAD) + clr.l -(sp) + move.l #$80000000,-(sp) + move.l a1,d0 + AND.L #$80000000,D0 + OR.L #$7FFB0000,D0 + MOVE.L D0,-(sp) ...EXTENDED FMT + FSUB.D T2(pc),FP0 ...|X| - 16381 LOG2, ACCURATE + + move.l d1,-(sp) + clr.l d1 + fmovem.x fp0,(a0) + bsr setox + fmove.l (sp)+,fpcr + + fmul.x (sp)+,fp0 ;possible exception + bra t_frcinx + + end diff --git a/sys/arch/m68k/fpsp/stan.sa b/sys/arch/m68k/fpsp/stan.sa new file mode 100644 index 00000000000..9bc9904a000 --- /dev/null +++ b/sys/arch/m68k/fpsp/stan.sa @@ -0,0 +1,480 @@ +* $NetBSD: stan.sa,v 1.3 1994/10/26 07:50:10 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. 
+* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* stan.sa 3.3 7/29/91 +* +* The entry point stan computes the tangent of +* an input argument; +* stand does the same except for denormalized input. +* +* Input: Double-extended number X in location pointed to +* by address register a0. +* +* Output: The value tan(X) returned in floating-point register Fp0. +* +* Accuracy and Monotonicity: The returned result is within 3 ulp in +* 64 significant bit, i.e. within 0.5001 ulp to 53 bits if the +* result is subsequently rounded to double precision. The +* result is provably monotonic in double precision. +* +* Speed: The program sTAN takes approximately 170 cycles for +* input argument X such that |X| < 15Pi, which is the usual +* situation. 
+* +* Algorithm: +* +* 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. +* +* 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let +* k = N mod 2, so in particular, k = 0 or 1. +* +* 3. If k is odd, go to 5. +* +* 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a +* rational function U/V where +* U = r + r*s*(P1 + s*(P2 + s*P3)), and +* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. +* Exit. +* +* 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a +* rational function U/V where +* U = r + r*s*(P1 + s*(P2 + s*P3)), and +* V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, +* -Cot(r) = -V/U. Exit. +* +* 6. If |X| > 1, go to 8. +* +* 7. (|X|<2**(-40)) Tan(X) = X. Exit. +* +* 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2. +* + +STAN IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + +BOUNDS1 DC.L $3FD78000,$4004BC7E +TWOBYPI DC.L $3FE45F30,$6DC9C883 + +TANQ4 DC.L $3EA0B759,$F50F8688 +TANP3 DC.L $BEF2BAA5,$A8924F04 + +TANQ3 DC.L $BF346F59,$B39BA65F,$00000000,$00000000 + +TANP2 DC.L $3FF60000,$E073D3FC,$199C4A00,$00000000 + +TANQ2 DC.L $3FF90000,$D23CD684,$15D95FA1,$00000000 + +TANP1 DC.L $BFFC0000,$8895A6C5,$FB423BCA,$00000000 + +TANQ1 DC.L $BFFD0000,$EEF57E0D,$A84BC8CE,$00000000 + +INVTWOPI DC.L $3FFC0000,$A2F9836E,$4E44152A,$00000000 + +TWOPI1 DC.L $40010000,$C90FDAA2,$00000000,$00000000 +TWOPI2 DC.L $3FDF0000,$85A308D4,$00000000,$00000000 + +*--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING +*--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT +*--MOST 69 BITS LONG. 
+ xdef PITBL +PITBL: + DC.L $C0040000,$C90FDAA2,$2168C235,$21800000 + DC.L $C0040000,$C2C75BCD,$105D7C23,$A0D00000 + DC.L $C0040000,$BC7EDCF7,$FF523611,$A1E80000 + DC.L $C0040000,$B6365E22,$EE46F000,$21480000 + DC.L $C0040000,$AFEDDF4D,$DD3BA9EE,$A1200000 + DC.L $C0040000,$A9A56078,$CC3063DD,$21FC0000 + DC.L $C0040000,$A35CE1A3,$BB251DCB,$21100000 + DC.L $C0040000,$9D1462CE,$AA19D7B9,$A1580000 + DC.L $C0040000,$96CBE3F9,$990E91A8,$21E00000 + DC.L $C0040000,$90836524,$88034B96,$20B00000 + DC.L $C0040000,$8A3AE64F,$76F80584,$A1880000 + DC.L $C0040000,$83F2677A,$65ECBF73,$21C40000 + DC.L $C0030000,$FB53D14A,$A9C2F2C2,$20000000 + DC.L $C0030000,$EEC2D3A0,$87AC669F,$21380000 + DC.L $C0030000,$E231D5F6,$6595DA7B,$A1300000 + DC.L $C0030000,$D5A0D84C,$437F4E58,$9FC00000 + DC.L $C0030000,$C90FDAA2,$2168C235,$21000000 + DC.L $C0030000,$BC7EDCF7,$FF523611,$A1680000 + DC.L $C0030000,$AFEDDF4D,$DD3BA9EE,$A0A00000 + DC.L $C0030000,$A35CE1A3,$BB251DCB,$20900000 + DC.L $C0030000,$96CBE3F9,$990E91A8,$21600000 + DC.L $C0030000,$8A3AE64F,$76F80584,$A1080000 + DC.L $C0020000,$FB53D14A,$A9C2F2C2,$1F800000 + DC.L $C0020000,$E231D5F6,$6595DA7B,$A0B00000 + DC.L $C0020000,$C90FDAA2,$2168C235,$20800000 + DC.L $C0020000,$AFEDDF4D,$DD3BA9EE,$A0200000 + DC.L $C0020000,$96CBE3F9,$990E91A8,$20E00000 + DC.L $C0010000,$FB53D14A,$A9C2F2C2,$1F000000 + DC.L $C0010000,$C90FDAA2,$2168C235,$20000000 + DC.L $C0010000,$96CBE3F9,$990E91A8,$20600000 + DC.L $C0000000,$C90FDAA2,$2168C235,$1F800000 + DC.L $BFFF0000,$C90FDAA2,$2168C235,$1F000000 + DC.L $00000000,$00000000,$00000000,$00000000 + DC.L $3FFF0000,$C90FDAA2,$2168C235,$9F000000 + DC.L $40000000,$C90FDAA2,$2168C235,$9F800000 + DC.L $40010000,$96CBE3F9,$990E91A8,$A0600000 + DC.L $40010000,$C90FDAA2,$2168C235,$A0000000 + DC.L $40010000,$FB53D14A,$A9C2F2C2,$9F000000 + DC.L $40020000,$96CBE3F9,$990E91A8,$A0E00000 + DC.L $40020000,$AFEDDF4D,$DD3BA9EE,$20200000 + DC.L $40020000,$C90FDAA2,$2168C235,$A0800000 + DC.L $40020000,$E231D5F6,$6595DA7B,$20B00000 + 
DC.L $40020000,$FB53D14A,$A9C2F2C2,$9F800000 + DC.L $40030000,$8A3AE64F,$76F80584,$21080000 + DC.L $40030000,$96CBE3F9,$990E91A8,$A1600000 + DC.L $40030000,$A35CE1A3,$BB251DCB,$A0900000 + DC.L $40030000,$AFEDDF4D,$DD3BA9EE,$20A00000 + DC.L $40030000,$BC7EDCF7,$FF523611,$21680000 + DC.L $40030000,$C90FDAA2,$2168C235,$A1000000 + DC.L $40030000,$D5A0D84C,$437F4E58,$1FC00000 + DC.L $40030000,$E231D5F6,$6595DA7B,$21300000 + DC.L $40030000,$EEC2D3A0,$87AC669F,$A1380000 + DC.L $40030000,$FB53D14A,$A9C2F2C2,$A0000000 + DC.L $40040000,$83F2677A,$65ECBF73,$A1C40000 + DC.L $40040000,$8A3AE64F,$76F80584,$21880000 + DC.L $40040000,$90836524,$88034B96,$A0B00000 + DC.L $40040000,$96CBE3F9,$990E91A8,$A1E00000 + DC.L $40040000,$9D1462CE,$AA19D7B9,$21580000 + DC.L $40040000,$A35CE1A3,$BB251DCB,$A1100000 + DC.L $40040000,$A9A56078,$CC3063DD,$A1FC0000 + DC.L $40040000,$AFEDDF4D,$DD3BA9EE,$21200000 + DC.L $40040000,$B6365E22,$EE46F000,$A1480000 + DC.L $40040000,$BC7EDCF7,$FF523611,$21E80000 + DC.L $40040000,$C2C75BCD,$105D7C23,$20D00000 + DC.L $40040000,$C90FDAA2,$2168C235,$A1800000 + +INARG equ FP_SCR4 + +TWOTO63 equ L_SCR1 +ENDFLAG equ L_SCR2 +N equ L_SCR3 + + xref t_frcinx + xref t_extdnrm + + xdef stand +stand: +*--TAN(X) = X FOR DENORMALIZED X + + bra t_extdnrm + + xdef stan +stan: + FMOVE.X (a0),FP0 ...LOAD INPUT + + MOVE.L (A0),D0 + MOVE.W 4(A0),D0 + ANDI.L #$7FFFFFFF,D0 + + CMPI.L #$3FD78000,D0 ...|X| >= 2**(-40)? + BGE.B TANOK1 + BRA.W TANSM +TANOK1: + CMPI.L #$4004BC7E,D0 ...|X| < 15 PI? + BLT.B TANMAIN + BRA.W REDUCEX + + +TANMAIN: +*--THIS IS THE USUAL CASE, |X| <= 15 PI. +*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. 
+ FMOVE.X FP0,FP1 + FMUL.D TWOBYPI,FP1 ...X*2/PI + +*--HIDE THE NEXT TWO INSTRUCTIONS + lea.l PITBL+$200,a1 ...TABLE OF N*PI/2, N = -32,...,32 + +*--FP1 IS NOW READY + FMOVE.L FP1,D0 ...CONVERT TO INTEGER + + ASL.L #4,D0 + ADDA.L D0,a1 ...ADDRESS N*PIBY2 IN Y1, Y2 + + FSUB.X (a1)+,FP0 ...X-Y1 +*--HIDE THE NEXT ONE + + FSUB.S (a1),FP0 ...FP0 IS R = (X-Y1)-Y2 + + ROR.L #5,D0 + ANDI.L #$80000000,D0 ...D0 WAS ODD IFF D0 < 0 + +TANCONT: + + TST.L D0 + BLT.W NODD + + FMOVE.X FP0,FP1 + FMUL.X FP1,FP1 ...S = R*R + + FMOVE.D TANQ4,FP3 + FMOVE.D TANP3,FP2 + + FMUL.X FP1,FP3 ...SQ4 + FMUL.X FP1,FP2 ...SP3 + + FADD.D TANQ3,FP3 ...Q3+SQ4 + FADD.X TANP2,FP2 ...P2+SP3 + + FMUL.X FP1,FP3 ...S(Q3+SQ4) + FMUL.X FP1,FP2 ...S(P2+SP3) + + FADD.X TANQ2,FP3 ...Q2+S(Q3+SQ4) + FADD.X TANP1,FP2 ...P1+S(P2+SP3) + + FMUL.X FP1,FP3 ...S(Q2+S(Q3+SQ4)) + FMUL.X FP1,FP2 ...S(P1+S(P2+SP3)) + + FADD.X TANQ1,FP3 ...Q1+S(Q2+S(Q3+SQ4)) + FMUL.X FP0,FP2 ...RS(P1+S(P2+SP3)) + + FMUL.X FP3,FP1 ...S(Q1+S(Q2+S(Q3+SQ4))) + + + FADD.X FP2,FP0 ...R+RS(P1+S(P2+SP3)) + + + FADD.S #:3F800000,FP1 ...1+S(Q1+...) + + FMOVE.L d1,fpcr ;restore users exceptions + FDIV.X FP1,FP0 ;last inst - possible exception set + + bra t_frcinx + +NODD: + FMOVE.X FP0,FP1 + FMUL.X FP0,FP0 ...S = R*R + + FMOVE.D TANQ4,FP3 + FMOVE.D TANP3,FP2 + + FMUL.X FP0,FP3 ...SQ4 + FMUL.X FP0,FP2 ...SP3 + + FADD.D TANQ3,FP3 ...Q3+SQ4 + FADD.X TANP2,FP2 ...P2+SP3 + + FMUL.X FP0,FP3 ...S(Q3+SQ4) + FMUL.X FP0,FP2 ...S(P2+SP3) + + FADD.X TANQ2,FP3 ...Q2+S(Q3+SQ4) + FADD.X TANP1,FP2 ...P1+S(P2+SP3) + + FMUL.X FP0,FP3 ...S(Q2+S(Q3+SQ4)) + FMUL.X FP0,FP2 ...S(P1+S(P2+SP3)) + + FADD.X TANQ1,FP3 ...Q1+S(Q2+S(Q3+SQ4)) + FMUL.X FP1,FP2 ...RS(P1+S(P2+SP3)) + + FMUL.X FP3,FP0 ...S(Q1+S(Q2+S(Q3+SQ4))) + + + FADD.X FP2,FP1 ...R+RS(P1+S(P2+SP3)) + FADD.S #:3F800000,FP0 ...1+S(Q1+...) 
+ + + FMOVE.X FP1,-(sp) + EORI.L #$80000000,(sp) + + FMOVE.L d1,fpcr ;restore users exceptions + FDIV.X (sp)+,FP0 ;last inst - possible exception set + + bra t_frcinx + +TANBORS: +*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. +*--IF |X| < 2**(-40), RETURN X. + CMPI.L #$3FFF8000,D0 + BGT.B REDUCEX + +TANSM: + + FMOVE.X FP0,-(sp) + FMOVE.L d1,fpcr ;restore users exceptions + FMOVE.X (sp)+,FP0 ;last inst - possible exception set + + bra t_frcinx + + +REDUCEX: +*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. +*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING +*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. + + FMOVEM.X FP2-FP5,-(A7) ...save FP2 through FP5 + MOVE.L D2,-(A7) + FMOVE.S #:00000000,FP1 + +*--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that +*--there is a danger of unwanted overflow in first LOOP iteration. In this +*--case, reduce argument by one remainder step to make subsequent reduction +*--safe. + cmpi.l #$7ffeffff,d0 ;is argument dangerously large? + bne.b LOOP + move.l #$7ffe0000,FP_SCR2(a6) ;yes +* ;create 2**16383*PI/2 + move.l #$c90fdaa2,FP_SCR2+4(a6) + clr.l FP_SCR2+8(a6) + ftst.x fp0 ;test sign of argument + move.l #$7fdc0000,FP_SCR3(a6) ;create low half of 2**16383* +* ;PI/2 at FP_SCR3 + move.l #$85a308d3,FP_SCR3+4(a6) + clr.l FP_SCR3+8(a6) + fblt.w red_neg + or.w #$8000,FP_SCR2(a6) ;positive arg + or.w #$8000,FP_SCR3(a6) +red_neg: + fadd.x FP_SCR2(a6),fp0 ;high part of reduction is exact + fmove.x fp0,fp1 ;save high result in fp1 + fadd.x FP_SCR3(a6),fp0 ;low part of reduction + fsub.x fp0,fp1 ;determine low component of result + fadd.x FP_SCR3(a6),fp1 ;fp0/fp1 are reduced argument. + +*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. 
+*--integer quotient will be stored in N +*--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) + +LOOP: + FMOVE.X FP0,INARG(a6) ...+-2**K * F, 1 <= F < 2 + MOVE.W INARG(a6),D0 + MOVE.L D0,A1 ...save a copy of D0 + ANDI.L #$00007FFF,D0 + SUBI.L #$00003FFF,D0 ...D0 IS K + CMPI.L #28,D0 + BLE.B LASTLOOP +CONTLOOP: + SUBI.L #27,D0 ...D0 IS L := K-27 + CLR.L ENDFLAG(a6) + BRA.B WORK +LASTLOOP: + CLR.L D0 ...D0 IS L := 0 + MOVE.L #1,ENDFLAG(a6) + +WORK: +*--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN +*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. + +*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), +*--2**L * (PIby2_1), 2**L * (PIby2_2) + + MOVE.L #$00003FFE,D2 ...BIASED EXPO OF 2/PI + SUB.L D0,D2 ...BIASED EXPO OF 2**(-L)*(2/PI) + + MOVE.L #$A2F9836E,FP_SCR1+4(a6) + MOVE.L #$4E44152A,FP_SCR1+8(a6) + MOVE.W D2,FP_SCR1(a6) ...FP_SCR1 is 2**(-L)*(2/PI) + + FMOVE.X FP0,FP2 + FMUL.X FP_SCR1(a6),FP2 +*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN +*--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N +*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT +*--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE +*--US THE DESIRED VALUE IN FLOATING POINT. 
+ +*--HIDE SIX CYCLES OF INSTRUCTION + MOVE.L A1,D2 + SWAP D2 + ANDI.L #$80000000,D2 + ORI.L #$5F000000,D2 ...D2 IS SIGN(INARG)*2**63 IN SGL + MOVE.L D2,TWOTO63(a6) + + MOVE.L D0,D2 + ADDI.L #$00003FFF,D2 ...BIASED EXPO OF 2**L * (PI/2) + +*--FP2 IS READY + FADD.S TWOTO63(a6),FP2 ...THE FRACTIONAL PART OF FP2 IS ROUNDED + +*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2 + MOVE.W D2,FP_SCR2(a6) + CLR.W FP_SCR2+2(a6) + MOVE.L #$C90FDAA2,FP_SCR2+4(a6) + CLR.L FP_SCR2+8(a6) ...FP_SCR2 is 2**(L) * Piby2_1 + +*--FP2 IS READY + FSUB.S TWOTO63(a6),FP2 ...FP2 is N + + ADDI.L #$00003FDD,D0 + MOVE.W D0,FP_SCR3(a6) + CLR.W FP_SCR3+2(a6) + MOVE.L #$85A308D3,FP_SCR3+4(a6) + CLR.L FP_SCR3+8(a6) ...FP_SCR3 is 2**(L) * Piby2_2 + + MOVE.L ENDFLAG(a6),D0 + +*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and +*--P2 = 2**(L) * Piby2_2 + FMOVE.X FP2,FP4 + FMul.X FP_SCR2(a6),FP4 ...W = N*P1 + FMove.X FP2,FP5 + FMul.X FP_SCR3(a6),FP5 ...w = N*P2 + FMove.X FP4,FP3 +*--we want P+p = W+w but |p| <= half ulp of P +*--Then, we need to compute A := R-P and a := r-p + FAdd.X FP5,FP3 ...FP3 is P + FSub.X FP3,FP4 ...W-P + + FSub.X FP3,FP0 ...FP0 is A := R - P + FAdd.X FP5,FP4 ...FP4 is p = (W-P)+w + + FMove.X FP0,FP3 ...FP3 A + FSub.X FP4,FP1 ...FP1 is a := r - p + +*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but +*--|r| <= half ulp of R. 
+ FAdd.X FP1,FP0 ...FP0 is R := A+a +*--No need to calculate r if this is the last loop + TST.L D0 + BGT.W RESTORE + +*--Need to calculate r + FSub.X FP0,FP3 ...A-R + FAdd.X FP3,FP1 ...FP1 is r := (A-R)+a + BRA.W LOOP + +RESTORE: + FMOVE.L FP2,N(a6) + MOVE.L (A7)+,D2 + FMOVEM.X (A7)+,FP2-FP5 + + + MOVE.L N(a6),D0 + ROR.L #1,D0 + + + BRA.W TANCONT + + end diff --git a/sys/arch/m68k/fpsp/stanh.sa b/sys/arch/m68k/fpsp/stanh.sa new file mode 100644 index 00000000000..6c1697c4226 --- /dev/null +++ b/sys/arch/m68k/fpsp/stanh.sa @@ -0,0 +1,210 @@ +* $NetBSD: stanh.sa,v 1.3 1994/10/26 07:50:12 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. 
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* stanh.sa 3.1 12/10/90
+*
+* The entry point sTanh computes the hyperbolic tangent of
+* an input argument; sTanhd does the same except for denormalized
+* input.
+*
+* Input: Double-extended number X in location pointed to
+*        by address register a0.
+*
+* Output: The value tanh(X) returned in floating-point register Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 3 ulps in
+*        64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+*        result is subsequently rounded to double precision. The
+*        result is provably monotonic in double precision.
+*
+* Speed: The program stanh takes approximately 270 cycles.
+*
+* Algorithm:
+*
+* TANH
+* 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
+*
+* 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
+*    sgn := sign(X), y := 2|X|, z := expm1(y), and
+*    tanh(X) = sgn*( z/(2+z) ).
+*    Exit.
+*
+* 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
+*    go to 7.
+*
+* 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
+*
+* 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
+*    sgn := sign(X), y := 2|X|, z := exp(y),
+*    tanh(X) = sgn - [ sgn*2/(1+z) ].
+*    Exit.
+*
+* 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we
+*    calculate Tanh(X) by
+*    sgn := sign(X), Tiny := 2**(-126),
+*    tanh(X) := sgn - sgn*Tiny.
+*    Exit.
+*
+* 7. (|X| < 2**(-40)). Tanh(X) = X. Exit.
+*
+
+STANH	IDNT	2,1 Motorola 040 Floating Point Software Package
+
+	section	8
+
+	include	fpsp.h
+
+X	equ	FP_SCR5		;scratch copy of the input X
+XDCARE	equ	X+2		;word right after the sign/exponent word of X
+XFRAC	equ	X+4		;fraction part of X (not referenced by the code below)
+
+SGN	equ	L_SCR3		;holds the sign bit of X
+
+V	equ	FP_SCR6		;holds the divisor SGN*(Z+2) in the usual case
+
+BOUNDS1	DC.L	$3FD78000,$3FFFDDCE	... 2^(-40), (5/2)LOG2
+
+	xref	t_frcinx
+	xref	t_extdnrm
+	xref	setox
+	xref	setoxm1
+
+	xdef	stanhd
+stanhd:
+*--TANH(X) = X FOR DENORMALIZED X
+
+	bra	t_extdnrm	;tail-branch to the external routine t_extdnrm
+
+	xdef	stanh
+stanh:
+	FMOVE.X	(a0),FP0	...LOAD INPUT
+
+	FMOVE.X	FP0,X(a6)	;keep a copy of X in the scratch slot
+	move.l	(a0),d0		;d0 upper word = sign/exponent word of X
+	move.w	4(a0),d0	;d0 lower word = first 16 bits of the fraction at 4(a0)
+	MOVE.L	D0,X(a6)	;first longword of the X copy now holds this compact form
+	AND.L	#$7FFFFFFF,D0	;clear the sign bit: compact form of |X|
+	CMP2.L	BOUNDS1(pc),D0	...2**(-40) < |X| < (5/2)LOG2 ?
+	BCS.B	TANHBORS	;CMP2 sets carry when d0 lies outside the BOUNDS1 pair
+
+*--THIS IS THE USUAL CASE
+*--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
+
+	MOVE.L	X(a6),D0	;compact form again, sign bit still present
+	MOVE.L	D0,SGN(a6)
+	AND.L	#$7FFF0000,D0	;keep the exponent only (drops sign and fraction bits)
+	ADD.L	#$00010000,D0	...EXPONENT OF 2|X|
+	MOVE.L	D0,X(a6)	;the X copy is now 2|X| (fraction longwords untouched)
+	AND.L	#$80000000,SGN(a6)	;SGN = sign bit of X only
+	FMOVE.X	X(a6),FP0	...FP0 IS Y = 2|X|
+
+	move.l	d1,-(a7)	;save d1 (the value written to FPCR below)
+	clr.l	d1		;call setoxm1 with d1 = 0
+	fmovem.x	fp0,(a0)	;store Y at (a0) -- presumably where setoxm1 reads its operand
+	bsr	setoxm1		...FP0 IS Z = EXPM1(Y)
+	move.l	(a7)+,d1	;recover d1
+
+	FMOVE.X	FP0,FP1
+	FADD.S	#:40000000,FP1	...Z+2
+	MOVE.L	SGN(a6),D0
+	FMOVE.X	FP1,V(a6)
+	EOR.L	D0,V(a6)	;give V the sign of X: V = SGN*(Z+2)
+
+	FMOVE.L	d1,FPCR		;restore users exceptions
+	FDIV.X	V(a6),FP0	;FP0 = Z / (SGN*(Z+2))
+	bra	t_frcinx
+
+TANHBORS:
+	CMP.L	#$3FFF8000,D0	;compact form of 1.0
+	BLT.W	TANHSM		;out of bounds and |X| < 1: the small case
+
+	CMP.L	#$40048AA1,D0	;compact form of the 50 LOG2 threshold
+	BGT.W	TANHHUGE
+
+*-- (5/2) LOG2 < |X| < 50 LOG2,
+*--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
+*--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
+
+	MOVE.L	X(a6),D0	;compact form, sign bit still present
+	MOVE.L	D0,SGN(a6)
+	AND.L	#$7FFF0000,D0	;keep the exponent only
+	ADD.L	#$00010000,D0	...EXPO OF 2|X|
+	MOVE.L	D0,X(a6)	...Y = 2|X|
+	AND.L	#$80000000,SGN(a6)	;SGN = sign bit of X only
+	MOVE.L	SGN(a6),D0
+	FMOVE.X	X(a6),FP0	...Y = 2|X|
+
+	move.l	d1,-(a7)	;save d1 (the value written to FPCR below)
+	clr.l	d1		;call setox with d1 = 0
+	fmovem.x	fp0,(a0)	;store Y at (a0) -- presumably where setox reads its operand
+	bsr	setox		...FP0 IS EXP(Y)
+	move.l	(a7)+,d1	;recover d1
+	move.l	SGN(a6),d0	;reload the sign bit after the call
+	FADD.S	#:3F800000,FP0	...EXP(Y)+1
+
+	EOR.L	#$C0000000,D0	...-SIGN(X)*2
+	FMOVE.S	d0,FP1		...-SIGN(X)*2 IN SGL FMT
+	FDIV.X	FP0,FP1		...-SIGN(X)2 / [EXP(Y)+1 ]
+
+	MOVE.L	SGN(a6),D0
+	OR.L	#$3F800000,D0	...SGN
+	FMOVE.S	d0,FP0		...SGN IN SGL FMT
+
+	FMOVE.L	d1,FPCR		;restore users exceptions
+	FADD.X	fp1,FP0		;FP0 = SGN - SGN*2/[EXP(Y)+1]
+
+	bra	t_frcinx
+
+TANHSM:
+	CLR.W	XDCARE(a6)	;clear the fraction bits the compact form left at X+2
+
+	FMOVE.L	d1,FPCR		;restore users exceptions
+	FMOVE.X	X(a6),FP0	;last inst - possible exception set
+
+	bra	t_frcinx
+
+TANHHUGE:
+*---RETURN SGN(X) - SGN(X)EPS
+	MOVE.L	X(a6),D0
+	AND.L	#$80000000,D0	;sign bit of X
+	OR.L	#$3F800000,D0	;single-precision 1.0 with the sign of X
+	FMOVE.S	d0,FP0
+	AND.L	#$80000000,D0	;back to the sign bit alone
+	EOR.L	#$80800000,D0	...-SIGN(X)*EPS
+
+	FMOVE.L	d1,FPCR		;restore users exceptions
+	FADD.S	d0,FP0		;FP0 = SGN - SGN*EPS, EPS = 2**(-126) in single format
+
+	bra	t_frcinx
+
+	end
diff --git a/sys/arch/m68k/fpsp/sto_res.sa b/sys/arch/m68k/fpsp/sto_res.sa
new file mode 100644
index 00000000000..2f9141b41c1
--- /dev/null
+++ b/sys/arch/m68k/fpsp/sto_res.sa
@@ -0,0 +1,123 @@
+* $NetBSD: sto_res.sa,v 1.3 1994/10/26 07:50:14 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* sto_res.sa 3.1 12/10/90
+*
+* Takes the result and puts it in where the user expects it.
+* Library functions return result in fp0. If fp0 is not the
+* users destination register then fp0 is moved to the
+* correct floating-point destination register. fp0 and fp1
+* are then restored to the original contents.
+*
+* Input: result in fp0,fp1
+*
+* d2 & a0 should be kept unmodified
+*
+* Output: moves the result to the true destination reg or mem
+*
+* Modifies: destination floating point register (or its USER_FPn slot), d0, d1
+*
+
+STO_RES	IDNT	2,1 Motorola 040 Floating Point Software Package
+
+
+	section	8
+
+	include	fpsp.h
+
+	xdef	sto_cos		;stores fp1 to the register selected by CMDREG1B{13:3}
+sto_cos:
+	bfextu		CMDREG1B(a6){13:3},d0	;extract cos destination
+	cmpi.b		#3,d0		;check for fp0-fp3 cases
+	ble.b		c_fp0123	;fp0-fp3 go to their USER_FPn slots instead
+	fmovem.x	fp1,-(a7)	;push fp1 so it can be reloaded through a dynamic mask
+	moveq.l		#7,d1
+	sub.l		d0,d1		;d1 = 7- (dest. reg. no.)
+	clr.l		d0
+	bset.l		d1,d0		;d0 is dynamic register mask
+	fmovem.x	(a7)+,d0	;pop the pushed value into the register selected by d0
+	rts
+c_fp0123:
+	tst.b		d0		;d0 = destination register number, 0-3 here
+	beq.b		c_is_fp0
+	cmpi.b		#1,d0
+	beq.b		c_is_fp1
+	cmpi.b		#2,d0
+	beq.b		c_is_fp2
+c_is_fp3:
+	fmovem.x	fp1,USER_FP3(a6)	;write fp1 to the USER_FP3 slot of the a6 frame
+	rts
+c_is_fp2:
+	fmovem.x	fp1,USER_FP2(a6)	;write fp1 to the USER_FP2 slot
+	rts
+c_is_fp1:
+	fmovem.x	fp1,USER_FP1(a6)	;write fp1 to the USER_FP1 slot
+	rts
+c_is_fp0:
+	fmovem.x	fp1,USER_FP0(a6)	;write fp1 to the USER_FP0 slot
+	rts
+
+
+	xdef	sto_res		;stores fp0 to the register selected by CMDREG1B{6:3}
+sto_res:
+	bfextu		CMDREG1B(a6){6:3},d0	;extract destination register
+	cmpi.b		#3,d0		;check for fp0-fp3 cases
+	ble.b		fp0123		;fp0-fp3 go to their USER_FPn slots instead
+	fmovem.x	fp0,-(a7)	;push fp0 so it can be reloaded through a dynamic mask
+	moveq.l		#7,d1
+	sub.l		d0,d1		;d1 = 7- (dest. reg. no.)
+	clr.l		d0
+	bset.l		d1,d0		;d0 is dynamic register mask
+	fmovem.x	(a7)+,d0	;pop the pushed value into the register selected by d0
+	rts
+fp0123:
+	tst.b		d0		;d0 = destination register number, 0-3 here
+	beq.b		is_fp0
+	cmpi.b		#1,d0
+	beq.b		is_fp1
+	cmpi.b		#2,d0
+	beq.b		is_fp2
+is_fp3:
+	fmovem.x	fp0,USER_FP3(a6)	;write fp0 to the USER_FP3 slot of the a6 frame
+	rts
+is_fp2:
+	fmovem.x	fp0,USER_FP2(a6)	;write fp0 to the USER_FP2 slot
+	rts
+is_fp1:
+	fmovem.x	fp0,USER_FP1(a6)	;write fp0 to the USER_FP1 slot
+	rts
+is_fp0:
+	fmovem.x	fp0,USER_FP0(a6)	;write fp0 to the USER_FP0 slot
+	rts
+
+	end
diff --git a/sys/arch/m68k/fpsp/stwotox.sa b/sys/arch/m68k/fpsp/stwotox.sa
new file mode 100644
index 00000000000..f0583bf30a7
--- /dev/null
+++ b/sys/arch/m68k/fpsp/stwotox.sa
@@ -0,0 +1,452 @@
+* $NetBSD: stwotox.sa,v 1.3 1994/10/26 07:50:15 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* stwotox.sa 3.1 12/10/90
+*
+* stwotox --- 2**X
+* stwotoxd --- 2**X for denormalized X
+* stentox --- 10**X
+* stentoxd --- 10**X for denormalized X
+*
+* Input: Double-extended number X in location pointed to
+*        by address register a0.
+*
+* Output: The function values are returned in Fp0.
+*
+* Accuracy and Monotonicity: The returned result is within 2 ulps in
+*        64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+*        result is subsequently rounded to double precision. The
+*        result is provably monotonic in double precision.
+*
+* Speed: The program stwotox takes approximately 190 cycles and the
+*        program stentox takes approximately 200 cycles.
+*
+* Algorithm:
+*
+* twotox
+* 1. If |X| > 16480, go to ExpBig.
+*
+* 2. If |X| < 2**(-70), go to ExpSm.
+*
+* 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
+*    decompose N as
+*    N = 64(M + M') + j, j = 0,1,2,...,63.
+*
+* 4. Overwrite r := r * log2. Then
+*    2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
+*    Go to expr to compute that expression.
+*
+* tentox
+* 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
+*
+* 2. If |X| < 2**(-70), go to ExpSm.
+*
+* 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
+*    N := round-to-int(y). Decompose N as
+*    N = 64(M + M') + j, j = 0,1,2,...,63.
+*
+* 4. Define r as
+*    r := ((X - N*L1)-N*L2) * L10
+*    where L1, L2 are the leading and trailing parts of log_10(2)/64
+*    and L10 is the natural log of 10. Then
+*    10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
+*    Go to expr to compute that expression.
+*
+* expr
+* 1. Fetch 2**(j/64) from table as Fact1 and Fact2.
+*
+* 2. Overwrite Fact1 and Fact2 by
+*    Fact1 := 2**(M) * Fact1
+*    Fact2 := 2**(M) * Fact2
+*    Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
+*
+* 3. Calculate P where 1 + P approximates exp(r):
+*    P = r + r*r*(A1+r*(A2+...+r*A5)).
+*
+* 4. Let AdjFact := 2**(M'). Return
+*    AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
+*    Exit.
+*
+* ExpBig
+* 1. Generate overflow via t_ovfl if X >= 0; otherwise, generate
+*    underflow via t_unfl (the code below branches to those routines).
+*
+* ExpSm
+* 1. Return 1 + X.
+*
+
+STWOTOX	IDNT	2,1 Motorola 040 Floating Point Software Package
+
+	section	8
+
+	include	fpsp.h
+
+BOUNDS1	DC.L	$3FB98000,$400D80C0	... 2^(-70),16480
+BOUNDS2	DC.L	$3FB98000,$400B9B07	... 2^(-70),16480 LOG2/LOG10
+
+L2TEN64	DC.L	$406A934F,$0979A371	... 64LOG10/LOG2
+L10TWO1	DC.L	$3F734413,$509F8000	... LOG2/64LOG10
+
+L10TWO2	DC.L	$BFCD0000,$C0219DC1,$DA994FD2,$00000000	;trailing part of LOG2/64LOG10, read with FMUL.X
+
+LOG10	DC.L	$40000000,$935D8DDD,$AAA8AC17,$00000000	;natural log of 10, read with FMUL.X
+
+LOG2	DC.L	$3FFE0000,$B17217F7,$D1CF79AC,$00000000	;natural log of 2, read with FMUL.X
+
+EXPA5	DC.L	$3F56C16D,$6F7BD0B2
+EXPA4	DC.L	$3F811112,$302C712C
+EXPA3	DC.L	$3FA55555,$55554CC1
+EXPA2	DC.L	$3FC55555,$55554A54
+EXPA1	DC.L	$3FE00000,$00000000,$00000000,$00000000	;read as a double by FADD.D in expr
+
+HUGE	DC.L	$7FFE0000,$FFFFFFFF,$FFFFFFFF,$00000000	;not referenced by the code in this file
+TINY	DC.L	$00010000,$FFFFFFFF,$FFFFFFFF,$00000000	;not referenced by the code in this file
+
+EXPTBL
+	DC.L	$3FFF0000,$80000000,$00000000,$3F738000	;J=0; 16 bytes per entry: 3 longwords lead, 1 longword trail
+	DC.L	$3FFF0000,$8164D1F3,$BC030773,$3FBEF7CA
+	DC.L	$3FFF0000,$82CD8698,$AC2BA1D7,$3FBDF8A9
+	DC.L	$3FFF0000,$843A28C3,$ACDE4046,$3FBCD7C9
+	DC.L	$3FFF0000,$85AAC367,$CC487B15,$BFBDE8DA
+	DC.L	$3FFF0000,$871F6196,$9E8D1010,$3FBDE85C
+	DC.L	$3FFF0000,$88980E80,$92DA8527,$3FBEBBF1
+	DC.L	$3FFF0000,$8A14D575,$496EFD9A,$3FBB80CA
+	DC.L	$3FFF0000,$8B95C1E3,$EA8BD6E7,$BFBA8373
+	DC.L	$3FFF0000,$8D1ADF5B,$7E5BA9E6,$BFBE9670
+	DC.L	$3FFF0000,$8EA4398B,$45CD53C0,$3FBDB700
+	DC.L	$3FFF0000,$9031DC43,$1466B1DC,$3FBEEEB0
+	DC.L	$3FFF0000,$91C3D373,$AB11C336,$3FBBFD6D
+	DC.L	$3FFF0000,$935A2B2F,$13E6E92C,$BFBDB319
+	DC.L	$3FFF0000,$94F4EFA8,$FEF70961,$3FBDBA2B
+	DC.L	$3FFF0000,$96942D37,$20185A00,$3FBE91D5
+	DC.L	$3FFF0000,$9837F051,$8DB8A96F,$3FBE8D5A
+	DC.L	$3FFF0000,$99E04593,$20B7FA65,$BFBCDE7B
+	DC.L	$3FFF0000,$9B8D39B9,$D54E5539,$BFBEBAAF
+	DC.L	$3FFF0000,$9D3ED9A7,$2CFFB751,$BFBD86DA
+	DC.L	$3FFF0000,$9EF53260,$91A111AE,$BFBEBEDD
+	DC.L	$3FFF0000,$A0B0510F,$B9714FC2,$3FBCC96E
+	DC.L	$3FFF0000,$A2704303,$0C496819,$BFBEC90B
+	DC.L	$3FFF0000,$A43515AE,$09E6809E,$3FBBD1DB
+	DC.L	$3FFF0000,$A5FED6A9,$B15138EA,$3FBCE5EB
+	DC.L	$3FFF0000,$A7CD93B4,$E965356A,$BFBEC274
+	DC.L	$3FFF0000,$A9A15AB4,$EA7C0EF8,$3FBEA83C
+	DC.L	$3FFF0000,$AB7A39B5,$A93ED337,$3FBECB00
+	DC.L	$3FFF0000,$AD583EEA,$42A14AC6,$3FBE9301
+	DC.L	$3FFF0000,$AF3B78AD,$690A4375,$BFBD8367
+	DC.L	$3FFF0000,$B123F581,$D2AC2590,$BFBEF05F
+	DC.L	$3FFF0000,$B311C412,$A9112489,$3FBDFB3C
+	DC.L	$3FFF0000,$B504F333,$F9DE6484,$3FBEB2FB
+	DC.L	$3FFF0000,$B6FD91E3,$28D17791,$3FBAE2CB
+	DC.L	$3FFF0000,$B8FBAF47,$62FB9EE9,$3FBCDC3C
+	DC.L	$3FFF0000,$BAFF5AB2,$133E45FB,$3FBEE9AA
+	DC.L	$3FFF0000,$BD08A39F,$580C36BF,$BFBEAEFD
+	DC.L	$3FFF0000,$BF1799B6,$7A731083,$BFBCBF51
+	DC.L	$3FFF0000,$C12C4CCA,$66709456,$3FBEF88A
+	DC.L	$3FFF0000,$C346CCDA,$24976407,$3FBD83B2
+	DC.L	$3FFF0000,$C5672A11,$5506DADD,$3FBDF8AB
+	DC.L	$3FFF0000,$C78D74C8,$ABB9B15D,$BFBDFB17
+	DC.L	$3FFF0000,$C9B9BD86,$6E2F27A3,$BFBEFE3C
+	DC.L	$3FFF0000,$CBEC14FE,$F2727C5D,$BFBBB6F8
+	DC.L	$3FFF0000,$CE248C15,$1F8480E4,$BFBCEE53
+	DC.L	$3FFF0000,$D06333DA,$EF2B2595,$BFBDA4AE
+	DC.L	$3FFF0000,$D2A81D91,$F12AE45A,$3FBC9124
+	DC.L	$3FFF0000,$D4F35AAB,$CFEDFA1F,$3FBEB243
+	DC.L	$3FFF0000,$D744FCCA,$D69D6AF4,$3FBDE69A
+	DC.L	$3FFF0000,$D99D15C2,$78AFD7B6,$BFB8BC61
+	DC.L	$3FFF0000,$DBFBB797,$DAF23755,$3FBDF610
+	DC.L	$3FFF0000,$DE60F482,$5E0E9124,$BFBD8BE1
+	DC.L	$3FFF0000,$E0CCDEEC,$2A94E111,$3FBACB12
+	DC.L	$3FFF0000,$E33F8972,$BE8A5A51,$3FBB9BFE
+	DC.L	$3FFF0000,$E5B906E7,$7C8348A8,$3FBCF2F4
+	DC.L	$3FFF0000,$E8396A50,$3C4BDC68,$3FBEF22F
+	DC.L	$3FFF0000,$EAC0C6E7,$DD24392F,$BFBDBF4A
+	DC.L	$3FFF0000,$ED4F301E,$D9942B84,$3FBEC01A
+	DC.L	$3FFF0000,$EFE4B99B,$DCDAF5CB,$3FBE8CAC
+	DC.L	$3FFF0000,$F281773C,$59FFB13A,$BFBCBB3F
+	DC.L	$3FFF0000,$F5257D15,$2486CC2C,$3FBEF73A
+	DC.L	$3FFF0000,$F7D0DF73,$0AD13BB9,$BFB8B795
+	DC.L	$3FFF0000,$FA83B2DB,$722A033A,$3FBEF84B
+	DC.L	$3FFF0000,$FD3E0C0C,$F486C175,$BFBEF581
+
+N	equ	L_SCR1		;integer N (rounded 64*X or X*64*LOG10/LOG2)
+
+X	equ	FP_SCR1		;scratch copy of the input X
+XDCARE	equ	X+2		;not referenced by the code in this file
+XFRAC	equ	X+4		;not referenced by the code in this file
+
+ADJFACT	equ	FP_SCR2		;extended value 2^(M'), built piecewise
+
+FACT1	equ	FP_SCR3		;leading part of 2^(J/64), later scaled by 2^M
+FACT1HI	equ	FACT1+4
+FACT1LOW	equ	FACT1+8
+
+FACT2	equ	FP_SCR4		;trailing part of 2^(J/64), later scaled by 2^M
+FACT2HI	equ	FACT2+4
+FACT2LOW	equ	FACT2+8
+
+	xref	t_unfl
+	xref	t_ovfl
+	xref	t_frcinx
+
+	xdef	stwotoxd
+stwotoxd:
+*--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
+
+	fmove.l		d1,fpcr		...set user's rounding mode/precision
+	Fmove.S		#:3F800000,FP0	...RETURN 1 + X
+	move.l		(a0),d0		;first longword of X (sign/exponent word in the upper half)
+	or.l		#$00800001,d0	;turn it into a tiny nonzero single that keeps X's sign bit
+	fadd.s		d0,fp0		;1 + tiny stands in for 1 + X
+	bra		t_frcinx
+
+	xdef	stwotox
+stwotox:
+*--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
+	FMOVEM.X	(a0),FP0	...LOAD INPUT, do not set cc's
+
+	MOVE.L		(A0),D0		;d0 upper word = sign/exponent word of X
+	MOVE.W		4(A0),D0	;d0 lower word = first 16 bits of the fraction
+	FMOVE.X		FP0,X(a6)	;keep a copy of X in the scratch slot
+	ANDI.L		#$7FFFFFFF,D0	;clear the sign bit: compact form of |X|
+
+	CMPI.L		#$3FB98000,D0		...|X| >= 2**(-70)?
+	BGE.B		TWOOK1
+	BRA.W		EXPBORS
+
+TWOOK1:
+	CMPI.L		#$400D80C0,D0		...|X| <= 16480?
+	BLE.B		TWOMAIN
+	BRA.W		EXPBORS
+
+
+TWOMAIN:
+*--USUAL CASE, 2^(-70) <= |X| <= 16480
+
+	FMOVE.X		FP0,FP1
+	FMUL.S		#:42800000,FP1  ...64 * X
+
+	FMOVE.L		FP1,N(a6)		...N = ROUND-TO-INT(64 X)
+	MOVE.L		d2,-(sp)	;d2 is used as scratch and restored below
+	LEA		EXPTBL,a1 	...LOAD ADDRESS OF TABLE OF 2^(J/64)
+	FMOVE.L		N(a6),FP1		...N --> FLOATING FMT
+	MOVE.L		N(a6),D0
+	MOVE.L		D0,d2
+	ANDI.L		#$3F,D0		...D0 IS J
+	ASL.L		#4,D0		...DISPLACEMENT FOR 2^(J/64)
+	ADDA.L		D0,a1		...ADDRESS FOR 2^(J/64)
+	ASR.L		#6,d2		...d2 IS L, N = 64L + J
+	MOVE.L		d2,D0
+	ASR.L		#1,D0		...D0 IS M
+	SUB.L		D0,d2		...d2 IS M', N = 64(M+M') + J
+	ADDI.L		#$3FFF,d2	;add the extended-precision exponent bias
+	MOVE.W		d2,ADJFACT(a6) 	...ADJFACT IS 2^(M')
+	MOVE.L		(sp)+,d2
+*--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
+*--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
+*--ADJFACT = 2^(M').
+*--NOTE(review): nothing is left on the stack here (d2 was pushed and popped above).
+
+	FMUL.S		#:3C800000,FP1  ...(1/64)*N
+	MOVE.L		(a1)+,FACT1(a6)		;copy the 3-longword leading part into FACT1
+	MOVE.L		(a1)+,FACT1HI(a6)
+	MOVE.L		(a1)+,FACT1LOW(a6)
+	MOVE.W		(a1)+,FACT2(a6)		;upper word of the 4th longword -> FACT2 sign/exponent word
+	clr.w		FACT2+2(a6)
+
+	FSUB.X		FP1,FP0	 	...X - (1/64)*INT(64 X)
+
+	MOVE.W		(a1)+,FACT2HI(a6)	;lower word of the 4th longword -> top 16 fraction bits of FACT2
+	clr.w		FACT2HI+2(a6)
+	clr.l		FACT2LOW(a6)
+	ADD.W		D0,FACT1(a6)		;add M to FACT1's exponent word: FACT1 *= 2^M
+
+	FMUL.X		LOG2,FP0	...FP0 IS R
+	ADD.W		D0,FACT2(a6)		;add M to FACT2's exponent word: FACT2 *= 2^M
+
+	BRA.W		expr
+
+EXPBORS:
+*--D0 STILL HOLDS THE COMPACT FORM OF |X| FROM THE ENTRY CODE
+	CMPI.L		#$3FFF8000,D0	;compact form of 1.0
+	BGT.B		EXPBIG		;out of range and |X| > 1: the big case
+
+EXPSM:
+*--|X| IS SMALL, RETURN 1 + X
+
+	FMOVE.L    	d1,FPCR		;restore users exceptions
+	FADD.S		#:3F800000,FP0	...RETURN 1 + X
+
+	bra		t_frcinx
+
+EXPBIG:
+*--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
+*--X(a6) HOLDS THE COPY OF X STORED BY THE ENTRY CODE; ITS SIGN SELECTS THE CASE
+	MOVE.L		X(a6),D0
+	TST.L		D0
+	BLT.B		EXPNEG
+
+	bclr.b		#7,(a0)		;t_ovfl expects positive value
+	bra		t_ovfl
+
+EXPNEG:
+	bclr.b		#7,(a0)		;t_unfl expects positive value
+	bra		t_unfl
+
+	xdef	stentoxd
+stentoxd:
+*--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
+
+	fmove.l		d1,fpcr		...set user's rounding mode/precision
+	Fmove.S		#:3F800000,FP0	...RETURN 1 + X
+	move.l		(a0),d0		;first longword of X (sign/exponent word in the upper half)
+	or.l		#$00800001,d0	;turn it into a tiny nonzero single that keeps X's sign bit
+	fadd.s		d0,fp0		;1 + tiny stands in for 1 + X
+	bra		t_frcinx
+
+	xdef	stentox
+stentox:
+*--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
+	FMOVEM.X	(a0),FP0	...LOAD INPUT, do not set cc's
+
+	MOVE.L		(A0),D0		;d0 upper word = sign/exponent word of X
+	MOVE.W		4(A0),D0	;d0 lower word = first 16 bits of the fraction
+	FMOVE.X		FP0,X(a6)	;keep a copy of X in the scratch slot
+	ANDI.L		#$7FFFFFFF,D0	;clear the sign bit: compact form of |X|
+
+	CMPI.L		#$3FB98000,D0		...|X| >= 2**(-70)?
+	BGE.B		TENOK1
+	BRA.W		EXPBORS
+
+TENOK1:
+	CMPI.L		#$400B9B07,D0		...|X| <= 16480*log2/log10 ?
+	BLE.B		TENMAIN
+	BRA.W		EXPBORS
+
+TENMAIN:
+*--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
+
+	FMOVE.X		FP0,FP1
+	FMUL.D		L2TEN64,FP1	...X*64*LOG10/LOG2
+
+	FMOVE.L		FP1,N(a6)		...N=INT(X*64*LOG10/LOG2)
+	MOVE.L		d2,-(sp)	;d2 is used as scratch and restored below
+	LEA		EXPTBL,a1 	...LOAD ADDRESS OF TABLE OF 2^(J/64)
+	FMOVE.L		N(a6),FP1		...N --> FLOATING FMT
+	MOVE.L		N(a6),D0
+	MOVE.L		D0,d2
+	ANDI.L		#$3F,D0		...D0 IS J
+	ASL.L		#4,D0		...DISPLACEMENT FOR 2^(J/64)
+	ADDA.L		D0,a1		...ADDRESS FOR 2^(J/64)
+	ASR.L		#6,d2		...d2 IS L, N = 64L + J
+	MOVE.L		d2,D0
+	ASR.L		#1,D0		...D0 IS M
+	SUB.L		D0,d2		...d2 IS M', N = 64(M+M') + J
+	ADDI.L		#$3FFF,d2	;add the extended-precision exponent bias
+	MOVE.W		d2,ADJFACT(a6) 	...ADJFACT IS 2^(M')
+	MOVE.L		(sp)+,d2
+
+*--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
+*--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
+*--ADJFACT = 2^(M').
+*--NOTE(review): nothing is left on the stack here (d2 was pushed and popped above).
+
+	FMOVE.X		FP1,FP2
+
+	FMUL.D		L10TWO1,FP1	...N*(LOG2/64LOG10)_LEAD
+	MOVE.L		(a1)+,FACT1(a6)		;copy the 3-longword leading part into FACT1
+
+	FMUL.X		L10TWO2,FP2	...N*(LOG2/64LOG10)_TRAIL
+
+	MOVE.L		(a1)+,FACT1HI(a6)
+	MOVE.L		(a1)+,FACT1LOW(a6)
+	FSUB.X		FP1,FP0		...X - N L_LEAD
+	MOVE.W		(a1)+,FACT2(a6)		;upper word of the 4th longword -> FACT2 sign/exponent word
+
+	FSUB.X		FP2,FP0		...X - N L_TRAIL
+
+	clr.w		FACT2+2(a6)
+	MOVE.W		(a1)+,FACT2HI(a6)	;lower word of the 4th longword -> top 16 fraction bits of FACT2
+	clr.w		FACT2HI+2(a6)
+	clr.l		FACT2LOW(a6)
+
+	FMUL.X		LOG10,FP0	...FP0 IS R
+
+	ADD.W		D0,FACT1(a6)		;add M to FACT1's exponent word: FACT1 *= 2^M
+	ADD.W		D0,FACT2(a6)		;add M to FACT2's exponent word: FACT2 *= 2^M
+
+expr:
+*--FP2 AND FP3 ARE USED AS SCRATCH BELOW; NO SAVE OF THEM APPEARS IN THIS FILE.
+*--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
+*--FP0 IS R. THE FOLLOWING CODE COMPUTES
+*--	2**(M'+M) * 2**(J/64) * EXP(R)
+
+	FMOVE.X		FP0,FP1
+	FMUL.X		FP1,FP1		...FP1 IS S = R*R
+
+	FMOVE.D		EXPA5,FP2	...FP2 IS A5
+	FMOVE.D		EXPA4,FP3	...FP3 IS A4
+
+	FMUL.X		FP1,FP2		...FP2 IS S*A5
+	FMUL.X		FP1,FP3		...FP3 IS S*A4
+
+	FADD.D		EXPA3,FP2	...FP2 IS A3+S*A5
+	FADD.D		EXPA2,FP3	...FP3 IS A2+S*A4
+
+	FMUL.X		FP1,FP2		...FP2 IS S*(A3+S*A5)
+	FMUL.X		FP1,FP3		...FP3 IS S*(A2+S*A4)
+
+	FADD.D		EXPA1,FP2	...FP2 IS A1+S*(A3+S*A5)
+	FMUL.X		FP0,FP3		...FP3 IS R*S*(A2+S*A4)
+
+	FMUL.X		FP1,FP2		...FP2 IS S*(A1+S*(A3+S*A5))
+	FADD.X		FP3,FP0		...FP0 IS R+R*S*(A2+S*A4)
+
+	FADD.X		FP2,FP0		...FP0 IS EXP(R) - 1
+
+
+*--FINAL RECONSTRUCTION PROCESS
+*--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
+
+	FMUL.X		FACT1(a6),FP0	;FACT1*P
+	FADD.X		FACT2(a6),FP0	;(FACT1*P) + FACT2
+	FADD.X		FACT1(a6),FP0	;FACT1 + ((FACT1*P) + FACT2)
+
+	FMOVE.L    	d1,FPCR		;restore users exceptions
+	clr.w		ADJFACT+2(a6)	;finish ADJFACT: clear the word after the exponent word
+	move.l		#$80000000,ADJFACT+4(a6)	;fraction = 1.0
+	clr.l		ADJFACT+8(a6)
+	FMUL.X		ADJFACT(a6),FP0	...FINAL ADJUSTMENT
+
+	bra		t_frcinx
+
+	end
diff --git a/sys/arch/m68k/fpsp/tbldo.sa b/sys/arch/m68k/fpsp/tbldo.sa
new file mode 100644
index 00000000000..f61a9fcee50
--- /dev/null
+++ b/sys/arch/m68k/fpsp/tbldo.sa
@@ -0,0 +1,579 @@
+* $NetBSD: tbldo.sa,v 1.2 1994/10/26 07:50:18 cgd Exp $
+
+* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+* M68000 Hi-Performance Microprocessor Division
+* M68040 Software Package
+*
+* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+* All rights reserved.
+*
+* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law,
+* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+* PARTICULAR PURPOSE and any warranty against infringement with
+* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+* and any accompanying written materials.
+*
+* To the maximum extent permitted by applicable law,
+* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+* SOFTWARE. Motorola assumes no responsibility for the maintenance
+* and support of the SOFTWARE.
+*
+* You are hereby granted a copyright license to use, modify, and
+* distribute the SOFTWARE so long as this entire notice is retained
+* without alteration in any modified and/or redistributed versions,
+* and that such modified versions are clearly identified as such.
+* No licenses are granted by implication, estoppel or otherwise
+* under any patents or trademarks of Motorola, Inc.
+
+*
+* tbldo.sa 3.1 12/10/90
+*
+* Modified:
+* 8/16/90 chinds The table was constructed to use only one level
+*                of indirection in do_func for monadic
+*                functions. Dyadic functions require two
+*                levels, and the tables are still contained
+*                in do_func. The table is arranged for
+*                indexing with a 10-bit index, with the first
+*                7 bits the opcode, and the remaining 3
+*                the stag. For dyadic functions, all
+*                valid addresses are to the generic entry
+*                point.
+* + +TBLDO IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + xref ld_pinf,ld_pone,ld_ppi2 + xref t_dz2,t_operr + xref serror,sone,szero,sinf,snzrinx + xref sopr_inf,spi_2,src_nan,szr_inf + + xref smovcr + xref pmod,prem,pscale + xref satanh,satanhd + xref sacos,sacosd,sasin,sasind,satan,satand + xref setox,setoxd,setoxm1,setoxm1d,setoxm1i + xref sgetexp,sgetexpd,sgetman,sgetmand + xref sint,sintd,sintrz + xref ssincos,ssincosd,ssincosi,ssincosnan,ssincosz + xref scos,scosd,ssin,ssind,stan,stand + xref scosh,scoshd,ssinh,ssinhd,stanh,stanhd + xref sslog10,sslog2,sslogn,sslognp1 + xref sslog10d,sslog2d,sslognd,slognp1d + xref stentox,stentoxd,stwotox,stwotoxd + +* instruction ;opcode-stag Notes + xdef tblpre +tblpre: + dc.l smovcr ;$00-0 fmovecr all + dc.l smovcr ;$00-1 fmovecr all + dc.l smovcr ;$00-2 fmovecr all + dc.l smovcr ;$00-3 fmovecr all + dc.l smovcr ;$00-4 fmovecr all + dc.l smovcr ;$00-5 fmovecr all + dc.l smovcr ;$00-6 fmovecr all + dc.l smovcr ;$00-7 fmovecr all + + dc.l sint ;$01-0 fint norm + dc.l szero ;$01-1 fint zero + dc.l sinf ;$01-2 fint inf + dc.l src_nan ;$01-3 fint nan + dc.l sintd ;$01-4 fint denorm inx + dc.l serror ;$01-5 fint ERROR + dc.l serror ;$01-6 fint ERROR + dc.l serror ;$01-7 fint ERROR + + dc.l ssinh ;$02-0 fsinh norm + dc.l szero ;$02-1 fsinh zero + dc.l sinf ;$02-2 fsinh inf + dc.l src_nan ;$02-3 fsinh nan + dc.l ssinhd ;$02-4 fsinh denorm + dc.l serror ;$02-5 fsinh ERROR + dc.l serror ;$02-6 fsinh ERROR + dc.l serror ;$02-7 fsinh ERROR + + dc.l sintrz ;$03-0 fintrz norm + dc.l szero ;$03-1 fintrz zero + dc.l sinf ;$03-2 fintrz inf + dc.l src_nan ;$03-3 fintrz nan + dc.l snzrinx ;$03-4 fintrz denorm inx + dc.l serror ;$03-5 fintrz ERROR + dc.l serror ;$03-6 fintrz ERROR + dc.l serror ;$03-7 fintrz ERROR + + dc.l serror ;$04-0 ERROR - illegal extension + dc.l serror ;$04-1 ERROR - illegal extension + dc.l serror ;$04-2 ERROR - illegal extension + dc.l serror ;$04-3 ERROR - illegal extension + dc.l serror 
;$04-4 ERROR - illegal extension + dc.l serror ;$04-5 ERROR - illegal extension + dc.l serror ;$04-6 ERROR - illegal extension + dc.l serror ;$04-7 ERROR - illegal extension + + dc.l serror ;$05-0 ERROR - illegal extension + dc.l serror ;$05-1 ERROR - illegal extension + dc.l serror ;$05-2 ERROR - illegal extension + dc.l serror ;$05-3 ERROR - illegal extension + dc.l serror ;$05-4 ERROR - illegal extension + dc.l serror ;$05-5 ERROR - illegal extension + dc.l serror ;$05-6 ERROR - illegal extension + dc.l serror ;$05-7 ERROR - illegal extension + + dc.l sslognp1 ;$06-0 flognp1 norm + dc.l szero ;$06-1 flognp1 zero + dc.l sopr_inf ;$06-2 flognp1 inf + dc.l src_nan ;$06-3 flognp1 nan + dc.l slognp1d ;$06-4 flognp1 denorm + dc.l serror ;$06-5 flognp1 ERROR + dc.l serror ;$06-6 flognp1 ERROR + dc.l serror ;$06-7 flognp1 ERROR + + dc.l serror ;$07-0 ERROR - illegal extension + dc.l serror ;$07-1 ERROR - illegal extension + dc.l serror ;$07-2 ERROR - illegal extension + dc.l serror ;$07-3 ERROR - illegal extension + dc.l serror ;$07-4 ERROR - illegal extension + dc.l serror ;$07-5 ERROR - illegal extension + dc.l serror ;$07-6 ERROR - illegal extension + dc.l serror ;$07-7 ERROR - illegal extension + + dc.l setoxm1 ;$08-0 fetoxm1 norm + dc.l szero ;$08-1 fetoxm1 zero + dc.l setoxm1i ;$08-2 fetoxm1 inf + dc.l src_nan ;$08-3 fetoxm1 nan + dc.l setoxm1d ;$08-4 fetoxm1 denorm + dc.l serror ;$08-5 fetoxm1 ERROR + dc.l serror ;$08-6 fetoxm1 ERROR + dc.l serror ;$08-7 fetoxm1 ERROR + + dc.l stanh ;$09-0 ftanh norm + dc.l szero ;$09-1 ftanh zero + dc.l sone ;$09-2 ftanh inf + dc.l src_nan ;$09-3 ftanh nan + dc.l stanhd ;$09-4 ftanh denorm + dc.l serror ;$09-5 ftanh ERROR + dc.l serror ;$09-6 ftanh ERROR + dc.l serror ;$09-7 ftanh ERROR + + dc.l satan ;$0a-0 fatan norm + dc.l szero ;$0a-1 fatan zero + dc.l spi_2 ;$0a-2 fatan inf + dc.l src_nan ;$0a-3 fatan nan + dc.l satand ;$0a-4 fatan denorm + dc.l serror ;$0a-5 fatan ERROR + dc.l serror ;$0a-6 fatan ERROR + dc.l serror ;$0a-7 
fatan ERROR + + dc.l serror ;$0b-0 ERROR - illegal extension + dc.l serror ;$0b-1 ERROR - illegal extension + dc.l serror ;$0b-2 ERROR - illegal extension + dc.l serror ;$0b-3 ERROR - illegal extension + dc.l serror ;$0b-4 ERROR - illegal extension + dc.l serror ;$0b-5 ERROR - illegal extension + dc.l serror ;$0b-6 ERROR - illegal extension + dc.l serror ;$0b-7 ERROR - illegal extension + + dc.l sasin ;$0c-0 fasin norm + dc.l szero ;$0c-1 fasin zero + dc.l t_operr ;$0c-2 fasin inf + dc.l src_nan ;$0c-3 fasin nan + dc.l sasind ;$0c-4 fasin denorm + dc.l serror ;$0c-5 fasin ERROR + dc.l serror ;$0c-6 fasin ERROR + dc.l serror ;$0c-7 fasin ERROR + + dc.l satanh ;$0d-0 fatanh norm + dc.l szero ;$0d-1 fatanh zero + dc.l t_operr ;$0d-2 fatanh inf + dc.l src_nan ;$0d-3 fatanh nan + dc.l satanhd ;$0d-4 fatanh denorm + dc.l serror ;$0d-5 fatanh ERROR + dc.l serror ;$0d-6 fatanh ERROR + dc.l serror ;$0d-7 fatanh ERROR + + dc.l ssin ;$0e-0 fsin norm + dc.l szero ;$0e-1 fsin zero + dc.l t_operr ;$0e-2 fsin inf + dc.l src_nan ;$0e-3 fsin nan + dc.l ssind ;$0e-4 fsin denorm + dc.l serror ;$0e-5 fsin ERROR + dc.l serror ;$0e-6 fsin ERROR + dc.l serror ;$0e-7 fsin ERROR + + dc.l stan ;$0f-0 ftan norm + dc.l szero ;$0f-1 ftan zero + dc.l t_operr ;$0f-2 ftan inf + dc.l src_nan ;$0f-3 ftan nan + dc.l stand ;$0f-4 ftan denorm + dc.l serror ;$0f-5 ftan ERROR + dc.l serror ;$0f-6 ftan ERROR + dc.l serror ;$0f-7 ftan ERROR + + dc.l setox ;$10-0 fetox norm + dc.l ld_pone ;$10-1 fetox zero + dc.l szr_inf ;$10-2 fetox inf + dc.l src_nan ;$10-3 fetox nan + dc.l setoxd ;$10-4 fetox denorm + dc.l serror ;$10-5 fetox ERROR + dc.l serror ;$10-6 fetox ERROR + dc.l serror ;$10-7 fetox ERROR + + dc.l stwotox ;$11-0 ftwotox norm + dc.l ld_pone ;$11-1 ftwotox zero + dc.l szr_inf ;$11-2 ftwotox inf + dc.l src_nan ;$11-3 ftwotox nan + dc.l stwotoxd ;$11-4 ftwotox denorm + dc.l serror ;$11-5 ftwotox ERROR + dc.l serror ;$11-6 ftwotox ERROR + dc.l serror ;$11-7 ftwotox ERROR + + dc.l stentox ;$12-0 
ftentox norm + dc.l ld_pone ;$12-1 ftentox zero + dc.l szr_inf ;$12-2 ftentox inf + dc.l src_nan ;$12-3 ftentox nan + dc.l stentoxd ;$12-4 ftentox denorm + dc.l serror ;$12-5 ftentox ERROR + dc.l serror ;$12-6 ftentox ERROR + dc.l serror ;$12-7 ftentox ERROR + + dc.l serror ;$13-0 ERROR - illegal extension + dc.l serror ;$13-1 ERROR - illegal extension + dc.l serror ;$13-2 ERROR - illegal extension + dc.l serror ;$13-3 ERROR - illegal extension + dc.l serror ;$13-4 ERROR - illegal extension + dc.l serror ;$13-5 ERROR - illegal extension + dc.l serror ;$13-6 ERROR - illegal extension + dc.l serror ;$13-7 ERROR - illegal extension + + dc.l sslogn ;$14-0 flogn norm + dc.l t_dz2 ;$14-1 flogn zero + dc.l sopr_inf ;$14-2 flogn inf + dc.l src_nan ;$14-3 flogn nan + dc.l sslognd ;$14-4 flogn denorm + dc.l serror ;$14-5 flogn ERROR + dc.l serror ;$14-6 flogn ERROR + dc.l serror ;$14-7 flogn ERROR + + dc.l sslog10 ;$15-0 flog10 norm + dc.l t_dz2 ;$15-1 flog10 zero + dc.l sopr_inf ;$15-2 flog10 inf + dc.l src_nan ;$15-3 flog10 nan + dc.l sslog10d ;$15-4 flog10 denorm + dc.l serror ;$15-5 flog10 ERROR + dc.l serror ;$15-6 flog10 ERROR + dc.l serror ;$15-7 flog10 ERROR + + dc.l sslog2 ;$16-0 flog2 norm + dc.l t_dz2 ;$16-1 flog2 zero + dc.l sopr_inf ;$16-2 flog2 inf + dc.l src_nan ;$16-3 flog2 nan + dc.l sslog2d ;$16-4 flog2 denorm + dc.l serror ;$16-5 flog2 ERROR + dc.l serror ;$16-6 flog2 ERROR + dc.l serror ;$16-7 flog2 ERROR + + dc.l serror ;$17-0 ERROR - illegal extension + dc.l serror ;$17-1 ERROR - illegal extension + dc.l serror ;$17-2 ERROR - illegal extension + dc.l serror ;$17-3 ERROR - illegal extension + dc.l serror ;$17-4 ERROR - illegal extension + dc.l serror ;$17-5 ERROR - illegal extension + dc.l serror ;$17-6 ERROR - illegal extension + dc.l serror ;$17-7 ERROR - illegal extension + + dc.l serror ;$18-0 ERROR - illegal extension + dc.l serror ;$18-1 ERROR - illegal extension + dc.l serror ;$18-2 ERROR - illegal extension + dc.l serror ;$18-3 ERROR - illegal 
extension + dc.l serror ;$18-4 ERROR - illegal extension + dc.l serror ;$18-5 ERROR - illegal extension + dc.l serror ;$18-6 ERROR - illegal extension + dc.l serror ;$18-7 ERROR - illegal extension + + dc.l scosh ;$19-0 fcosh norm + dc.l ld_pone ;$19-1 fcosh zero + dc.l ld_pinf ;$19-2 fcosh inf + dc.l src_nan ;$19-3 fcosh nan + dc.l scoshd ;$19-4 fcosh denorm + dc.l serror ;$19-5 fcosh ERROR + dc.l serror ;$19-6 fcosh ERROR + dc.l serror ;$19-7 fcosh ERROR + + dc.l serror ;$1a-0 ERROR - illegal extension + dc.l serror ;$1a-1 ERROR - illegal extension + dc.l serror ;$1a-2 ERROR - illegal extension + dc.l serror ;$1a-3 ERROR - illegal extension + dc.l serror ;$1a-4 ERROR - illegal extension + dc.l serror ;$1a-5 ERROR - illegal extension + dc.l serror ;$1a-6 ERROR - illegal extension + dc.l serror ;$1a-7 ERROR - illegal extension + + dc.l serror ;$1b-0 ERROR - illegal extension + dc.l serror ;$1b-1 ERROR - illegal extension + dc.l serror ;$1b-2 ERROR - illegal extension + dc.l serror ;$1b-3 ERROR - illegal extension + dc.l serror ;$1b-4 ERROR - illegal extension + dc.l serror ;$1b-5 ERROR - illegal extension + dc.l serror ;$1b-6 ERROR - illegal extension + dc.l serror ;$1b-7 ERROR - illegal extension + + dc.l sacos ;$1c-0 facos norm + dc.l ld_ppi2 ;$1c-1 facos zero + dc.l t_operr ;$1c-2 facos inf + dc.l src_nan ;$1c-3 facos nan + dc.l sacosd ;$1c-4 facos denorm + dc.l serror ;$1c-5 facos ERROR + dc.l serror ;$1c-6 facos ERROR + dc.l serror ;$1c-7 facos ERROR + + dc.l scos ;$1d-0 fcos norm + dc.l ld_pone ;$1d-1 fcos zero + dc.l t_operr ;$1d-2 fcos inf + dc.l src_nan ;$1d-3 fcos nan + dc.l scosd ;$1d-4 fcos denorm + dc.l serror ;$1d-5 fcos ERROR + dc.l serror ;$1d-6 fcos ERROR + dc.l serror ;$1d-7 fcos ERROR + + dc.l sgetexp ;$1e-0 fgetexp norm + dc.l szero ;$1e-1 fgetexp zero + dc.l t_operr ;$1e-2 fgetexp inf + dc.l src_nan ;$1e-3 fgetexp nan + dc.l sgetexpd ;$1e-4 fgetexp denorm + dc.l serror ;$1e-5 fgetexp ERROR + dc.l serror ;$1e-6 fgetexp ERROR + dc.l serror ;$1e-7 
fgetexp ERROR + + dc.l sgetman ;$1f-0 fgetman norm + dc.l szero ;$1f-1 fgetman zero + dc.l t_operr ;$1f-2 fgetman inf + dc.l src_nan ;$1f-3 fgetman nan + dc.l sgetmand ;$1f-4 fgetman denorm + dc.l serror ;$1f-5 fgetman ERROR + dc.l serror ;$1f-6 fgetman ERROR + dc.l serror ;$1f-7 fgetman ERROR + + dc.l serror ;$20-0 ERROR - illegal extension + dc.l serror ;$20-1 ERROR - illegal extension + dc.l serror ;$20-2 ERROR - illegal extension + dc.l serror ;$20-3 ERROR - illegal extension + dc.l serror ;$20-4 ERROR - illegal extension + dc.l serror ;$20-5 ERROR - illegal extension + dc.l serror ;$20-6 ERROR - illegal extension + dc.l serror ;$20-7 ERROR - illegal extension + + dc.l pmod ;$21-0 fmod all + dc.l pmod ;$21-1 fmod all + dc.l pmod ;$21-2 fmod all + dc.l pmod ;$21-3 fmod all + dc.l pmod ;$21-4 fmod all + dc.l serror ;$21-5 fmod ERROR + dc.l serror ;$21-6 fmod ERROR + dc.l serror ;$21-7 fmod ERROR + + dc.l serror ;$22-0 ERROR - illegal extension + dc.l serror ;$22-1 ERROR - illegal extension + dc.l serror ;$22-2 ERROR - illegal extension + dc.l serror ;$22-3 ERROR - illegal extension + dc.l serror ;$22-4 ERROR - illegal extension + dc.l serror ;$22-5 ERROR - illegal extension + dc.l serror ;$22-6 ERROR - illegal extension + dc.l serror ;$22-7 ERROR - illegal extension + + dc.l serror ;$23-0 ERROR - illegal extension + dc.l serror ;$23-1 ERROR - illegal extension + dc.l serror ;$23-2 ERROR - illegal extension + dc.l serror ;$23-3 ERROR - illegal extension + dc.l serror ;$23-4 ERROR - illegal extension + dc.l serror ;$23-5 ERROR - illegal extension + dc.l serror ;$23-6 ERROR - illegal extension + dc.l serror ;$23-7 ERROR - illegal extension + + dc.l serror ;$24-0 ERROR - illegal extension + dc.l serror ;$24-1 ERROR - illegal extension + dc.l serror ;$24-2 ERROR - illegal extension + dc.l serror ;$24-3 ERROR - illegal extension + dc.l serror ;$24-4 ERROR - illegal extension + dc.l serror ;$24-5 ERROR - illegal extension + dc.l serror ;$24-6 ERROR - illegal extension + 
dc.l serror ;$24-7 ERROR - illegal extension + + dc.l prem ;$25-0 frem all + dc.l prem ;$25-1 frem all + dc.l prem ;$25-2 frem all + dc.l prem ;$25-3 frem all + dc.l prem ;$25-4 frem all + dc.l serror ;$25-5 frem ERROR + dc.l serror ;$25-6 frem ERROR + dc.l serror ;$25-7 frem ERROR + + dc.l pscale ;$26-0 fscale all + dc.l pscale ;$26-1 fscale all + dc.l pscale ;$26-2 fscale all + dc.l pscale ;$26-3 fscale all + dc.l pscale ;$26-4 fscale all + dc.l serror ;$26-5 fscale ERROR + dc.l serror ;$26-6 fscale ERROR + dc.l serror ;$26-7 fscale ERROR + + dc.l serror ;$27-0 ERROR - illegal extension + dc.l serror ;$27-1 ERROR - illegal extension + dc.l serror ;$27-2 ERROR - illegal extension + dc.l serror ;$27-3 ERROR - illegal extension + dc.l serror ;$27-4 ERROR - illegal extension + dc.l serror ;$27-5 ERROR - illegal extension + dc.l serror ;$27-6 ERROR - illegal extension + dc.l serror ;$27-7 ERROR - illegal extension + + dc.l serror ;$28-0 ERROR - illegal extension + dc.l serror ;$28-1 ERROR - illegal extension + dc.l serror ;$28-2 ERROR - illegal extension + dc.l serror ;$28-3 ERROR - illegal extension + dc.l serror ;$28-4 ERROR - illegal extension + dc.l serror ;$28-5 ERROR - illegal extension + dc.l serror ;$28-6 ERROR - illegal extension + dc.l serror ;$28-7 ERROR - illegal extension + + dc.l serror ;$29-0 ERROR - illegal extension + dc.l serror ;$29-1 ERROR - illegal extension + dc.l serror ;$29-2 ERROR - illegal extension + dc.l serror ;$29-3 ERROR - illegal extension + dc.l serror ;$29-4 ERROR - illegal extension + dc.l serror ;$29-5 ERROR - illegal extension + dc.l serror ;$29-6 ERROR - illegal extension + dc.l serror ;$29-7 ERROR - illegal extension + + dc.l serror ;$2a-0 ERROR - illegal extension + dc.l serror ;$2a-1 ERROR - illegal extension + dc.l serror ;$2a-2 ERROR - illegal extension + dc.l serror ;$2a-3 ERROR - illegal extension + dc.l serror ;$2a-4 ERROR - illegal extension + dc.l serror ;$2a-5 ERROR - illegal extension + dc.l serror ;$2a-6 ERROR - 
illegal extension + dc.l serror ;$2a-7 ERROR - illegal extension + + dc.l serror ;$2b-0 ERROR - illegal extension + dc.l serror ;$2b-1 ERROR - illegal extension + dc.l serror ;$2b-2 ERROR - illegal extension + dc.l serror ;$2b-3 ERROR - illegal extension + dc.l serror ;$2b-4 ERROR - illegal extension + dc.l serror ;$2b-5 ERROR - illegal extension + dc.l serror ;$2b-6 ERROR - illegal extension + dc.l serror ;$2b-7 ERROR - illegal extension + + dc.l serror ;$2c-0 ERROR - illegal extension + dc.l serror ;$2c-1 ERROR - illegal extension + dc.l serror ;$2c-2 ERROR - illegal extension + dc.l serror ;$2c-3 ERROR - illegal extension + dc.l serror ;$2c-4 ERROR - illegal extension + dc.l serror ;$2c-5 ERROR - illegal extension + dc.l serror ;$2c-6 ERROR - illegal extension + dc.l serror ;$2c-7 ERROR - illegal extension + + dc.l serror ;$2d-0 ERROR - illegal extension + dc.l serror ;$2d-1 ERROR - illegal extension + dc.l serror ;$2d-2 ERROR - illegal extension + dc.l serror ;$2d-3 ERROR - illegal extension + dc.l serror ;$2d-4 ERROR - illegal extension + dc.l serror ;$2d-5 ERROR - illegal extension + dc.l serror ;$2d-6 ERROR - illegal extension + dc.l serror ;$2d-7 ERROR - illegal extension + + dc.l serror ;$2e-0 ERROR - illegal extension + dc.l serror ;$2e-1 ERROR - illegal extension + dc.l serror ;$2e-2 ERROR - illegal extension + dc.l serror ;$2e-3 ERROR - illegal extension + dc.l serror ;$2e-4 ERROR - illegal extension + dc.l serror ;$2e-5 ERROR - illegal extension + dc.l serror ;$2e-6 ERROR - illegal extension + dc.l serror ;$2e-7 ERROR - illegal extension + + dc.l serror ;$2f-0 ERROR - illegal extension + dc.l serror ;$2f-1 ERROR - illegal extension + dc.l serror ;$2f-2 ERROR - illegal extension + dc.l serror ;$2f-3 ERROR - illegal extension + dc.l serror ;$2f-4 ERROR - illegal extension + dc.l serror ;$2f-5 ERROR - illegal extension + dc.l serror ;$2f-6 ERROR - illegal extension + dc.l serror ;$2f-7 ERROR - illegal extension + + dc.l ssincos ;$30-0 fsincos norm + dc.l 
ssincosz ;$30-1 fsincos zero + dc.l ssincosi ;$30-2 fsincos inf + dc.l ssincosnan ;$30-3 fsincos nan + dc.l ssincosd ;$30-4 fsincos denorm + dc.l serror ;$30-5 fsincos ERROR + dc.l serror ;$30-6 fsincos ERROR + dc.l serror ;$30-7 fsincos ERROR + + dc.l ssincos ;$31-0 fsincos norm + dc.l ssincosz ;$31-1 fsincos zero + dc.l ssincosi ;$31-2 fsincos inf + dc.l ssincosnan ;$31-3 fsincos nan + dc.l ssincosd ;$31-4 fsincos denorm + dc.l serror ;$31-5 fsincos ERROR + dc.l serror ;$31-6 fsincos ERROR + dc.l serror ;$31-7 fsincos ERROR + + dc.l ssincos ;$32-0 fsincos norm + dc.l ssincosz ;$32-1 fsincos zero + dc.l ssincosi ;$32-2 fsincos inf + dc.l ssincosnan ;$32-3 fsincos nan + dc.l ssincosd ;$32-4 fsincos denorm + dc.l serror ;$32-5 fsincos ERROR + dc.l serror ;$32-6 fsincos ERROR + dc.l serror ;$32-7 fsincos ERROR + + dc.l ssincos ;$33-0 fsincos norm + dc.l ssincosz ;$33-1 fsincos zero + dc.l ssincosi ;$33-2 fsincos inf + dc.l ssincosnan ;$33-3 fsincos nan + dc.l ssincosd ;$33-4 fsincos denorm + dc.l serror ;$33-5 fsincos ERROR + dc.l serror ;$33-6 fsincos ERROR + dc.l serror ;$33-7 fsincos ERROR + + dc.l ssincos ;$34-0 fsincos norm + dc.l ssincosz ;$34-1 fsincos zero + dc.l ssincosi ;$34-2 fsincos inf + dc.l ssincosnan ;$34-3 fsincos nan + dc.l ssincosd ;$34-4 fsincos denorm + dc.l serror ;$34-5 fsincos ERROR + dc.l serror ;$34-6 fsincos ERROR + dc.l serror ;$34-7 fsincos ERROR + + dc.l ssincos ;$35-0 fsincos norm + dc.l ssincosz ;$35-1 fsincos zero + dc.l ssincosi ;$35-2 fsincos inf + dc.l ssincosnan ;$35-3 fsincos nan + dc.l ssincosd ;$35-4 fsincos denorm + dc.l serror ;$35-5 fsincos ERROR + dc.l serror ;$35-6 fsincos ERROR + dc.l serror ;$35-7 fsincos ERROR + + dc.l ssincos ;$36-0 fsincos norm + dc.l ssincosz ;$36-1 fsincos zero + dc.l ssincosi ;$36-2 fsincos inf + dc.l ssincosnan ;$36-3 fsincos nan + dc.l ssincosd ;$36-4 fsincos denorm + dc.l serror ;$36-5 fsincos ERROR + dc.l serror ;$36-6 fsincos ERROR + dc.l serror ;$36-7 fsincos ERROR + + dc.l ssincos ;$37-0 
fsincos norm + dc.l ssincosz ;$37-1 fsincos zero + dc.l ssincosi ;$37-2 fsincos inf + dc.l ssincosnan ;$37-3 fsincos nan + dc.l ssincosd ;$37-4 fsincos denorm + dc.l serror ;$37-5 fsincos ERROR + dc.l serror ;$37-6 fsincos ERROR + dc.l serror ;$37-7 fsincos ERROR + + end diff --git a/sys/arch/m68k/fpsp/util.sa b/sys/arch/m68k/fpsp/util.sa new file mode 100644 index 00000000000..9c03ac747b5 --- /dev/null +++ b/sys/arch/m68k/fpsp/util.sa @@ -0,0 +1,773 @@ +* $NetBSD: util.sa,v 1.3 1994/10/26 07:50:20 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. 
+ +* +* util.sa 3.7 7/29/91 +* +* This file contains routines used by other programs. +* +* ovf_res: used by overflow to force the correct +* result. ovf_r_k, ovf_r_x2, ovf_r_x3 are +* derivatives of this routine. +* unf_sub: underflow result calculation. +* get_fline: get user's opcode word +* g_dfmtou: returns the destination format. +* g_opcls: returns the opclass of the float instruction. +* g_rndpr: returns the rounding precision. +* reg_dest: write byte, word, or long data to Dn +* + +UTIL IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref mem_read + + xdef g_dfmtou + xdef g_opcls + xdef g_rndpr + xdef get_fline + xdef reg_dest + +* +* Final result table for ovf_res. Note that the negative counterparts +* are unnecessary as ovf_res always returns the sign separately from +* the exponent. +* ;+inf +EXT_PINF dc.l $7fff0000,$00000000,$00000000,$00000000 +* ;largest +ext +EXT_PLRG dc.l $7ffe0000,$ffffffff,$ffffffff,$00000000 +* ;largest magnitude +sgl in ext +SGL_PLRG dc.l $407e0000,$ffffff00,$00000000,$00000000 +* ;largest magnitude +dbl in ext +DBL_PLRG dc.l $43fe0000,$ffffffff,$fffff800,$00000000 +* ;largest -ext + +* +* tblovfl --- dispatch table used by ovf_res. The index is +* (round precision << 2) | round mode, with precision in the order +* EXT, SGL, DBL and mode in the order RN, RZ, RM, RP. Precision +* code 3 dispatches to 'error'. +* +tblovfl: + dc.l EXT_RN + dc.l EXT_RZ + dc.l EXT_RM + dc.l EXT_RP + dc.l SGL_RN + dc.l SGL_RZ + dc.l SGL_RM + dc.l SGL_RP + dc.l DBL_RN + dc.l DBL_RZ + dc.l DBL_RM + dc.l DBL_RP + dc.l error + dc.l error + dc.l error + dc.l error + + +* +* ovf_r_k --- overflow result calculation +* +* This entry point is used by kernel_ex. +* +* This forces the destination precision to be extended +* +* Input: operand in ETEMP +* Output: a result is in ETEMP (internal extended format) +* + xdef ovf_r_k +ovf_r_k: + lea ETEMP(a6),a0 ;a0 points to source operand + bclr.b #sign_bit,ETEMP_EX(a6) ;clear sign in exponent word, Z = old sign clear + sne ETEMP_SGN(a6) ;convert to internal IEEE format +* +* NOTE(review): no rts or branch follows; execution falls through into +* ovf_r_x2 below, which selects the rounding precision from the command +* word or the FPCR. Confirm that this matches the "forces extended" +* statement above. +* + +* +* ovf_r_x2 --- overflow result calculation +* +* This entry point used by x_ovfl. 
(opclass 0 and 2) +* +* Input a0 points to an operand in the internal extended format +* Output a0 points to the result in the internal extended format +* +* This sets the round precision according to the user's FPCR unless the +* instruction is fsgldiv or fsglmul or fsadd, fdadd, fssub, fdsub, fsmul, +* fdmul, fsdiv, fddiv, fssqrt, fsmove, fdmove, fsabs, fdabs, fsneg, fdneg. +* If the instruction is fsgldiv or fsglmul, the rounding precision must be +* extended. If the instruction is not fsgldiv or fsglmul but a force- +* precision instruction, the rounding precision is then set to the force +* precision. +* +* The selected precision is handed on in d0 (0 = ext, 1 = sgl, 2 = dbl +* for the forced cases, otherwise the FPCR precision field); d0 is +* clobbered. + + xdef ovf_r_x2 +ovf_r_x2: + btst.b #E3,E_BYTE(a6) ;check for nu exception + beq.l ovf_e1_exc ;it is cu exception +ovf_e3_exc: +* ;move.w leaves the upper word of d0 untouched, +* ;so mask with andi.l (as ovf_e1_exc does) to +* ;give the long compares below a clean value + move.w CMDREG3B(a6),d0 ;get the command word + andi.l #$00000060,d0 ;clear all bits except 6 and 5 + cmpi.l #$00000040,d0 + beq.l ovff_sgl ;force precision is single + cmpi.l #$00000060,d0 + beq.l ovff_dbl ;force precision is double + move.w CMDREG3B(a6),d0 ;get the command word again + andi.l #$7f,d0 ;clear all except operation + cmpi.l #$33,d0 + beq.l ovf_fsgl ;fsglmul or fsgldiv + cmpi.l #$30,d0 + beq.l ovf_fsgl + bra ovf_fpcr ;instruction is none of the above +* ;use FPCR +ovf_e1_exc: + move.w CMDREG1B(a6),d0 ;get command word + andi.l #$00000044,d0 ;clear all bits except 6 and 2 + cmpi.l #$00000040,d0 + beq.l ovff_sgl ;the instruction is force single + cmpi.l #$00000044,d0 + beq.l ovff_dbl ;the instruction is force double + move.w CMDREG1B(a6),d0 ;again get the command word + andi.l #$0000007f,d0 ;clear all except the op code + cmpi.l #$00000027,d0 + beq.l ovf_fsgl ;fsglmul + cmpi.l #$00000024,d0 + beq.l ovf_fsgl ;fsgldiv + bra ovf_fpcr ;none of the above, use FPCR +* +* +* Inst is either fsgldiv or fsglmul. Force extended precision. 
+* +ovf_fsgl: + clr.l d0 ;0 = extended + bra.b ovf_res + +ovff_sgl: + move.l #$00000001,d0 ;set single + bra.b ovf_res +ovff_dbl: + move.l #$00000002,d0 ;set double + bra.b ovf_res +* +* The precision is in the fpcr. +* +ovf_fpcr: + bfextu FPCR_MODE(a6){0:2},d0 ;set round precision + bra.b ovf_res + +* +* +* ovf_r_x3 --- overflow result calculation +* +* This entry point used by x_ovfl. (opclass 3 only) +* +* Input a0 points to an operand in the internal extended format +* Output a0 points to the result in the internal extended format +* +* This sets the round precision according to the destination size. +* Execution falls through into ovf_res after g_dfmtou returns. +* + xdef ovf_r_x3 +ovf_r_x3: + bsr g_dfmtou ;get dest fmt in d0{1:0} +* ;for fmovout, the destination format +* ;is the rounding precision + +* +* ovf_res --- overflow result calculation +* +* Input: +* d0 contains round precision (0 = ext, 1 = sgl, 2 = dbl) +* a0 points to operand in internal extended format +* Output: +* a0 points to result in internal extended format +* +* The round mode is read from FPCR_MODE(a6). d0, d1 and a1 are used +* as scratch. The handlers below also update the FPSR condition code +* bits for the result they select. +* + xdef ovf_res +ovf_res: + lsl.l #2,d0 ;move round precision to d0{3:2} + bfextu FPCR_MODE(a6){2:2},d1 ;set round mode + or.l d1,d0 ;index is fmt:mode in d0{3:0} + lea.l tblovfl,a1 ;load a1 with table address + move.l (a1,d0*4),a1 ;use d0 as index to the table + jmp (a1) ;go to the correct routine +* +*case DEST_FMT = EXT +* +EXT_RN: + lea.l EXT_PINF,a1 ;answer is +/- infinity + bset.b #inf_bit,FPSR_CC(a6) + bra set_sign ;now go set the sign +EXT_RZ: + lea.l EXT_PLRG,a1 ;answer is +/- large number + bra set_sign ;now go set the sign +EXT_RM: + tst.b LOCAL_SGN(a0) ;if negative overflow + beq.b e_rm_pos +e_rm_neg: + lea.l EXT_PINF,a1 ;answer is negative infinity + or.l #neginf_mask,USER_FPSR(a6) + bra end_ovfr +e_rm_pos: + lea.l EXT_PLRG,a1 ;answer is large positive number + bra end_ovfr +EXT_RP: + tst.b LOCAL_SGN(a0) ;if negative overflow + beq.b e_rp_pos +e_rp_neg: + lea.l EXT_PLRG,a1 ;answer is large negative number + bset.b #neg_bit,FPSR_CC(a6) + bra end_ovfr +e_rp_pos: + lea.l EXT_PINF,a1 ;answer is positive infinity + bset.b #inf_bit,FPSR_CC(a6) + bra end_ovfr +* 
+*case DEST_FMT = DBL +* +DBL_RN: + lea.l EXT_PINF,a1 ;answer is +/- infinity + bset.b #inf_bit,FPSR_CC(a6) + bra set_sign +DBL_RZ: + lea.l DBL_PLRG,a1 ;answer is +/- large number + bra set_sign ;now go set the sign +DBL_RM: + tst.b LOCAL_SGN(a0) ;if negative overflow + beq.b d_rm_pos +d_rm_neg: + lea.l EXT_PINF,a1 ;answer is negative infinity + or.l #neginf_mask,USER_FPSR(a6) + bra end_ovfr ;inf is same for all precisions (ext,dbl,sgl) +d_rm_pos: + lea.l DBL_PLRG,a1 ;answer is large positive number + bra end_ovfr +DBL_RP: + tst.b LOCAL_SGN(a0) ;if negative overflow + beq.b d_rp_pos +d_rp_neg: + lea.l DBL_PLRG,a1 ;answer is large negative number + bset.b #neg_bit,FPSR_CC(a6) + bra end_ovfr +d_rp_pos: + lea.l EXT_PINF,a1 ;answer is positive infinity + bset.b #inf_bit,FPSR_CC(a6) + bra end_ovfr +* +*case DEST_FMT = SGL +* +SGL_RN: + lea.l EXT_PINF,a1 ;answer is +/- infinity + bset.b #inf_bit,FPSR_CC(a6) + bra.b set_sign +SGL_RZ: + lea.l SGL_PLRG,a1 ;answer is +/- large number + bra.b set_sign +SGL_RM: + tst.b LOCAL_SGN(a0) ;if negative overflow + beq.b s_rm_pos +s_rm_neg: + lea.l EXT_PINF,a1 ;answer is negative infinity + or.l #neginf_mask,USER_FPSR(a6) + bra.b end_ovfr +s_rm_pos: + lea.l SGL_PLRG,a1 ;answer is large positive number + bra.b end_ovfr +SGL_RP: + tst.b LOCAL_SGN(a0) ;if negative overflow + beq.b s_rp_pos +s_rp_neg: + lea.l SGL_PLRG,a1 ;answer is large negative number + bset.b #neg_bit,FPSR_CC(a6) + bra.b end_ovfr +s_rp_pos: + lea.l EXT_PINF,a1 ;answer is positive infinity + bset.b #inf_bit,FPSR_CC(a6) + bra.b end_ovfr + +* +* set_sign --- flag a negative result in the FPSR condition codes, +* then fall through to end_ovfr. +* +set_sign: + tst.b LOCAL_SGN(a0) ;if negative overflow + beq.b end_ovfr +neg_sign: + bset.b #neg_bit,FPSR_CC(a6) + +* +* end_ovfr --- copy the selected table entry (a1) over the operand (a0). +* Only the exponent word and the two mantissa longs are copied. +* +end_ovfr: + move.w LOCAL_EX(a1),LOCAL_EX(a0) ;do not overwrite sign + move.l LOCAL_HI(a1),LOCAL_HI(a0) + move.l LOCAL_LO(a1),LOCAL_LO(a0) + rts + + +* +* ERROR +* +* Target of the last four tblovfl entries (round precision code 3). +* Returns without modifying the operand. +* +error: + rts +* +* get_fline --- get f-line opcode of interrupted instruction +* +* Returns opcode in the low word of d0. 
+* +get_fline: + move.l USER_FPIAR(a6),a0 ;opcode address + clr.l -(a7) ;reserve a zeroed longword on the stack + lea.l 2(a7),a1 ;point to low word of temporary + move.l #2,d0 ;count (2 = one opcode word) + bsr.l mem_read + move.l (a7)+,d0 ;pop temporary: opcode in low word, high word zero + rts +* +* g_rndpr --- put rounding precision in d0{1:0} +* +* valid return codes are: +* 00 - extended +* 01 - single +* 10 - double +* +* begin +* get rounding precision (cmdreg3b{6:5}) +* begin +* case opclass = 011 (move out) +* get destination format - this is also the rounding precision +* +* case opclass = 0x0 +* if E3 +* *case RndPr(from cmdreg3b{6:5} = 11 then RND_PREC = DBL +* *case RndPr(from cmdreg3b{6:5} = 10 then RND_PREC = SGL +* case RndPr(from cmdreg3b{6:5} = 00 | 01 +* use precision from FPCR{7:6} +* case 00 then RND_PREC = EXT +* case 01 then RND_PREC = SGL +* case 10 then RND_PREC = DBL +* else E1 +* use precision in FPCR{7:6} +* case 00 then RND_PREC = EXT +* case 01 then RND_PREC = SGL +* case 10 then RND_PREC = DBL +* end +* +* In addition to the cases listed above, the code below returns +* extended for fsglmul/fsgldiv, and on the E1 path it honors the +* force-single and force-double encodings in cmdreg1b before falling +* back to the FPCR. +* +g_rndpr: + bsr.w g_opcls ;get opclass in d0{2:0} + cmp.w #$0003,d0 ;check for opclass 011 + bne.b op_0x0 + +* +* For move out instructions (opclass 011) the destination format +* is the same as the rounding precision. Pass results from g_dfmtou. 
+* + bsr.w g_dfmtou + rts +op_0x0: + btst.b #E3,E_BYTE(a6) + beq.l unf_e1_exc ;E3 clear: branch to e1 underflow +unf_e3_exc: + move.l CMDREG3B(a6),d0 ;rounding precision in d0{10:9} + bfextu d0{9:2},d0 ;move the rounding prec bits to d0{1:0} + cmpi.l #$2,d0 + beq.l unff_sgl ;force precision is single + cmpi.l #$3,d0 ;force precision is double + beq.l unff_dbl + move.w CMDREG3B(a6),d0 ;get the command word again + andi.l #$7f,d0 ;clear all except operation + cmpi.l #$33,d0 + beq.l unf_fsgl ;fsglmul or fsgldiv + cmpi.l #$30,d0 + beq.l unf_fsgl ;fsgldiv or fsglmul + bra unf_fpcr +unf_e1_exc: + move.l CMDREG1B(a6),d0 ;get 32 bits off the stack, 1st 16 bits +* ;are the command word + andi.l #$00440000,d0 ;clear all bits except bits 6 and 2 + cmpi.l #$00400000,d0 + beq.l unff_sgl ;force single + cmpi.l #$00440000,d0 ;force double + beq.l unff_dbl + move.l CMDREG1B(a6),d0 ;get the command word again + andi.l #$007f0000,d0 ;clear all bits except the operation + cmpi.l #$00270000,d0 + beq.l unf_fsgl ;fsglmul + cmpi.l #$00240000,d0 + beq.l unf_fsgl ;fsgldiv + bra unf_fpcr + +* +* Convert to return format. The values from cmdreg3b and the return +* values are: +* cmdreg3b return precision +* -------- ------ --------- +* 00,01 0 ext +* 10 1 sgl +* 11 2 dbl +* Force single +* +unff_sgl: + move.l #1,d0 ;return 1 + rts +* +* Force double +* +unff_dbl: + move.l #2,d0 ;return 2 + rts +* +* Force extended +* +unf_fsgl: + clr.l d0 ;return 0 + rts +* +* Get rounding precision set in FPCR{7:6}. 
+* +unf_fpcr: + move.l USER_FPCR(a6),d0 ;rounding precision bits in d0{7:6} + bfextu d0{24:2},d0 ;move the rounding prec bits to d0{1:0} + rts +* +* g_opcls --- put opclass in d0{2:0} +* +* If E3 is set the result is always 0; otherwise the opclass is the +* top three bits of cmdreg1b. +* +g_opcls: + btst.b #E3,E_BYTE(a6) + beq.b opc_1b ;if E3 is clear, go to cmdreg1b +opc_3b: + clr.l d0 ;if E3, only opclass 0x0 is possible + rts +opc_1b: + move.l CMDREG1B(a6),d0 + bfextu d0{0:3},d0 ;shift opclass bits d0{31:29} to d0{2:0} + rts +* +* g_dfmtou --- put destination format in d0{1:0} +* +* If E1, the format is from cmdreg1b{12:10} +* If E3, the format is extended. +* +* Dest. Fmt. +* extended 010 -> 00 +* single 001 -> 01 +* double 101 -> 10 +* +* Any format code other than single or double is returned as 00. +* +g_dfmtou: + btst.b #E3,E_BYTE(a6) + beq.b op011 ;E3 clear: decode cmdreg1b + clr.l d0 ;if E3, size is always ext + rts +op011: + move.l CMDREG1B(a6),d0 + bfextu d0{3:3},d0 ;dest fmt from cmdreg1b{12:10} + cmp.b #1,d0 ;check for single + bne.b not_sgl + move.l #1,d0 + rts +not_sgl: + cmp.b #5,d0 ;check for double + bne.b not_dbl + move.l #2,d0 + rts +not_dbl: + clr.l d0 ;must be extended + rts + +* +* +* Final result table for unf_sub. Note that the negative counterparts +* are unnecessary as unf_sub always returns the sign separately from +* the exponent. +* ;+ext zero +EXT_PZRO dc.l $00000000,$00000000,$00000000,$00000000 +* ;+sgl zero +SGL_PZRO dc.l $3f810000,$00000000,$00000000,$00000000 +* ;+dbl zero +DBL_PZRO dc.l $3c010000,$00000000,$00000000,$00000000 +* ;smallest +ext denorm +EXT_PSML dc.l $00000000,$00000000,$00000001,$00000000 +* ;smallest +sgl denorm +SGL_PSML dc.l $3f810000,$00000100,$00000000,$00000000 +* ;smallest +dbl denorm +DBL_PSML dc.l $3c010000,$00000000,$00000800,$00000000 +* +* UNF_SUB --- underflow result calculation +* +* Input: +* d0 contains round precision +* a0 points to input operand in the internal extended format +* +* Output: +* a0 points to correct internal extended precision result. 
+* + +* +* tblunf --- dispatch table for unf_sub, indexed by +* (round precision << 2) | round mode. Precision code 3 is routed +* to the DBL handlers. +* +tblunf: + dc.l uEXT_RN + dc.l uEXT_RZ + dc.l uEXT_RM + dc.l uEXT_RP + dc.l uSGL_RN + dc.l uSGL_RZ + dc.l uSGL_RM + dc.l uSGL_RP + dc.l uDBL_RN + dc.l uDBL_RZ + dc.l uDBL_RM + dc.l uDBL_RP + dc.l uDBL_RN + dc.l uDBL_RZ + dc.l uDBL_RM + dc.l uDBL_RP + + xdef unf_sub +unf_sub: + lsl.l #2,d0 ;move round precision to d0{3:2} + bfextu FPCR_MODE(a6){2:2},d1 ;set round mode + or.l d1,d0 ;index is fmt:mode in d0{3:0} + lea.l tblunf,a1 ;load a1 with table address + move.l (a1,d0*4),a1 ;use d0 as index to the table + jmp (a1) ;go to the correct routine +* +*case DEST_FMT = EXT +* +uEXT_RN: + lea.l EXT_PZRO,a1 ;answer is +/- zero + bset.b #z_bit,FPSR_CC(a6) + bra uset_sign ;now go set the sign +uEXT_RZ: + lea.l EXT_PZRO,a1 ;answer is +/- zero + bset.b #z_bit,FPSR_CC(a6) + bra uset_sign ;now go set the sign +uEXT_RM: + tst.b LOCAL_SGN(a0) ;if negative underflow + beq.b ue_rm_pos +ue_rm_neg: + lea.l EXT_PSML,a1 ;answer is negative smallest denorm + bset.b #neg_bit,FPSR_CC(a6) + bra end_unfr +ue_rm_pos: + lea.l EXT_PZRO,a1 ;answer is positive zero + bset.b #z_bit,FPSR_CC(a6) + bra end_unfr +uEXT_RP: + tst.b LOCAL_SGN(a0) ;if negative underflow + beq.b ue_rp_pos +ue_rp_neg: + lea.l EXT_PZRO,a1 ;answer is negative zero + ori.l #negz_mask,USER_FPSR(a6) + bra end_unfr +ue_rp_pos: + lea.l EXT_PSML,a1 ;answer is positive smallest denorm + bra end_unfr +* +*case DEST_FMT = DBL +* +uDBL_RN: + lea.l DBL_PZRO,a1 ;answer is +/- zero + bset.b #z_bit,FPSR_CC(a6) + bra uset_sign +uDBL_RZ: + lea.l DBL_PZRO,a1 ;answer is +/- zero + bset.b #z_bit,FPSR_CC(a6) + bra uset_sign ;now go set the sign +uDBL_RM: + tst.b LOCAL_SGN(a0) ;if negative underflow + beq.b ud_rm_pos +ud_rm_neg: + lea.l DBL_PSML,a1 ;answer is smallest denormalized negative + bset.b #neg_bit,FPSR_CC(a6) + bra end_unfr +ud_rm_pos: + lea.l DBL_PZRO,a1 ;answer is positive zero + bset.b #z_bit,FPSR_CC(a6) + bra end_unfr +uDBL_RP: + tst.b LOCAL_SGN(a0) ;if negative underflow + beq.b ud_rp_pos +ud_rp_neg: + lea.l DBL_PZRO,a1 ;answer 
is negative zero + ori.l #negz_mask,USER_FPSR(a6) + bra end_unfr +ud_rp_pos: + lea.l DBL_PSML,a1 ;answer is smallest denormalized positive + bra end_unfr +* +*case DEST_FMT = SGL +* +uSGL_RN: + lea.l SGL_PZRO,a1 ;answer is +/- zero + bset.b #z_bit,FPSR_CC(a6) + bra.b uset_sign +uSGL_RZ: + lea.l SGL_PZRO,a1 ;answer is +/- zero + bset.b #z_bit,FPSR_CC(a6) + bra.b uset_sign +uSGL_RM: + tst.b LOCAL_SGN(a0) ;if negative underflow + beq.b us_rm_pos +us_rm_neg: + lea.l SGL_PSML,a1 ;answer is smallest denormalized negative + bset.b #neg_bit,FPSR_CC(a6) + bra.b end_unfr +us_rm_pos: + lea.l SGL_PZRO,a1 ;answer is positive zero + bset.b #z_bit,FPSR_CC(a6) + bra.b end_unfr +uSGL_RP: + tst.b LOCAL_SGN(a0) ;if negative underflow + beq.b us_rp_pos +us_rp_neg: + lea.l SGL_PZRO,a1 ;answer is negative zero + ori.l #negz_mask,USER_FPSR(a6) + bra.b end_unfr +us_rp_pos: + lea.l SGL_PSML,a1 ;answer is smallest denormalized positive + bra.b end_unfr + +uset_sign: + tst.b LOCAL_SGN(a0) ;if negative underflow + beq.b end_unfr +uneg_sign: + bset.b #neg_bit,FPSR_CC(a6) + +end_unfr: + move.w LOCAL_EX(a1),LOCAL_EX(a0) ;be careful not to overwrite sign + move.l LOCAL_HI(a1),LOCAL_HI(a0) + move.l LOCAL_LO(a1),LOCAL_LO(a0) + rts +* +* reg_dest --- write byte, word, or long data to Dn +* +* +* Input: +* L_SCR1: Data +* d1: data size and dest register number formatted as: +* +* 32 5 4 3 2 1 0 +* ----------------------------------------------- +* | 0 | Size | Dest Reg # | +* ----------------------------------------------- +* +* Size is: +* 0 - Byte +* 1 - Word +* 2 - Long/Single +* +* d1 is used directly as the index into pregdst (no range check); +* a0 is used as scratch. +* +pregdst: + dc.l byte_d0 + dc.l byte_d1 + dc.l byte_d2 + dc.l byte_d3 + dc.l byte_d4 + dc.l byte_d5 + dc.l byte_d6 + dc.l byte_d7 + dc.l word_d0 + dc.l word_d1 + dc.l word_d2 + dc.l word_d3 + dc.l word_d4 + dc.l word_d5 + dc.l word_d6 + dc.l word_d7 + dc.l long_d0 + dc.l long_d1 + dc.l long_d2 + dc.l long_d3 + dc.l long_d4 + dc.l long_d5 + dc.l long_d6 + dc.l long_d7 + +reg_dest: + lea.l pregdst,a0 + move.l (a0,d1*4),a0 + 
jmp (a0) ;dispatch on the size:register index in d1 + +* +* Handlers reached through pregdst. d0 and d1 are written to their +* USER_D0/USER_D1 images in the frame; d2-d7 are written to the +* registers directly. Byte and word data are taken from the +* lowest-addressed bytes of L_SCR1 and stored into the low byte/word +* of the destination (offsets +3 and +2 of the saved long). +* +byte_d0: + move.b L_SCR1(a6),USER_D0+3(a6) + rts +byte_d1: + move.b L_SCR1(a6),USER_D1+3(a6) + rts +byte_d2: + move.b L_SCR1(a6),d2 + rts +byte_d3: + move.b L_SCR1(a6),d3 + rts +byte_d4: + move.b L_SCR1(a6),d4 + rts +byte_d5: + move.b L_SCR1(a6),d5 + rts +byte_d6: + move.b L_SCR1(a6),d6 + rts +byte_d7: + move.b L_SCR1(a6),d7 + rts +word_d0: + move.w L_SCR1(a6),USER_D0+2(a6) + rts +word_d1: + move.w L_SCR1(a6),USER_D1+2(a6) + rts +word_d2: + move.w L_SCR1(a6),d2 + rts +word_d3: + move.w L_SCR1(a6),d3 + rts +word_d4: + move.w L_SCR1(a6),d4 + rts +word_d5: + move.w L_SCR1(a6),d5 + rts +word_d6: + move.w L_SCR1(a6),d6 + rts +word_d7: + move.w L_SCR1(a6),d7 + rts +long_d0: + move.l L_SCR1(a6),USER_D0(a6) + rts +long_d1: + move.l L_SCR1(a6),USER_D1(a6) + rts +long_d2: + move.l L_SCR1(a6),d2 + rts +long_d3: + move.l L_SCR1(a6),d3 + rts +long_d4: + move.l L_SCR1(a6),d4 + rts +long_d5: + move.l L_SCR1(a6),d5 + rts +long_d6: + move.l L_SCR1(a6),d6 + rts +long_d7: + move.l L_SCR1(a6),d7 + rts + end diff --git a/sys/arch/m68k/fpsp/x_bsun.sa b/sys/arch/m68k/fpsp/x_bsun.sa new file mode 100644 index 00000000000..b3da064c209 --- /dev/null +++ b/sys/arch/m68k/fpsp/x_bsun.sa @@ -0,0 +1,72 @@ +* $NetBSD: x_bsun.sa,v 1.2 1994/10/26 07:50:22 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. 
+* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* x_bsun.sa 3.3 7/1/91 +* +* fpsp_bsun --- FPSP handler for branch/set on unordered exception +* +* Copy the PC to FPIAR to maintain 881/882 compatibility +* +* The real_bsun handler will need to perform further corrective +* measures as outlined in the 040 User's Manual on pages +* 9-41f, section 9.8.3. 
+* + +X_BSUN IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref real_bsun + + xdef fpsp_bsun +fpsp_bsun: +* Build the local frame and save the FP state, d0-d1/a0-a1, fp0-fp3 +* and the FP control registers into it. + link a6,#-LOCAL_SIZE + fsave -(a7) + movem.l d0-d1/a0-a1,USER_DA(a6) + fmovem.x fp0-fp3,USER_FP0(a6) + fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) + +* Replace the saved FPIAR image with the exception PC. Presumably +* USER_FPIAR lies inside the USER_FPCR save area, so the fmovem.l +* restore below loads it into FPIAR -- confirm against fpsp.h. + move.l EXC_PC(a6),USER_FPIAR(a6) +* Restore everything saved above, drop the frame and continue in the +* system's real_bsun handler. + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l real_bsun +* + end diff --git a/sys/arch/m68k/fpsp/x_fline.sa b/sys/arch/m68k/fpsp/x_fline.sa new file mode 100644 index 00000000000..9f72985a72b --- /dev/null +++ b/sys/arch/m68k/fpsp/x_fline.sa @@ -0,0 +1,129 @@ +* $NetBSD: x_fline.sa,v 1.2 1994/10/26 07:50:23 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. 
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* x_fline.sa 3.3 1/10/91 +* +* fpsp_fline --- FPSP handler for fline exception +* +* First determine if the exception is one of the unimplemented +* floating point instructions. If so, let fpsp_unimp handle it. +* Next, determine if the instruction is an fmovecr with a non-zero +* <ea> field. If so, handle here and return. Otherwise, it +* must be a real F-line exception. +* + +X_FLINE IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref real_fline + xref fpsp_unimp + xref uni_2 + xref mem_read + xref fpsp_fmt_error + + xdef fpsp_fline +fpsp_fline: +* +* check for unimplemented vector first. Use EXC_VEC-4 because +* the equate is valid only after a 'link a6' has pushed one more +* long onto the stack. 
+* + cmp.w #UNIMP_VEC,EXC_VEC-4(a7) + beq.l fpsp_unimp + +* +* fmovecr with non-zero <ea> handling here +* + sub.l #4,a7 ;4 accounts for 2-word difference +* ;between six word frame (unimp) and +* ;four word frame + link a6,#-LOCAL_SIZE + fsave -(a7) + movem.l d0-d1/a0-a1,USER_DA(a6) + movea.l EXC_PC+4(a6),a0 ;get address of fline instruction + lea.l L_SCR1(a6),a1 ;use L_SCR1 as scratch + move.l #4,d0 ;count: fline opcode word + command word + add.l #4,a6 ;to offset the sub.l #4,a7 above so that +* ;a6 can point correctly to the stack frame +* ;before branching to mem_read + bsr.l mem_read + sub.l #4,a6 ;back to the shifted frame pointer + move.l L_SCR1(a6),d0 ;d0 contains the fline and command word + bfextu d0{4:3},d1 ;extract coprocessor id + cmpi.b #1,d1 ;check if cpid=1 + bne.w not_mvcr ;exit if not + bfextu d0{16:6},d1 ;top six bits of the command word + cmpi.b #$17,d1 ;check if it is an FMOVECR encoding + bne.w not_mvcr +* ;if an FMOVECR instruction, fix stack +* ;and go to FPSP_UNIMP +* +* The fsave frame on the stack is replaced by a larger one: a7 is +* lowered, then the first byte is set to the VER_4x value, the second +* byte to the UNIMP_4x_SIZE-4 value, and the following word is cleared. +* +fix_stack: + cmpi.b #VER_40,(a7) ;test for orig unimp frame + bne.b ck_rev + sub.l #UNIMP_40_SIZE-4,a7 ;emulate an orig fsave + move.b #VER_40,(a7) + move.b #UNIMP_40_SIZE-4,1(a7) + clr.w 2(a7) + bra.b fix_con +ck_rev: + cmpi.b #VER_41,(a7) ;test for rev unimp frame + bne.l fpsp_fmt_error ;if not $40 or $41, exit with error + sub.l #UNIMP_41_SIZE-4,a7 ;emulate a rev fsave + move.b #VER_41,(a7) + move.b #UNIMP_41_SIZE-4,1(a7) + clr.w 2(a7) +fix_con: + move.w EXC_SR+4(a6),EXC_SR(a6) ;move stacked sr to new position + move.l EXC_PC+4(a6),EXC_PC(a6) ;move stacked pc to new position + fmove.l EXC_PC(a6),FPIAR ;point FPIAR to fline inst + move.l #4,d1 + add.l d1,EXC_PC(a6) ;increment stacked pc value to next inst + move.w #$202c,EXC_VEC(a6) ;reformat vector to unimp + clr.l EXC_EA(a6) ;clear the EXC_EA field + move.w d0,CMDREG1B(a6) ;move the lower word into CMDREG1B + clr.l E_BYTE(a6) + bset.b #UFLAG,T_BYTE(a6) + movem.l USER_DA(a6),d0-d1/a0-a1 ;restore data registers + bra.l uni_2 + +* +* Not an FMOVECR: undo everything done above (registers, fsave frame, +* link and the 4-byte stack adjustment) and hand over to real_fline. +* +not_mvcr: + movem.l USER_DA(a6),d0-d1/a0-a1 ;restore data registers + frestore (a7)+ + unlk a6 + add.l 
#4,a7 + bra.l real_fline + + end diff --git a/sys/arch/m68k/fpsp/x_operr.sa b/sys/arch/m68k/fpsp/x_operr.sa new file mode 100644 index 00000000000..9e1292d108a --- /dev/null +++ b/sys/arch/m68k/fpsp/x_operr.sa @@ -0,0 +1,381 @@ +* $NetBSD: x_operr.sa,v 1.4 1994/10/26 07:50:24 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* x_operr.sa 3.5 7/1/91 +* +* fpsp_operr --- FPSP handler for operand error exception +* +* See 68040 User's Manual pp. 
9-44f +* +* Note 1: For trap disabled 040 does the following: +* If the dest is a fp reg, then an extended precision non_signaling +* NAN is stored in the dest reg. If the dest format is b, w, or l and +* the source op is a NAN, then garbage is stored as the result (actually +* the upper 32 bits of the mantissa are sent to the integer unit). If +* the dest format is integer (b, w, l) and the operr is caused by +* integer overflow, or the source op is inf, then the result stored is +* garbage. +* There are three cases in which operr is incorrectly signaled on the +* 040. This occurs for move_out of format b, w, or l for the largest +* negative integer (-2^7 for b, -2^15 for w, -2^31 for l). +* +* On opclass = 011 fmove.(b,w,l) that causes a conversion +* overflow -> OPERR, the exponent in wbte (and fpte) is: +* byte 56 - (62 - exp) +* word 48 - (62 - exp) +* long 32 - (62 - exp) +* +* where exp = (true exp) - 1 +* +* So, wbtemp and fptemp will contain the following on erroneously +* signalled operr: +* fpts = 1 +* fpte = $4000 (15 bit externally) +* byte fptm = $ffffffff ffffff80 +* word fptm = $ffffffff ffff8000 +* long fptm = $ffffffff 80000000 +* +* Note 2: For trap enabled 040 does the following: +* If the inst is move_out, then same as Note 1. +* If the inst is not move_out, the dest is not modified. +* The exceptional operand is not defined for integer overflow +* during a move_out. +* + +X_OPERR IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref mem_write + xref real_operr + xref real_inex + xref get_fline + xref fpsp_done + xref reg_dest + + xdef fpsp_operr +fpsp_operr: +* + link a6,#-LOCAL_SIZE + fsave -(a7) + movem.l d0-d1/a0-a1,USER_DA(a6) + fmovem.x fp0-fp3,USER_FP0(a6) + fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) + +* +* Check if this is an opclass 3 instruction.
+* If so, fall through, else branch to operr_end +* + btst.b #TFLAG,T_BYTE(a6) + beq.b operr_end + +* +* If the destination size is B,W,or L, the operr must be +* handled here. +* + move.l CMDREG1B(a6),d0 + bfextu d0{3:3},d0 ;0=long, 4=word, 6=byte + tst.b d0 ;determine size; check long + beq.w operr_long + cmpi.b #4,d0 ;check word + beq.w operr_word + cmpi.b #6,d0 ;check byte + beq.w operr_byte + +* +* The size is not B,W,or L, so the operr is handled by the +* kernel handler. Set the operr bits and clean up, leaving +* only the integer exception frame on the stack, and the +* fpu in the original exceptional state. +* +operr_end: + bset.b #operr_bit,FPSR_EXCEPT(a6) + bset.b #aiop_bit,FPSR_AEXCEPT(a6) + + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l real_operr + +operr_long: + moveq.l #4,d1 ;write size to d1 + move.b STAG(a6),d0 ;test stag for nan + andi.b #$e0,d0 ;clr all but tag + cmpi.b #$60,d0 ;check for nan + beq operr_nan + cmpi.l #$80000000,FPTEMP_LO(a6) ;test if ls lword is special + bne.b chklerr ;if not equal, check for incorrect operr + bsr check_upper ;check if exp and ms mant are special + tst.l d0 + bne.b chklerr ;if d0 is true, check for incorrect operr + move.l #$80000000,d0 ;store special case result + bsr operr_store + bra.w not_enabled ;clean and exit +* +* CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE +* +chklerr: + move.w FPTEMP_EX(a6),d0 + and.w #$7FFF,d0 ;ignore sign bit + cmp.w #$3FFE,d0 ;this is the only possible exponent value + bne.b chklerr2 +fixlong: + move.l FPTEMP_LO(a6),d0 + bsr operr_store + bra.w not_enabled +chklerr2: + move.w FPTEMP_EX(a6),d0 + and.w #$7FFF,d0 ;ignore sign bit + cmp.w #$4000,d0 + bcc.w store_max ;exponent out of range + + move.l FPTEMP_LO(a6),d0 + and.l #$7FFF0000,d0 ;look for all 1's on bits 30-16 + cmp.l #$7FFF0000,d0 + beq.b fixlong + + tst.l FPTEMP_LO(a6) + bpl.b chklepos + cmp.l #$FFFFFFFF,FPTEMP_HI(a6) +
beq.b fixlong + bra.w store_max +chklepos: + tst.l FPTEMP_HI(a6) + beq.b fixlong + bra.w store_max + +operr_word: + moveq.l #2,d1 ;write size to d1 + move.b STAG(a6),d0 ;test stag for nan + andi.b #$e0,d0 ;clr all but tag + cmpi.b #$60,d0 ;check for nan + beq.w operr_nan + cmpi.l #$ffff8000,FPTEMP_LO(a6) ;test if ls lword is special + bne.b chkwerr ;if not equal, check for incorrect operr + bsr check_upper ;check if exp and ms mant are special + tst.l d0 + bne.b chkwerr ;if d0 is true, check for incorrect operr + move.l #$80000000,d0 ;store special case result + bsr operr_store + bra.w not_enabled ;clean and exit +* +* CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE +* +chkwerr: + move.w FPTEMP_EX(a6),d0 + and.w #$7FFF,d0 ;ignore sign bit + cmp.w #$3FFE,d0 ;this is the only possible exponent value + bne.b store_max + move.l FPTEMP_LO(a6),d0 + swap d0 ;low word of FPTEMP_LO now in upper word of d0 + bsr operr_store + bra.w not_enabled + +operr_byte: + moveq.l #1,d1 ;write size to d1 + move.b STAG(a6),d0 ;test stag for nan + andi.b #$e0,d0 ;clr all but tag + cmpi.b #$60,d0 ;check for nan + beq.b operr_nan + cmpi.l #$ffffff80,FPTEMP_LO(a6) ;test if ls lword is special + bne.b chkberr ;if not equal, check for incorrect operr + bsr check_upper ;check if exp and ms mant are special + tst.l d0 + bne.b chkberr ;if d0 is true, check for incorrect operr + move.l #$80000000,d0 ;store special case result + bsr operr_store + bra.w not_enabled ;clean and exit +* +* CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE +* +chkberr: + move.w FPTEMP_EX(a6),d0 + and.w #$7FFF,d0 ;ignore sign bit + cmp.w #$3FFE,d0 ;this is the only possible exponent value + bne.b store_max + move.l FPTEMP_LO(a6),d0 + asl.l #8,d0 + swap d0 ;low byte of FPTEMP_LO now in bits 31-24 of d0 + bsr operr_store + bra.w not_enabled + +* +* This operr condition is not of the special case. Set operr +* and aiop and write the portion of the nan to memory for the +* given size.
+* +operr_nan: + or.l #opaop_mask,USER_FPSR(a6) ;set operr & aiop + + move.l ETEMP_HI(a6),d0 ;output will be from upper 32 bits + bsr operr_store + bra end_operr +* +* Store_max loads the max pos or negative for the size, sets +* the operr and aiop bits, and clears inex and ainex, incorrectly +* set by the 040. +* +store_max: + or.l #opaop_mask,USER_FPSR(a6) ;set operr & aiop + bclr.b #inex2_bit,FPSR_EXCEPT(a6) + bclr.b #ainex_bit,FPSR_AEXCEPT(a6) + fmove.l #0,FPSR + + tst.w FPTEMP_EX(a6) ;check sign + blt.b load_neg + move.l #$7fffffff,d0 + bsr operr_store + bra end_operr +load_neg: + move.l #$80000000,d0 + bsr operr_store + bra end_operr + +* +* This routine stores the data in d0, for the given size in d1, +* to memory or data register as required. A read of the fline +* is required to determine the destination. +* +operr_store: + move.l d0,L_SCR1(a6) ;move write data to L_SCR1 + move.l d1,-(a7) ;save register size + bsr.l get_fline ;fline returned in d0 + move.l (a7)+,d1 + bftst d0{26:3} ;if mode is zero, dest is Dn + bne.b dest_mem +* +* Destination is Dn. Get register number from d0. Data is in +* L_SCR1(a6). D1 has size: 1=byte,2=word,4=long/single +* + andi.l #7,d0 ;isolate register number + cmpi.l #4,d1 + beq.b op_long ;the most frequent case + cmpi.l #2,d1 + bne.b op_con + or.l #8,d0 + bra.b op_con +op_long: + or.l #$10,d0 +op_con: + move.l d0,d1 ;format size:reg for reg_dest + bra.l reg_dest ;call to reg_dest returns to caller +* ;of operr_store +* +* Destination is memory. Get <ea> from integer exception frame +* and call mem_write. +* +dest_mem: + lea.l L_SCR1(a6),a0 ;put ptr to write data in a0 + move.l EXC_EA(a6),a1 ;put user destination address in a1 + move.l d1,d0 ;put size in d0 + bsr.l mem_write + rts +* +* Check the exponent for $c000 or $bfff and the upper 32 bits of +* the mantissa for $ffffffff. If both are true, return d0 clr; +* the caller then loads the special case result +* itself.
If not, it is a real operr, and set d0. +* +check_upper: + cmpi.l #$ffffffff,FPTEMP_HI(a6) ;check if ms mant lword is all 1's + bne.b true_operr ;if not all 1's then was true operr + cmpi.w #$c000,FPTEMP_EX(a6) ;check if incorrectly signalled + beq.b not_true_operr ;branch if not true operr + cmpi.w #$bfff,FPTEMP_EX(a6) ;check if incorrectly signalled + beq.b not_true_operr ;branch if not true operr +true_operr: + move.l #1,d0 ;signal real operr + rts +not_true_operr: + clr.l d0 ;signal no real operr + rts + +* +* End_operr tests for operr enabled. If not, it goes to not_enabled +* (inex check or fpsp_done). If enabled, it cleans up the stack and branches +* to the kernel operr handler with only the integer exception +* frame on the stack and the fpu in the original exceptional state +* with correct data written to the destination. +* +end_operr: + btst.b #operr_bit,FPCR_ENABLE(a6) + beq.b not_enabled +enabled: + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l real_operr + +not_enabled: +* +* It is possible to have either inex2 or inex1 exceptions with the +* operr. If the inex enable bit is set in the FPCR, and either +* inex2 or inex1 occurred, we must clean up and branch to the +* real inex handler. +* +ck_inex: + move.b FPCR_ENABLE(a6),d0 + and.b FPSR_EXCEPT(a6),d0 ;enabled exceptions that were also reported + andi.b #$3,d0 ;keep only the two low (inex) bits + beq.w operr_exit +* +* Inexact enabled and reported, and we must take an inexact exception. +* +take_inex: + move.b #INEX_VEC,EXC_VEC+1(a6) + move.l USER_FPSR(a6),FPSR_SHADOW(a6) + or.l #sx_mask,E_BYTE(a6) + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l real_inex +* +* Since operr is only an E1 exception, there is no need to frestore +* any state back to the fpu.
+* +operr_exit: + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + unlk a6 ;a7 reloaded from a6, so the fsave frame is dropped + bra.l fpsp_done + + end diff --git a/sys/arch/m68k/fpsp/x_ovfl.sa b/sys/arch/m68k/fpsp/x_ovfl.sa new file mode 100644 index 00000000000..c161fcb5564 --- /dev/null +++ b/sys/arch/m68k/fpsp/x_ovfl.sa @@ -0,0 +1,210 @@ +* $NetBSD: x_ovfl.sa,v 1.2 1994/10/26 07:50:26 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc.
+ +* +* x_ovfl.sa 3.5 7/1/91 +* +* fpsp_ovfl --- FPSP handler for overflow exception +* +* Overflow occurs when a floating-point intermediate result is +* too large to be represented in a floating-point data register, +* or when storing to memory, the contents of a floating-point +* data register are too large to be represented in the +* destination format. +* +* Trap disabled results +* +* If the instruction is move_out, then garbage is stored in the +* destination. If the instruction is not move_out, then the +* destination is not affected. For 68881 compatibility, the +* following values should be stored at the destination, based +* on the current rounding mode: +* +* RN Infinity with the sign of the intermediate result. +* RZ Largest magnitude number, with the sign of the +* intermediate result. +* RM For pos overflow, the largest pos number. For neg overflow, +* -infinity +* RP For pos overflow, +infinity. For neg overflow, the largest +* neg number +* +* Trap enabled results +* All trap disabled code applies. In addition the exceptional +* operand needs to be made available to the user's exception handler +* with a bias of $6000 subtracted from the exponent. +* + +X_OVFL IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref ovf_r_x2 + xref ovf_r_x3 + xref store + xref real_ovfl + xref real_inex + xref fpsp_done + xref g_opcls + xref b1238_fix + + xdef fpsp_ovfl +fpsp_ovfl: + link a6,#-LOCAL_SIZE + fsave -(a7) + movem.l d0-d1/a0-a1,USER_DA(a6) + fmovem.x fp0-fp3,USER_FP0(a6) + fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) + +* +* The 040 doesn't set the AINEX bit in the FPSR, the following +* line temporarily rectifies this error.
+* + bset.b #ainex_bit,FPSR_AEXCEPT(a6) +* + bsr.l ovf_adj ;denormalize, round & store interm op +* +* If overflow traps are not enabled, check for inexact exception +* + btst.b #ovfl_bit,FPCR_ENABLE(a6) + beq.b ck_inex +* + btst.b #E3,E_BYTE(a6) + beq.b no_e3_1 + bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no + bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit + bsr.l b1238_fix ;test for bug1238 case + move.l USER_FPSR(a6),FPSR_SHADOW(a6) + or.l #sx_mask,E_BYTE(a6) +no_e3_1: + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l real_ovfl +* +* It is possible to have either inex2 or inex1 exceptions with the +* ovfl. If the inex enable bit is set in the FPCR, and either +* inex2 or inex1 occurred, we must clean up and branch to the +* real inex handler. +* +ck_inex: +* move.b FPCR_ENABLE(a6),d0 +* and.b FPSR_EXCEPT(a6),d0 +* andi.b #$3,d0 + btst.b #inex2_bit,FPCR_ENABLE(a6) ;only the inex2 enable bit is tested + beq.b ovfl_exit +* +* Inexact enabled and reported, and we must take an inexact exception. +* +take_inex: + btst.b #E3,E_BYTE(a6) + beq.b no_e3_2 + bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no + bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit + bsr.l b1238_fix ;test for bug1238 case + move.l USER_FPSR(a6),FPSR_SHADOW(a6) + or.l #sx_mask,E_BYTE(a6) +no_e3_2: + move.b #INEX_VEC,EXC_VEC+1(a6) + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l real_inex + +ovfl_exit: + bclr.b #E3,E_BYTE(a6) ;test and clear E3 bit + beq.b e1_set +* +* Clear dirty bit on dest register in the frame before branching +* to b1238_fix.
+* + bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no + bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit + bsr.l b1238_fix ;test for bug1238 case + + move.l USER_FPSR(a6),FPSR_SHADOW(a6) + or.l #sx_mask,E_BYTE(a6) + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l fpsp_done +e1_set: + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + unlk a6 + bra.l fpsp_done + +* +* ovf_adj --- select the operand, form the overflow result, store it +* +ovf_adj: +* +* Have a0 point to the correct operand. +* + btst.b #E3,E_BYTE(a6) ;test E3 bit + beq.b ovf_e1 + + lea WBTEMP(a6),a0 ;E3 set: operand is WBTEMP + bra.b ovf_com +ovf_e1: + lea ETEMP(a6),a0 ;E3 clear: operand is ETEMP + +ovf_com: + bclr.b #sign_bit,LOCAL_EX(a0) ;strip the sign from the exponent word + sne LOCAL_SGN(a0) ;LOCAL_SGN = $ff if the sign bit was set + + bsr.l g_opcls ;returns opclass in d0 + cmpi.w #3,d0 ;check for opclass3 + bne.b not_opc011 + +* +* FPSR_CC is saved and restored because ovf_r_x3 affects it. The +* CCs are defined to be 'not affected' for the opclass3 instruction. +* + move.b FPSR_CC(a6),L_SCR1(a6) + bsr.l ovf_r_x3 ;returns a0 pointing to result + move.b L_SCR1(a6),FPSR_CC(a6) + bra.l store ;stores to memory or register + +not_opc011: + bsr.l ovf_r_x2 ;returns a0 pointing to result + bra.l store ;stores to memory or register + + end diff --git a/sys/arch/m68k/fpsp/x_snan.sa b/sys/arch/m68k/fpsp/x_snan.sa new file mode 100644 index 00000000000..0dba98298ab --- /dev/null +++ b/sys/arch/m68k/fpsp/x_snan.sa @@ -0,0 +1,302 @@ +* $NetBSD: x_snan.sa,v 1.3 1994/10/26 07:50:28 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* x_snan.sa 3.3 7/1/91 +* +* fpsp_snan --- FPSP handler for signalling NAN exception +* +* SNAN for float -> integer conversions (integer conversion of +* an SNAN) is a non-maskable run-time exception. +* +* For trap disabled the 040 does the following: +* If the dest data format is s, d, or x, then the SNAN bit in the NAN +* is set to one and the resulting non-signaling NAN (truncated if +* necessary) is transferred to the dest. If the dest format is b, w, +* or l, then garbage is written to the dest (actually the upper 32 bits +* of the mantissa are sent to the integer unit). +* +* For trap enabled the 040 does the following: +* If the inst is move_out, then the results are the same as for trap +* disabled with the exception posted. If the instruction is not move_ +* out, the dest.
is not modified, and the exception is posted. +* + +X_SNAN IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref get_fline + xref mem_write + xref real_snan + xref real_inex + xref fpsp_done + xref reg_dest + + xdef fpsp_snan +fpsp_snan: + link a6,#-LOCAL_SIZE + fsave -(a7) + movem.l d0-d1/a0-a1,USER_DA(a6) + fmovem.x fp0-fp3,USER_FP0(a6) + fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) + +* +* Check if trap enabled +* + btst.b #snan_bit,FPCR_ENABLE(a6) + bne.b ena ;If enabled, then branch + + bsr.l move_out ;else SNAN disabled +* +* It is possible to have an inex1 exception with the +* snan. If the inex enable bit is set in the FPCR, and either +* inex2 or inex1 occurred, we must clean up and branch to the +* real inex handler. +* +ck_inex: + move.b FPCR_ENABLE(a6),d0 + and.b FPSR_EXCEPT(a6),d0 ;enabled exceptions that were also reported + andi.b #$3,d0 ;keep only the two low (inex) bits + beq.w end_snan +* +* Inexact enabled and reported, and we must take an inexact exception. +* +take_inex: + move.b #INEX_VEC,EXC_VEC+1(a6) + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l real_inex +* +* SNAN is enabled. Check if inst is move_out. +* Make any corrections to the 040 output as necessary.
+* +ena: + btst.b #5,CMDREG1B(a6) ;if set, inst is move out + beq.w not_out + + bsr.l move_out + +report_snan: + move.b (a7),VER_TMP(a6) ;save the frame version byte + cmpi.b #VER_40,(a7) ;test for orig unimp frame + bne.b ck_rev + moveq.l #13,d0 ;need to zero 14 lwords + bra.b rep_con +ck_rev: + moveq.l #11,d0 ;need to zero 12 lwords +rep_con: + clr.l (a7) +loop1: + clr.l -(a7) ;clear and dec a7 + dbra.w d0,loop1 + move.b VER_TMP(a6),(a7) ;format a busy frame + move.b #BUSY_SIZE-4,1(a7) ;write busy size + move.l USER_FPSR(a6),FPSR_SHADOW(a6) + or.l #sx_mask,E_BYTE(a6) + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l real_snan +* +* Exit snan handler by expanding the unimp frame into a busy frame +* +end_snan: + bclr.b #E1,E_BYTE(a6) + + move.b (a7),VER_TMP(a6) ;save the frame version byte + cmpi.b #VER_40,(a7) ;test for orig unimp frame + bne.b ck_rev2 + moveq.l #13,d0 ;need to zero 14 lwords + bra.b rep_con2 +ck_rev2: + moveq.l #11,d0 ;need to zero 12 lwords +rep_con2: + clr.l (a7) +loop2: + clr.l -(a7) ;clear and dec a7 + dbra.w d0,loop2 + move.b VER_TMP(a6),(a7) ;format a busy frame + move.b #BUSY_SIZE-4,1(a7) ;write busy size + move.l USER_FPSR(a6),FPSR_SHADOW(a6) + or.l #sx_mask,E_BYTE(a6) + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l fpsp_done + +* +* Move_out --- write the nan to a b, w, or l destination +* +move_out: + move.l EXC_EA(a6),a0 ;get <ea> from exc frame + + bfextu CMDREG1B(a6){3:3},d0 ;move rx field to d0{2:0} + tst.l d0 ;check for long + beq.b sto_long ;branch if move_out long + + cmpi.l #4,d0 ;check for word + beq.b sto_word ;branch if move_out word + + cmpi.l #6,d0 ;check for byte + beq.b sto_byte ;branch if move_out byte + +* +* Not byte, word or long +* + rts +* +* Get the 32 most significant bits of etemp mantissa +* +sto_long: + move.l ETEMP_HI(a6),d1 + move.l #4,d0 ;load byte count +* +* Set signalling nan bit +* + bset.l #30,d1 +* +* Store to the user's destination
address +* + tst.l a0 ;check if <ea> is 0 + beq.b wrt_dn ;destination is a data register + + move.l d1,-(a7) ;move the snan onto the stack + move.l a0,a1 ;load dest addr into a1 + move.l a7,a0 ;load src addr of snan into a0 + bsr.l mem_write ;write snan to user memory + move.l (a7)+,d1 ;clear off stack + rts +* +* Get the 16 most significant bits of etemp mantissa +* +sto_word: + move.l ETEMP_HI(a6),d1 + move.l #2,d0 ;load byte count +* +* Set signalling nan bit +* + bset.l #30,d1 +* +* Store to the user's destination address +* + tst.l a0 ;check if <ea> is 0 + beq.b wrt_dn ;destination is a data register + + move.l d1,-(a7) ;move the snan onto the stack + move.l a0,a1 ;load dest addr into a1 + move.l a7,a0 ;point to ms word of the snan on the stack + bsr.l mem_write ;write snan to user memory + move.l (a7)+,d1 ;clear off stack + rts +* +* Get the 8 most significant bits of etemp mantissa +* +sto_byte: + move.l ETEMP_HI(a6),d1 + move.l #1,d0 ;load byte count +* +* Set signalling nan bit +* + bset.l #30,d1 +* +* Store to the user's destination address +* + tst.l a0 ;check if <ea> is 0 + beq.b wrt_dn ;destination is a data register + move.l d1,-(a7) ;move the snan onto the stack + move.l a0,a1 ;load dest addr into a1 + move.l a7,a0 ;point to source byte + bsr.l mem_write ;write snan to user memory + move.l (a7)+,d1 ;clear off stack + rts + +* +* wrt_dn --- write to a data register +* +* We get here with D1 containing the data to write and D0 the +* number of bytes to write: 1=byte,2=word,4=long.
+* +wrt_dn: + move.l d1,L_SCR1(a6) ;data + move.l d0,-(a7) ;size + bsr.l get_fline ;returns fline word in d0 + move.l d0,d1 + andi.l #$7,d1 ;d1 now holds register number + move.l (sp)+,d0 ;get original size + cmpi.l #4,d0 + beq.b wrt_long + cmpi.l #2,d0 + bne.b wrt_byte +wrt_word: + or.l #$8,d1 ;word: $8 or'ed into the register number + bra.l reg_dest +wrt_long: + or.l #$10,d1 ;long: $10 or'ed into the register number + bra.l reg_dest +wrt_byte: + bra.l reg_dest ;byte: register number passed unchanged +* +* Check if it is a src nan or dst nan +* +not_out: + move.l DTAG(a6),d0 + bfextu d0{0:3},d0 ;isolate dtag in lsbs + + cmpi.b #3,d0 ;check for nan in destination + bne.b issrc ;destination nan has priority +dst_nan: + btst.b #6,FPTEMP_HI(a6) ;check if dest nan is an snan + bne.b issrc ;no, so check source for snan + move.w FPTEMP_EX(a6),d0 + bra.b cont +issrc: + move.w ETEMP_EX(a6),d0 +cont: + btst.l #15,d0 ;test for sign of snan + beq.b clr_neg + bset.b #neg_bit,FPSR_CC(a6) + bra.w report_snan +clr_neg: + bclr.b #neg_bit,FPSR_CC(a6) + bra.w report_snan + + end diff --git a/sys/arch/m68k/fpsp/x_store.sa b/sys/arch/m68k/fpsp/x_store.sa new file mode 100644 index 00000000000..4139d87b862 --- /dev/null +++ b/sys/arch/m68k/fpsp/x_store.sa @@ -0,0 +1,281 @@ +* $NetBSD: x_store.sa,v 1.3 1994/10/26 07:50:29 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials.
+* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* x_store.sa 3.2 1/24/91 +* +* store --- store operand to memory or register +* +* Used by underflow and overflow handlers. +* +* a0 = points to fp value to be stored. +* + +X_STORE IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + +fpreg_mask: + dc.b $80,$40,$20,$10,$08,$04,$02,$01 + + include fpsp.h + + xref mem_write + xref get_fline + xref g_opcls + xref g_dfmtou + xref reg_dest + + xdef dest_ext + xdef dest_dbl + xdef dest_sgl + + xdef store +store: + btst.b #E3,E_BYTE(a6) + beq.b E1_sto +E3_sto: + move.l CMDREG3B(a6),d0 + bfextu d0{6:3},d0 ;isolate dest. reg from cmdreg3b +sto_fp: + lea fpreg_mask,a1 + move.b (a1,d0.w),d0 ;convert reg# to dynamic register mask + tst.b LOCAL_SGN(a0) + beq.b is_pos + bset.b #sign_bit,LOCAL_EX(a0) +is_pos: + fmovem.x (a0),d0 ;move to correct register +* +* if fp0-fp3 is being modified, we must put a copy +* in the USER_FPn variable on the stack because all exception +* handlers restore fp0-fp3 from there.
+* + cmp.b #$80,d0 + bne.b not_fp0 + fmovem.x fp0,USER_FP0(a6) + rts +not_fp0: + cmp.b #$40,d0 + bne.b not_fp1 + fmovem.x fp1,USER_FP1(a6) + rts +not_fp1: + cmp.b #$20,d0 + bne.b not_fp2 + fmovem.x fp2,USER_FP2(a6) + rts +not_fp2: + cmp.b #$10,d0 + bne.b not_fp3 + fmovem.x fp3,USER_FP3(a6) + rts +not_fp3: + rts + +E1_sto: + bsr.l g_opcls ;returns opclass in d0 + cmpi.b #3,d0 + beq opc011 ;branch if opclass 3 + move.l CMDREG1B(a6),d0 + bfextu d0{6:3},d0 ;extract destination register + bra.b sto_fp + +opc011: + bsr.l g_dfmtou ;returns dest format in d0 +* ;ext=00, sgl=01, dbl=10 + move.l a0,a1 ;save source addr in a1 + move.l EXC_EA(a6),a0 ;get the address + tst.l d0 ;if dest format is extended + beq.w dest_ext ;then branch + cmpi.l #1,d0 ;if dest format is single + beq.b dest_sgl ;then branch +* +* fall through to dest_dbl +* + +* +* dest_dbl --- write double precision value to user space +* +*Input +* a0 -> destination address +* a1 -> source in extended precision +*Output +* a0 -> destroyed +* a1 -> destroyed +* d0 -> 0 +* +*Changes extended precision to double precision. +* Note: no attempt is made to round the extended value to double. 
+* dbl_sign = ext_sign +* dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias) +* get rid of ext integer bit +* dbl_mant = ext_mant{62:11} +* +* --------------- --------------- --------------- +* extended -> |s| exp | |1| ms mant | | ls mant | +* --------------- --------------- --------------- +* 95 64 63 62 32 31 11 0 +* | | +* | | +* | | +* v v +* --------------- --------------- +* double -> |s|exp| mant | | mant | +* --------------- --------------- +* 63 51 32 31 0 +* +dest_dbl: + clr.l d0 ;clear d0 + move.w LOCAL_EX(a1),d0 ;get exponent + sub.w #$3fff,d0 ;subtract extended precision bias + cmp.w #$4000,d0 ;check if inf + beq.b inf ;if so, special case + add.w #$3ff,d0 ;add double precision bias + swap d0 ;d0 now in upper word + lsl.l #4,d0 ;d0 now in proper place for dbl prec exp + tst.b LOCAL_SGN(a1) + beq.b get_mant ;if positive, go process mantissa + bset.l #31,d0 ;if negative, put in sign information +* ; before continuing + bra.b get_mant ;go process mantissa +inf: + move.l #$7ff00000,d0 ;load dbl inf exponent + clr.l LOCAL_HI(a1) ;clear ms mantissa (lower lword of the double) + tst.b LOCAL_SGN(a1) + beq.b dbl_inf ;if positive, go ahead and write it + bset.l #31,d0 ;if negative put in sign information +dbl_inf: + move.l d0,LOCAL_EX(a1) ;put the new exp back on the stack + bra.b dbl_wrt +get_mant: + move.l LOCAL_HI(a1),d1 ;get ms mantissa + bfextu d1{1:20},d1 ;get upper 20 bits of ms (skips integer bit) + or.l d1,d0 ;put these bits in ms word of double + move.l d0,LOCAL_EX(a1) ;put the new exp back on the stack + move.l LOCAL_HI(a1),d1 ;get ms mantissa + move.l #21,d0 ;load shift count + lsl.l d0,d1 ;put lower 11 bits in upper bits + move.l d1,LOCAL_HI(a1) ;build lower lword in memory + move.l LOCAL_LO(a1),d1 ;get ls mantissa + bfextu d1{0:21},d0 ;get ls 21 bits of double + or.l d0,LOCAL_HI(a1) ;put them in double result +dbl_wrt: + move.l #$8,d0 ;byte count for double precision number + exg a0,a1 ;a0=supervisor source, a1=user dest + bsr.l mem_write ;move the number to the user's memory + rts +* +* dest_sgl ---
write single precision value to user space +* +*Input +* a0 -> destination address (0 if dest is a data register) +* a1 -> source in extended precision +* +*Output +* a0 -> destroyed +* a1 -> destroyed +* d0 -> 0 +* +*Changes extended precision to single precision. +* sgl_sign = ext_sign +* sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) +* get rid of ext integer bit +* sgl_mant = ext_mant{62:40} +* +* --------------- --------------- --------------- +* extended -> |s| exp | |1| ms mant | | ls mant | +* --------------- --------------- --------------- +* 95 64 63 62 40 32 31 12 0 +* | | +* | | +* | | +* v v +* --------------- +* single -> |s|exp| mant | +* --------------- +* 31 22 0 +* +dest_sgl: + clr.l d0 + move.w LOCAL_EX(a1),d0 ;get exponent + sub.w #$3fff,d0 ;subtract extended precision bias + cmp.w #$4000,d0 ;check if inf + beq.b sinf ;if so, special case + add.w #$7f,d0 ;add single precision bias + swap d0 ;put exp in upper word of d0 + lsl.l #7,d0 ;shift it into single exp bits + tst.b LOCAL_SGN(a1) + beq.b get_sman ;if positive, continue + bset.l #31,d0 ;if negative, put in sign first + bra.b get_sman ;get mantissa +sinf: + move.l #$7f800000,d0 ;load single inf exp to d0 + tst.b LOCAL_SGN(a1) + beq.b sgl_wrt ;if positive, continue + bset.l #31,d0 ;if negative, put in sign info + bra.b sgl_wrt + +get_sman: + move.l LOCAL_HI(a1),d1 ;get ms mantissa + bfextu d1{1:23},d1 ;get upper 23 bits of ms (skips integer bit) + or.l d1,d0 ;put these bits in ms word of single + +sgl_wrt: + move.l d0,L_SCR1(a6) ;save the assembled single in L_SCR1(a6) + move.l #$4,d0 ;byte count for single precision number + tst.l a0 ;user's destination address + beq.b sgl_Dn ;destination is a data register + exg a0,a1 ;a0=supervisor source, a1=user dest + lea.l L_SCR1(a6),a0 ;point a0 to data + bsr.l mem_write ;move the number to the user's memory + rts +sgl_Dn: + bsr.l get_fline ;returns fline word in d0 + and.w #$7,d0 ;isolate register number + move.l d0,d1 ;d1 has size:reg formatted for reg_dest + or.l #$10,d1 ;reg_dest wants size added to
reg# + bra.l reg_dest ;size is X, rts in reg_dest will +* ;return to caller of dest_sgl + +dest_ext: + tst.b LOCAL_SGN(a1) ;put back sign into exponent word + beq.b dstx_cont + bset.b #sign_bit,LOCAL_EX(a1) ;negative: set sign bit in exponent word +dstx_cont: + clr.b LOCAL_SGN(a1) ;clear out the sign byte + + move.l #$0c,d0 ;byte count for extended number + exg a0,a1 ;a0=supervisor source, a1=user dest + bsr.l mem_write ;move the number to the user's memory + rts + + end diff --git a/sys/arch/m68k/fpsp/x_unfl.sa b/sys/arch/m68k/fpsp/x_unfl.sa new file mode 100644 index 00000000000..9987455bc88 --- /dev/null +++ b/sys/arch/m68k/fpsp/x_unfl.sa @@ -0,0 +1,294 @@ +* $NetBSD: x_unfl.sa,v 1.3 1994/10/26 07:50:30 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE.
+* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* x_unfl.sa 3.4 7/1/91 +* +* fpsp_unfl --- FPSP handler for underflow exception +* +* Trap disabled results +* For 881/2 compatibility, sw must denormalize the intermediate +* result, then store the result. Denormalization is accomplished +* by taking the intermediate result (which is always normalized) and +* shifting the mantissa right while incrementing the exponent until +* it is equal to the denormalized exponent for the destination +* format. After denormalization, the result is rounded to the +* destination format. +* +* Trap enabled results +* All trap disabled code applies. In addition the exceptional +* operand needs to be made available to the user with a bias of $6000 +* added to the exponent. +* + +X_UNFL IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref denorm + xref round + xref store + xref g_rndpr + xref g_opcls + xref g_dfmtou + xref real_unfl + xref real_inex + xref fpsp_done + xref b1238_fix + + xdef fpsp_unfl +fpsp_unfl: + link a6,#-LOCAL_SIZE + fsave -(a7) + movem.l d0-d1/a0-a1,USER_DA(a6) + fmovem.x fp0-fp3,USER_FP0(a6) + fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) + +* + bsr.l unf_res ;denormalize, round & store interm op +* +* If underflow exceptions are not enabled, check for inexact +* exception +* + btst.b #unfl_bit,FPCR_ENABLE(a6) + beq.b ck_inex + + btst.b #E3,E_BYTE(a6) + beq.b no_e3_1 +* +* Clear dirty bit on dest register in the frame before branching +* to b1238_fix.
+* + bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no + bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit + bsr.l b1238_fix ;test for bug1238 case + move.l USER_FPSR(a6),FPSR_SHADOW(a6) + or.l #sx_mask,E_BYTE(a6) +no_e3_1: + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l real_unfl +* +* It is possible to have either inex2 or inex1 exceptions with the +* unfl. If the inex enable bit is set in the FPCR, and either +* inex2 or inex1 occurred, we must clean up and branch to the +* real inex handler. +* +ck_inex: + move.b FPCR_ENABLE(a6),d0 + and.b FPSR_EXCEPT(a6),d0 + andi.b #$3,d0 + beq.b unfl_done + +* +* Inexact enabled and reported, and we must take an inexact exception +* +take_inex: + btst.b #E3,E_BYTE(a6) + beq.b no_e3_2 +* +* Clear dirty bit on dest register in the frame before branching +* to b1238_fix. +* + bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no + bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit + bsr.l b1238_fix ;test for bug1238 case + move.l USER_FPSR(a6),FPSR_SHADOW(a6) + or.l #sx_mask,E_BYTE(a6) +no_e3_2: + move.b #INEX_VEC,EXC_VEC+1(a6) + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l real_inex + +unfl_done: + bclr.b #E3,E_BYTE(a6) + beq.b e1_set ;branch if E3 was clear (E1 exception) +* +* Clear dirty bit on dest register in the frame before branching +* to b1238_fix.
+* + bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no + bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit + bsr.l b1238_fix ;test for bug1238 case + move.l USER_FPSR(a6),FPSR_SHADOW(a6) + or.l #sx_mask,E_BYTE(a6) + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + frestore (a7)+ + unlk a6 + bra.l fpsp_done +e1_set: + movem.l USER_DA(a6),d0-d1/a0-a1 + fmovem.x USER_FP0(a6),fp0-fp3 + fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar + unlk a6 + bra.l fpsp_done +* +* unf_res --- underflow result calculation +* +unf_res: + bsr.l g_rndpr ;returns RND_PREC in d0 0=ext, +* ;1=sgl, 2=dbl +* ;we need the RND_PREC in the +* ;upper word for round + clr.w -(a7) ;low word = 0, mode is merged in later + move.w d0,-(a7) ;copy RND_PREC to stack +* +* +* If the exception bit set is E3, the exceptional operand from the +* fpu is in WBTEMP; else it is in FPTEMP. +* + btst.b #E3,E_BYTE(a6) + beq.b unf_E1 +unf_E3: + lea WBTEMP(a6),a0 ;a0 now points to operand +* +* Test for fsgldiv and fsglmul. If the inst was one of these, then +* force the precision to extended for the denorm routine. Use +* the user's precision for the round routine. +* + move.w CMDREG3B(a6),d1 ;check for fsgldiv or fsglmul + andi.w #$7f,d1 + cmpi.w #$30,d1 ;check for sgldiv + beq.b unf_sgl + cmpi.w #$33,d1 ;check for sglmul + bne.b unf_cont ;if not, use fpcr prec in round +unf_sgl: + clr.l d0 ;d0 = 0 (ext) precision for denorm + move.w #$1,(a7) ;override g_rndpr precision +* ;force single + bra.b unf_cont +unf_E1: + lea FPTEMP(a6),a0 ;a0 now points to operand +unf_cont: + bclr.b #sign_bit,LOCAL_EX(a0) ;clear sign bit + sne LOCAL_SGN(a0) ;store sign + + bsr.l denorm ;returns denorm, a0 points to it +* +* WARNING: +* ;d0 has guard,round sticky bit +* ;make sure that it is not corrupted +* ;before it reaches the round subroutine +* ;also ensure that a0 isn't corrupted + +* +* Set up d1 for round subroutine d1 contains the PREC/MODE +* information respectively on upper/lower register halves.
+* + bfextu FPCR_MODE(a6){2:2},d1 ;get mode from FPCR +* ;mode in lower d1 + add.l (a7)+,d1 ;merge PREC/MODE +* +* WARNING: a0 and d0 are assumed to be intact between the denorm and +* round subroutines. All code between these two subroutines +* must not corrupt a0 and d0. +* +* +* Perform Round +* Input: a0 points to input operand +* d0{31:29} has guard, round, sticky +* d1{01:00} has rounding mode +* d1{17:16} has rounding precision +* Output: a0 points to rounded operand +* + + bsr.l round ;returns rounded denorm at (a0) +* +* Differentiate between store to memory vs. store to register +* +unf_store: + bsr.l g_opcls ;returns opclass in d0{2:0} + cmpi.b #$3,d0 + bne.b not_opc011 +* +* At this point, a store to memory is pending +* +opc011: + bsr.l g_dfmtou + tst.b d0 + beq.b ext_opc011 ;If extended, do not subtract +* ;If destination format is sgl/dbl, + tst.b LOCAL_HI(a0) ;If rounded result is normal,don't +* ;subtract + bmi.b ext_opc011 + subq.w #1,LOCAL_EX(a0) ;account for denorm bias vs. +* ;normalized bias +* ; normalized denormalized +* ;single $7f $7e +* ;double $3ff $3fe +* +ext_opc011: + bsr.l store ;stores to memory + bra.b unf_done ;finish up + +* +* At this point, a store to a float register is pending +* +not_opc011: + bsr.l store ;stores to float register +* ;a0 is not corrupted on a store to a +* ;float register. +* +* Set the condition codes according to result +* + tst.l LOCAL_HI(a0) ;check upper mantissa + bne.b ck_sgn + tst.l LOCAL_LO(a0) ;check lower mantissa + bne.b ck_sgn + bset.b #z_bit,FPSR_CC(a6) ;set condition codes if zero +ck_sgn: + btst.b #sign_bit,LOCAL_EX(a0) ;check the sign bit + beq.b unf_done + bset.b #neg_bit,FPSR_CC(a6) + +* +* Finish.
+* +unf_done: + btst.b #inex2_bit,FPSR_EXCEPT(a6) + beq.b no_aunfl + bset.b #aunfl_bit,FPSR_AEXCEPT(a6) ;inex2 set: set accrued underflow bit +no_aunfl: + rts + + end diff --git a/sys/arch/m68k/fpsp/x_unimp.sa b/sys/arch/m68k/fpsp/x_unimp.sa new file mode 100644 index 00000000000..3abf3f57eb9 --- /dev/null +++ b/sys/arch/m68k/fpsp/x_unimp.sa @@ -0,0 +1,102 @@ +* $NetBSD: x_unimp.sa,v 1.2 1994/10/26 07:50:32 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* x_unimp.sa 3.3 7/1/91 +* +* fpsp_unimp --- FPSP handler for unimplemented instruction +* exception.
+* +* Invoked when the user program encounters a floating-point +* op-code that hardware does not support. Trap vector# 11 +* (See table 8-1 MC68030 User's Manual). +* +* +* Note: An fsave for an unimplemented inst. will create a short +* fsave stack. +* +* Input: 1. Six word stack frame for unimplemented inst, four word +* for illegal +* (See table 8-7 MC68030 User's Manual). +* 2. Unimp (short) fsave state frame created here by fsave +* instruction. +* + +X_UNIMP IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref get_op + xref do_func + xref sto_res + xref gen_except + xref fpsp_fmt_error + + xdef fpsp_unimp + xdef uni_2 +fpsp_unimp: + link a6,#-LOCAL_SIZE + fsave -(a7) +uni_2: + movem.l d0-d1/a0-a1,USER_DA(a6) + fmovem.x fp0-fp3,USER_FP0(a6) + fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) + move.b (a7),d0 ;test for valid version num + andi.b #$f0,d0 ;test for $4x + cmpi.b #VER_4,d0 ;must be $4x or exit + bne.l fpsp_fmt_error +* +* Temporary D25B Fix +* The following lines are used to ensure that the FPSR +* exception byte and condition codes are clear before proceeding +* + move.l USER_FPSR(a6),d0 + and.l #$FF00FF,d0 ;clear all but aexcs and qbyte + move.l d0,USER_FPSR(a6) + fmove.l #0,FPSR ;clear all user bits + fmove.l #0,FPCR ;clear all user exceptions for FPSP + + clr.b UFLG_TMP(a6) ;clr flag for unsupp data + + bsr.l get_op ;go get operand(s) + clr.b STORE_FLG(a6) + bsr.l do_func ;do the function + fsave -(a7) ;capture possible exc state + tst.b STORE_FLG(a6) + bne.b no_store ;if STORE_FLG is set, no store + bsr.l sto_res ;store the result in user space +no_store: + bra.l gen_except ;post any exceptions and return + + end diff --git a/sys/arch/m68k/fpsp/x_unsupp.sa b/sys/arch/m68k/fpsp/x_unsupp.sa new file mode 100644 index 00000000000..cf44cfa780b --- /dev/null +++ b/sys/arch/m68k/fpsp/x_unsupp.sa @@ -0,0 +1,107 @@ +* $NetBSD: x_unsupp.sa,v 1.2 1994/10/26 07:50:33 cgd Exp $ + +* MOTOROLA MICROPROCESSOR & MEMORY
TECHNOLOGY GROUP +* M68000 Hi-Performance Microprocessor Division +* M68040 Software Package +* +* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc. +* All rights reserved. +* +* THE SOFTWARE is provided on an "AS IS" basis and without warranty. +* To the maximum extent permitted by applicable law, +* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, +* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A +* PARTICULAR PURPOSE and any warranty against infringement with +* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) +* and any accompanying written materials. +* +* To the maximum extent permitted by applicable law, +* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER +* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS +* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR +* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE +* SOFTWARE. Motorola assumes no responsibility for the maintenance +* and support of the SOFTWARE. +* +* You are hereby granted a copyright license to use, modify, and +* distribute the SOFTWARE so long as this entire notice is retained +* without alteration in any modified and/or redistributed versions, +* and that such modified versions are clearly identified as such. +* No licenses are granted by implication, estoppel or otherwise +* under any patents or trademarks of Motorola, Inc. + +* +* x_unsupp.sa 3.3 7/1/91 +* +* fpsp_unsupp --- FPSP handler for unsupported data type exception +* +* Trap vector #55 (See table 8-1 Mc68030 User's manual). +* Invoked when the user program encounters a data format (packed) that +* hardware does not support or a data type (denormalized numbers or un- +* normalized numbers). +* Normalizes denorms and unnorms, unpacks packed numbers then stores +* them back into the machine to let the 040 finish the operation. +* +* Unsupp calls two routines: +* 1. get_op - gets the operand(s) +* 2. 
res_func - restore the function back into the 040 or +* if fmove.p fpm,<ea> then pack source (fpm) +* and store in user's memory <ea>. +* +* Input: Long fsave stack frame +* + +X_UNSUPP IDNT 2,1 Motorola 040 Floating Point Software Package + + section 8 + + include fpsp.h + + xref get_op + xref res_func + xref gen_except + xref fpsp_fmt_error + + xdef fpsp_unsupp +fpsp_unsupp: +* + link a6,#-LOCAL_SIZE + fsave -(a7) + movem.l d0-d1/a0-a1,USER_DA(a6) + fmovem.x fp0-fp3,USER_FP0(a6) + fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6) + + + move.b (a7),VER_TMP(a6) ;save version number + move.b (a7),d0 ;test for valid version num + andi.b #$f0,d0 ;test for $4x + cmpi.b #VER_4,d0 ;must be $4x or exit + bne.l fpsp_fmt_error + + fmove.l #0,FPSR ;clear all user status bits + fmove.l #0,FPCR ;clear all user control bits +* +* The following lines are used to ensure that the FPSR +* exception byte and condition codes are clear before proceeding, +* except in the case of fmove, which leaves the cc's intact. +* +unsupp_con: + move.l USER_FPSR(a6),d1 + btst #5,CMDREG1B(a6) ;looking for fmove out + bne fmove_con ;fmove out: keep cc's and snan bit too + and.l #$FF00FF,d1 ;clear all but aexcs and qbyte + bra.b end_fix +fmove_con: + and.l #$0FFF40FF,d1 ;clear all but cc's, snan bit, aexcs, and qbyte +end_fix: + move.l d1,USER_FPSR(a6) + + st UFLG_TMP(a6) ;set flag for unsupp data + + bsr.l get_op ;everything okay, go get operand(s) + bsr.l res_func ;fix up stack frame so can restore it + clr.l -(a7) ;push a zeroed longword for the idle frame + move.b VER_TMP(a6),(a7) ;move idle fmt word to top of stack + bra.l gen_except +* + end |