1 files changed, 2065 insertions, 0 deletions
diff --git a/sys/arch/m68k/fpsp/res_func.sa b/sys/arch/m68k/fpsp/res_func.sa
new file mode 100644
index 00000000000..5c036b742fc
--- /dev/null
+++ b/sys/arch/m68k/fpsp/res_func.sa
@@ -0,0 +1,2065 @@
+*	$NetBSD: res_func.sa,v 1.3 1994/10/26 07:49:22 cgd Exp $
+
+*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+*	M68000 Hi-Performance Microprocessor Division
+*	M68040 Software Package 
+*
+*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+*	All rights reserved.
+*
+*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+*	To the maximum extent permitted by applicable law,
+*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+*	PARTICULAR PURPOSE and any warranty against infringement with
+*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+*	and any accompanying written materials. 
+*
+*	To the maximum extent permitted by applicable law,
+*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+*	SOFTWARE.  Motorola assumes no responsibility for the maintenance
+*	and support of the SOFTWARE.  
+*
+*	You are hereby granted a copyright license to use, modify, and
+*	distribute the SOFTWARE so long as this entire notice is retained
+*	without alteration in any modified and/or redistributed versions,
+*	and that such modified versions are clearly identified as such.
+*	No licenses are granted by implication, estoppel or otherwise
+*	under any patents or trademarks of Motorola, Inc.
+
+*
+*	res_func.sa 3.9 7/29/91
+*
+* Normalizes denormalized numbers if necessary and updates the
+* stack frame.  The function is then restored back into the
+* machine and the 040 completes the operation.  This routine
+* is only used by the unsupported data type/format handler.
+* (Exception vector 55).
+*
+* For packed move out (fmove.p fpm,<ea>) the operation is
+* completed here; data is packed and moved to user memory. 
+* The stack is restored to the 040 only in the case of a
+* reportable exception in the conversion.
+*
+
+RES_FUNC    IDNT    2,1 Motorola 040 Floating Point Software Package
+
+	section	8
+
+	include	fpsp.h
+
+sp_bnds:	dc.w	$3f81,$407e
+		dc.w	$3f6a,$0000
+dp_bnds:	dc.w	$3c01,$43fe
+		dc.w	$3bcd,$0000
+
+	xref	mem_write
+	xref	bindec
+	xref	get_fline
+	xref	round
+	xref	denorm
+	xref	dest_ext
+	xref	dest_dbl
+	xref	dest_sgl
+	xref	unf_sub
+	xref	nrm_set
+	xref	dnrm_lp
+	xref	ovf_res
+	xref	reg_dest
+	xref	t_ovfl
+	xref	t_unfl
+
+	xdef	res_func
+	xdef 	p_move
+
+res_func:
+	clr.b	DNRM_FLG(a6)
+	clr.b	RES_FLG(a6)
+	clr.b	CU_ONLY(a6)
+	tst.b	DY_MO_FLG(a6)
+	beq.b	monadic
+dyadic:
+	btst.b	#7,DTAG(a6)	;if dop = norm=000, zero=001,
+*				;inf=010 or nan=011
+	beq.b	monadic		;then branch
+*				;else denorm
+* HANDLE DESTINATION DENORM HERE
+*				;set dtag to norm
+*				;write the tag & fpte15 to the fstack
+	lea.l	FPTEMP(a6),a0
+
+	bclr.b	#sign_bit,LOCAL_EX(a0)
+	sne	LOCAL_SGN(a0)
+
+	bsr	nrm_set		;normalize number (exp will go negative)
+	bclr.b	#sign_bit,LOCAL_EX(a0) ;get rid of false sign
+	bfclr	LOCAL_SGN(a0){0:8}	;change back to IEEE ext format
+	beq.b	dpos
+	bset.b	#sign_bit,LOCAL_EX(a0)
+dpos:
+	bfclr	DTAG(a6){0:4}	;set tag to normalized, FPTE15 = 0
+	bset.b	#4,DTAG(a6)	;set FPTE15
+	or.b	#$0f,DNRM_FLG(a6)
+monadic:
+	lea.l	ETEMP(a6),a0
+	btst.b	#direction_bit,CMDREG1B(a6)	;check direction
+	bne.w	opclass3			;it is a mv out
+*
+* At this point, only oplcass 0 and 2 possible
+*
+	btst.b	#7,STAG(a6)	;if sop = norm=000, zero=001,
+*				;inf=010 or nan=011
+	bne.w	mon_dnrm	;else denorm
+	tst.b	DY_MO_FLG(a6)	;all cases of dyadic instructions would
+	bne.w	normal		;require normalization of denorm
+
+* At this point:
+*	monadic instructions:	fabs  = $18  fneg   = $1a  ftst   = $3a
+*				fmove = $00  fsmove = $40  fdmove = $44
+*				fsqrt = $05* fssqrt = $41  fdsqrt = $45
+*				(*fsqrt reencoded to $05)
+*
+	move.w	CMDREG1B(a6),d0	;get command register
+	andi.l	#$7f,d0			;strip to only command word
+*
+* At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and 
+* fdsqrt are possible.
+* For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
+* For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
+*
+	btst.l	#0,d0
+	bne.w	normal			;weed out fsqrt instructions
+*
+* cu_norm handles fmove in instructions with normalized inputs.
+* The routine round is used to correctly round the input for the
+* destination precision and mode.
+*
+cu_norm:
+	st	CU_ONLY(a6)		;set cu-only inst flag
+	move.w	CMDREG1B(a6),d0
+	andi.b	#$3b,d0		;isolate bits to select inst
+	tst.b	d0
+	beq.l	cu_nmove	;if zero, it is an fmove
+	cmpi.b	#$18,d0
+	beq.l	cu_nabs		;if $18, it is fabs
+	cmpi.b	#$1a,d0
+	beq.l	cu_nneg		;if $1a, it is fneg
+*
+* Inst is ftst.  Check the source operand and set the cc's accordingly.
+* No write is done, so simply rts.
+*
+cu_ntst:
+	move.w	LOCAL_EX(a0),d0
+	bclr.l	#15,d0
+	sne	LOCAL_SGN(a0)
+	beq.b	cu_ntpo
+	or.l	#neg_mask,USER_FPSR(a6) ;set N
+cu_ntpo:
+	cmpi.w	#$7fff,d0	;test for inf/nan
+	bne.b	cu_ntcz
+	tst.l	LOCAL_HI(a0)
+	bne.b	cu_ntn
+	tst.l	LOCAL_LO(a0)
+	bne.b	cu_ntn
+	or.l	#inf_mask,USER_FPSR(a6)
+	rts
+cu_ntn:
+	or.l	#nan_mask,USER_FPSR(a6)
+	move.l	ETEMP_EX(a6),FPTEMP_EX(a6)	;set up fptemp sign for 
+*						;snan handler
+
+	rts
+cu_ntcz:
+	tst.l	LOCAL_HI(a0)
+	bne.l	cu_ntsx
+	tst.l	LOCAL_LO(a0)
+	bne.l	cu_ntsx
+	or.l	#z_mask,USER_FPSR(a6)
+cu_ntsx:
+	rts
+*
+* Inst is fabs.  Execute the absolute value function on the input.
+* Branch to the fmove code.  If the operand is NaN, do nothing.
+*
+cu_nabs:
+	move.b	STAG(a6),d0
+	btst.l	#5,d0			;test for NaN or zero
+	bne	wr_etemp		;if either, simply write it
+	bclr.b	#7,LOCAL_EX(a0)		;do abs
+	bra.b	cu_nmove		;fmove code will finish
+*
+* Inst is fneg.  Execute the negate value function on the input.
+* Fall though to the fmove code.  If the operand is NaN, do nothing.
+*
+cu_nneg:
+	move.b	STAG(a6),d0
+	btst.l	#5,d0			;test for NaN or zero
+	bne	wr_etemp		;if either, simply write it
+	bchg.b	#7,LOCAL_EX(a0)		;do neg
+*
+* Inst is fmove.  This code also handles all result writes.
+* If bit 2 is set, round is forced to double.  If it is clear,
+* and bit 6 is set, round is forced to single.  If both are clear,
+* the round precision is found in the fpcr.  If the rounding precision
+* is double or single, round the result before the write.
+*
+cu_nmove:
+	move.b	STAG(a6),d0
+	andi.b	#$e0,d0			;isolate stag bits
+	bne	wr_etemp		;if not norm, simply write it
+	btst.b	#2,CMDREG1B+1(a6)	;check for rd
+	bne	cu_nmrd
+	btst.b	#6,CMDREG1B+1(a6)	;check for rs
+	bne	cu_nmrs
+*
+* The move or operation is not with forced precision.  Test for
+* nan or inf as the input; if so, simply write it to FPn.  Use the
+* FPCR_MODE byte to get rounding on norms and zeros.
+*
+cu_nmnr:
+	bfextu	FPCR_MODE(a6){0:2},d0
+	tst.b	d0			;check for extended
+	beq	cu_wrexn		;if so, just write result
+	cmpi.b	#1,d0			;check for single
+	beq	cu_nmrs			;fall through to double
+*
+* The move is fdmove or round precision is double.
+*
+cu_nmrd:
+	move.l	#2,d0			;set up the size for denorm
+	move.w	LOCAL_EX(a0),d1		;compare exponent to double threshold
+	and.w	#$7fff,d1	
+	cmp.w	#$3c01,d1
+	bls	cu_nunfl
+	bfextu	FPCR_MODE(a6){2:2},d1	;get rmode
+	or.l	#$00020000,d1		;or in rprec (double)
+	clr.l	d0			;clear g,r,s for round
+	bclr.b	#sign_bit,LOCAL_EX(a0)	;convert to internal format
+	sne	LOCAL_SGN(a0)
+	bsr.l	round
+	bfclr	LOCAL_SGN(a0){0:8}
+	beq.b	cu_nmrdc
+	bset.b	#sign_bit,LOCAL_EX(a0)
+cu_nmrdc:
+	move.w	LOCAL_EX(a0),d1		;check for overflow
+	and.w	#$7fff,d1
+	cmp.w	#$43ff,d1
+	bge	cu_novfl		;take care of overflow case
+	bra.w	cu_wrexn
+*
+* The move is fsmove or round precision is single.
+*
+cu_nmrs:
+	move.l	#1,d0
+	move.w	LOCAL_EX(a0),d1
+	and.w	#$7fff,d1
+	cmp.w	#$3f81,d1
+	bls	cu_nunfl
+	bfextu	FPCR_MODE(a6){2:2},d1
+	or.l	#$00010000,d1
+	clr.l	d0
+	bclr.b	#sign_bit,LOCAL_EX(a0)
+	sne	LOCAL_SGN(a0)
+	bsr.l	round
+	bfclr	LOCAL_SGN(a0){0:8}
+	beq.b	cu_nmrsc
+	bset.b	#sign_bit,LOCAL_EX(a0)
+cu_nmrsc:
+	move.w	LOCAL_EX(a0),d1
+	and.w	#$7FFF,d1
+	cmp.w	#$407f,d1
+	blt	cu_wrexn
+*
+* The operand is above precision boundaries.  Use t_ovfl to
+* generate the correct value.
+*
+cu_novfl:
+	bsr	t_ovfl
+	bra	cu_wrexn
+*
+* The operand is below precision boundaries.  Use denorm to
+* generate the correct value.
+*
+cu_nunfl:
+	bclr.b	#sign_bit,LOCAL_EX(a0)
+	sne	LOCAL_SGN(a0)
+	bsr	denorm
+	bfclr	LOCAL_SGN(a0){0:8}	;change back to IEEE ext format
+	beq.b	cu_nucont
+	bset.b	#sign_bit,LOCAL_EX(a0)
+cu_nucont:
+	bfextu	FPCR_MODE(a6){2:2},d1
+	btst.b	#2,CMDREG1B+1(a6)	;check for rd
+	bne	inst_d
+	btst.b	#6,CMDREG1B+1(a6)	;check for rs
+	bne	inst_s
+	swap	d1
+	move.b	FPCR_MODE(a6),d1
+	lsr.b	#6,d1
+	swap	d1
+	bra	inst_sd
+inst_d:
+	or.l	#$00020000,d1
+	bra	inst_sd
+inst_s:
+	or.l	#$00010000,d1
+inst_sd:
+	bclr.b	#sign_bit,LOCAL_EX(a0)
+	sne	LOCAL_SGN(a0)
+	bsr.l	round
+	bfclr	LOCAL_SGN(a0){0:8}
+	beq.b	cu_nuflp
+	bset.b	#sign_bit,LOCAL_EX(a0)
+cu_nuflp:
+	btst.b	#inex2_bit,FPSR_EXCEPT(a6)
+	beq.b	cu_nuninx
+	or.l	#aunfl_mask,USER_FPSR(a6) ;if the round was inex, set AUNFL
+cu_nuninx:
+	tst.l	LOCAL_HI(a0)		;test for zero
+	bne.b	cu_nunzro
+	tst.l	LOCAL_LO(a0)
+	bne.b	cu_nunzro
+*
+* The mantissa is zero from the denorm loop.  Check sign and rmode
+* to see if rounding should have occured which would leave the lsb.
+*
+	move.l	USER_FPCR(a6),d0
+	andi.l	#$30,d0		;isolate rmode
+	cmpi.l	#$20,d0
+	blt.b	cu_nzro
+	bne.b	cu_nrp
+cu_nrm:
+	tst.w	LOCAL_EX(a0)	;if positive, set lsb
+	bge.b	cu_nzro
+	btst.b	#7,FPCR_MODE(a6) ;check for double
+	beq.b	cu_nincs
+	bra.b	cu_nincd
+cu_nrp:
+	tst.w	LOCAL_EX(a0)	;if positive, set lsb
+	blt.b	cu_nzro
+	btst.b	#7,FPCR_MODE(a6) ;check for double
+	beq.b	cu_nincs
+cu_nincd:
+	or.l	#$800,LOCAL_LO(a0) ;inc for double
+	bra	cu_nunzro
+cu_nincs:
+	or.l	#$100,LOCAL_HI(a0) ;inc for single
+	bra	cu_nunzro
+cu_nzro:
+	or.l	#z_mask,USER_FPSR(a6)
+	move.b	STAG(a6),d0
+	andi.b	#$e0,d0
+	cmpi.b	#$40,d0		;check if input was tagged zero
+	beq.b	cu_numv
+cu_nunzro:
+	or.l	#unfl_mask,USER_FPSR(a6) ;set unfl
+cu_numv:
+	move.l	(a0),ETEMP(a6)
+	move.l	4(a0),ETEMP_HI(a6)
+	move.l	8(a0),ETEMP_LO(a6)
+*
+* Write the result to memory, setting the fpsr cc bits.  NaN and Inf
+* bypass cu_wrexn.
+*
+cu_wrexn:
+	tst.w	LOCAL_EX(a0)		;test for zero
+	beq.b	cu_wrzero
+	cmp.w	#$8000,LOCAL_EX(a0)	;test for zero
+	bne.b	cu_wreon
+cu_wrzero:
+	or.l	#z_mask,USER_FPSR(a6)	;set Z bit
+cu_wreon:
+	tst.w	LOCAL_EX(a0)
+	bpl	wr_etemp
+	or.l	#neg_mask,USER_FPSR(a6)
+	bra	wr_etemp
+
+*
+* HANDLE SOURCE DENORM HERE
+*
+*				;clear denorm stag to norm
+*				;write the new tag & ete15 to the fstack
+mon_dnrm:
+*
+* At this point, check for the cases in which normalizing the 
+* denorm produces incorrect results.
+*
+	tst.b	DY_MO_FLG(a6)	;all cases of dyadic instructions would
+	bne.b	nrm_src		;require normalization of denorm
+
+* At this point:
+*	monadic instructions:	fabs  = $18  fneg   = $1a  ftst   = $3a
+*				fmove = $00  fsmove = $40  fdmove = $44
+*				fsqrt = $05* fssqrt = $41  fdsqrt = $45
+*				(*fsqrt reencoded to $05)
+*
+	move.w	CMDREG1B(a6),d0	;get command register
+	andi.l	#$7f,d0			;strip to only command word
+*
+* At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and 
+* fdsqrt are possible.
+* For cases fabs, fneg, fsmove, and fdmove goto spos (do not normalize)
+* For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
+*
+	btst.l	#0,d0
+	bne.b	nrm_src		;weed out fsqrt instructions
+	st	CU_ONLY(a6)	;set cu-only inst flag
+	bra	cu_dnrm		;fmove, fabs, fneg, ftst 
+*				;cases go to cu_dnrm
+nrm_src:
+	bclr.b	#sign_bit,LOCAL_EX(a0)
+	sne	LOCAL_SGN(a0)
+	bsr	nrm_set		;normalize number (exponent will go 
+*				; negative)
+	bclr.b	#sign_bit,LOCAL_EX(a0) ;get rid of false sign
+
+	bfclr	LOCAL_SGN(a0){0:8}	;change back to IEEE ext format
+	beq.b	spos
+	bset.b	#sign_bit,LOCAL_EX(a0)
+spos:
+	bfclr	STAG(a6){0:4}	;set tag to normalized, FPTE15 = 0
+	bset.b	#4,STAG(a6)	;set ETE15
+	or.b	#$f0,DNRM_FLG(a6)
+normal:
+	tst.b	DNRM_FLG(a6)	;check if any of the ops were denorms
+	bne	ck_wrap		;if so, check if it is a potential
+*				;wrap-around case
+fix_stk:
+	move.b	#$fe,CU_SAVEPC(a6)
+	bclr.b	#E1,E_BYTE(a6)
+
+	clr.w	NMNEXC(a6)
+
+	st.b	RES_FLG(a6)	;indicate that a restore is needed
+	rts
+
+*
+* cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
+* ftst) completly in software without an frestore to the 040. 
+*
+cu_dnrm:
+	st.b	CU_ONLY(a6)
+	move.w	CMDREG1B(a6),d0
+	andi.b	#$3b,d0		;isolate bits to select inst
+	tst.b	d0
+	beq.l	cu_dmove	;if zero, it is an fmove
+	cmpi.b	#$18,d0
+	beq.l	cu_dabs		;if $18, it is fabs
+	cmpi.b	#$1a,d0
+	beq.l	cu_dneg		;if $1a, it is fneg
+*
+* Inst is ftst.  Check the source operand and set the cc's accordingly.
+* No write is done, so simply rts.
+*
+cu_dtst:
+	move.w	LOCAL_EX(a0),d0
+	bclr.l	#15,d0
+	sne	LOCAL_SGN(a0)
+	beq.b	cu_dtpo
+	or.l	#neg_mask,USER_FPSR(a6) ;set N
+cu_dtpo:
+	cmpi.w	#$7fff,d0	;test for inf/nan
+	bne.b	cu_dtcz
+	tst.l	LOCAL_HI(a0)
+	bne.b	cu_dtn
+	tst.l	LOCAL_LO(a0)
+	bne.b	cu_dtn
+	or.l	#inf_mask,USER_FPSR(a6)
+	rts
+cu_dtn:
+	or.l	#nan_mask,USER_FPSR(a6)
+	move.l	ETEMP_EX(a6),FPTEMP_EX(a6)	;set up fptemp sign for 
+*						;snan handler
+	rts
+cu_dtcz:
+	tst.l	LOCAL_HI(a0)
+	bne.l	cu_dtsx
+	tst.l	LOCAL_LO(a0)
+	bne.l	cu_dtsx
+	or.l	#z_mask,USER_FPSR(a6)
+cu_dtsx:
+	rts
+*
+* Inst is fabs.  Execute the absolute value function on the input.
+* Branch to the fmove code.
+*
+cu_dabs:
+	bclr.b	#7,LOCAL_EX(a0)		;do abs
+	bra.b	cu_dmove		;fmove code will finish
+*
+* Inst is fneg.  Execute the negate value function on the input.
+* Fall though to the fmove code.
+*
+cu_dneg:
+	bchg.b	#7,LOCAL_EX(a0)		;do neg
+*
+* Inst is fmove.  This code also handles all result writes.
+* If bit 2 is set, round is forced to double.  If it is clear,
+* and bit 6 is set, round is forced to single.  If both are clear,
+* the round precision is found in the fpcr.  If the rounding precision
+* is double or single, the result is zero, and the mode is checked
+* to determine if the lsb of the result should be set.
+*
+cu_dmove:
+	btst.b	#2,CMDREG1B+1(a6)	;check for rd
+	bne	cu_dmrd
+	btst.b	#6,CMDREG1B+1(a6)	;check for rs
+	bne	cu_dmrs
+*
+* The move or operation is not with forced precision.  Use the
+* FPCR_MODE byte to get rounding.
+*
+cu_dmnr:
+	bfextu	FPCR_MODE(a6){0:2},d0
+	tst.b	d0			;check for extended
+	beq	cu_wrexd		;if so, just write result
+	cmpi.b	#1,d0			;check for single
+	beq	cu_dmrs			;fall through to double
+*
+* The move is fdmove or round precision is double.  Result is zero.
+* Check rmode for rp or rm and set lsb accordingly.
+*
+cu_dmrd:
+	bfextu	FPCR_MODE(a6){2:2},d1	;get rmode
+	tst.w	LOCAL_EX(a0)		;check sign
+	blt.b	cu_dmdn
+	cmpi.b	#3,d1			;check for rp
+	bne	cu_dpd			;load double pos zero
+	bra	cu_dpdr			;load double pos zero w/lsb
+cu_dmdn:
+	cmpi.b	#2,d1			;check for rm
+	bne	cu_dnd			;load double neg zero
+	bra	cu_dndr			;load double neg zero w/lsb
+*
+* The move is fsmove or round precision is single.  Result is zero.
+* Check for rp or rm and set lsb accordingly.
+*
+cu_dmrs:
+	bfextu	FPCR_MODE(a6){2:2},d1	;get rmode
+	tst.w	LOCAL_EX(a0)		;check sign
+	blt.b	cu_dmsn
+	cmpi.b	#3,d1			;check for rp
+	bne	cu_spd			;load single pos zero
+	bra	cu_spdr			;load single pos zero w/lsb
+cu_dmsn:
+	cmpi.b	#2,d1			;check for rm
+	bne	cu_snd			;load single neg zero
+	bra	cu_sndr			;load single neg zero w/lsb
+*
+* The precision is extended, so the result in etemp is correct.
+* Simply set unfl (not inex2 or aunfl) and write the result to 
+* the correct fp register.
+cu_wrexd:
+	or.l	#unfl_mask,USER_FPSR(a6)
+	tst.w	LOCAL_EX(a0)
+	beq	wr_etemp
+	or.l	#neg_mask,USER_FPSR(a6)
+	bra	wr_etemp
+*
+* These routines write +/- zero in double format.  The routines
+* cu_dpdr and cu_dndr set the double lsb.
+*
+cu_dpd:
+	move.l	#$3c010000,LOCAL_EX(a0)	;force pos double zero
+	clr.l	LOCAL_HI(a0)
+	clr.l	LOCAL_LO(a0)
+	or.l	#z_mask,USER_FPSR(a6)
+	or.l	#unfinx_mask,USER_FPSR(a6)
+	bra	wr_etemp
+cu_dpdr:
+	move.l	#$3c010000,LOCAL_EX(a0)	;force pos double zero
+	clr.l	LOCAL_HI(a0)
+	move.l	#$800,LOCAL_LO(a0)	;with lsb set
+	or.l	#unfinx_mask,USER_FPSR(a6)
+	bra	wr_etemp
+cu_dnd:
+	move.l	#$bc010000,LOCAL_EX(a0)	;force pos double zero
+	clr.l	LOCAL_HI(a0)
+	clr.l	LOCAL_LO(a0)
+	or.l	#z_mask,USER_FPSR(a6)
+	or.l	#neg_mask,USER_FPSR(a6)
+	or.l	#unfinx_mask,USER_FPSR(a6)
+	bra	wr_etemp
+cu_dndr:
+	move.l	#$bc010000,LOCAL_EX(a0)	;force pos double zero
+	clr.l	LOCAL_HI(a0)
+	move.l	#$800,LOCAL_LO(a0)	;with lsb set
+	or.l	#neg_mask,USER_FPSR(a6)
+	or.l	#unfinx_mask,USER_FPSR(a6)
+	bra	wr_etemp
+*
+* These routines write +/- zero in single format.  The routines
+* cu_dpdr and cu_dndr set the single lsb.
+*
+cu_spd:
+	move.l	#$3f810000,LOCAL_EX(a0)	;force pos single zero
+	clr.l	LOCAL_HI(a0)
+	clr.l	LOCAL_LO(a0)
+	or.l	#z_mask,USER_FPSR(a6)
+	or.l	#unfinx_mask,USER_FPSR(a6)
+	bra	wr_etemp
+cu_spdr:
+	move.l	#$3f810000,LOCAL_EX(a0)	;force pos single zero
+	move.l	#$100,LOCAL_HI(a0)	;with lsb set
+	clr.l	LOCAL_LO(a0)
+	or.l	#unfinx_mask,USER_FPSR(a6)
+	bra	wr_etemp
+cu_snd:
+	move.l	#$bf810000,LOCAL_EX(a0)	;force pos single zero
+	clr.l	LOCAL_HI(a0)
+	clr.l	LOCAL_LO(a0)
+	or.l	#z_mask,USER_FPSR(a6)
+	or.l	#neg_mask,USER_FPSR(a6)
+	or.l	#unfinx_mask,USER_FPSR(a6)
+	bra	wr_etemp
+cu_sndr:
+	move.l	#$bf810000,LOCAL_EX(a0)	;force pos single zero
+	move.l	#$100,LOCAL_HI(a0)	;with lsb set
+	clr.l	LOCAL_LO(a0)
+	or.l	#neg_mask,USER_FPSR(a6)
+	or.l	#unfinx_mask,USER_FPSR(a6)
+	bra	wr_etemp
+	
+*
+* This code checks for 16-bit overflow conditions on dyadic
+* operations which are not restorable into the floating-point
+* unit and must be completed in software.  Basically, this
+* condition exists with a very large norm and a denorm.  One
+* of the operands must be denormalized to enter this code.
+*
+* Flags used:
+*	DY_MO_FLG contains 0 for monadic op, $ff for dyadic
+*	DNRM_FLG contains $00 for neither op denormalized
+*	                  $0f for the destination op denormalized
+*	                  $f0 for the source op denormalized
+*	                  $ff for both ops denormalzed
+*
+* The wrap-around condition occurs for add, sub, div, and cmp
+* when 
+*
+*	abs(dest_exp - src_exp) >= $8000
+*
+* and for mul when
+*
+*	(dest_exp + src_exp) < $0
+*
+* we must process the operation here if this case is true.
+*
+* The rts following the frcfpn routine is the exit from res_func
+* for this condition.  The restore flag (RES_FLG) is left clear.
+* No frestore is done unless an exception is to be reported.
+*
+* For fadd: 
+*	if(sign_of(dest) != sign_of(src))
+*		replace exponent of src with $3fff (keep sign)
+*		use fpu to perform dest+new_src (user's rmode and X)
+*		clr sticky
+*	else
+*		set sticky
+*	call round with user's precision and mode
+*	move result to fpn and wbtemp
+*
+* For fsub:
+*	if(sign_of(dest) == sign_of(src))
+*		replace exponent of src with $3fff (keep sign)
+*		use fpu to perform dest+new_src (user's rmode and X)
+*		clr sticky
+*	else
+*		set sticky
+*	call round with user's precision and mode
+*	move result to fpn and wbtemp
+*
+* For fdiv/fsgldiv:
+*	if(both operands are denorm)
+*		restore_to_fpu;
+*	if(dest is norm)
+*		force_ovf;
+*	else(dest is denorm)
+*		force_unf:
+*
+* For fcmp:
+*	if(dest is norm)
+*		N = sign_of(dest);
+*	else(dest is denorm)
+*		N = sign_of(src);
+*
+* For fmul:
+*	if(both operands are denorm)
+*		force_unf;
+*	if((dest_exp + src_exp) < 0)
+*		force_unf:
+*	else
+*		restore_to_fpu;
+*
+* local equates:
+addcode	equ	$22
+subcode	equ	$28
+mulcode	equ	$23
+divcode	equ	$20
+cmpcode	equ	$38
+ck_wrap:
+	tst.b	DY_MO_FLG(a6)	;check for fsqrt
+	beq	fix_stk		;if zero, it is fsqrt
+	move.w	CMDREG1B(a6),d0
+	andi.w	#$3b,d0		;strip to command bits
+	cmpi.w	#addcode,d0
+	beq	wrap_add
+	cmpi.w	#subcode,d0
+	beq	wrap_sub
+	cmpi.w	#mulcode,d0
+	beq	wrap_mul
+	cmpi.w	#cmpcode,d0
+	beq	wrap_cmp
+*
+* Inst is fdiv.  
+*
+wrap_div:
+	cmp.b	#$ff,DNRM_FLG(a6) ;if both ops denorm, 
+	beq	fix_stk		 ;restore to fpu
+*
+* One of the ops is denormalized.  Test for wrap condition
+* and force the result.
+*
+	cmp.b	#$0f,DNRM_FLG(a6) ;check for dest denorm
+	bne.b	div_srcd
+div_destd:
+	bsr.l	ckinf_ns
+	bne	fix_stk
+	bfextu	ETEMP_EX(a6){1:15},d0	;get src exp (always pos)
+	bfexts	FPTEMP_EX(a6){1:15},d1	;get dest exp (always neg)
+	sub.l	d1,d0			;subtract dest from src
+	cmp.l	#$7fff,d0
+	blt	fix_stk			;if less, not wrap case
+	clr.b	WBTEMP_SGN(a6)
+	move.w	ETEMP_EX(a6),d0		;find the sign of the result
+	move.w	FPTEMP_EX(a6),d1
+	eor.w	d1,d0
+	andi.w	#$8000,d0
+	beq	force_unf
+	st.b	WBTEMP_SGN(a6)
+	bra	force_unf
+
+ckinf_ns:
+	move.b	STAG(a6),d0		;check source tag for inf or nan
+	bra	ck_in_com
+ckinf_nd:
+	move.b	DTAG(a6),d0		;check destination tag for inf or nan
+ck_in_com:	
+	andi.b	#$60,d0			;isolate tag bits
+	cmp.b	#$40,d0			;is it inf?
+	beq	nan_or_inf		;not wrap case
+	cmp.b	#$60,d0			;is it nan?
+	beq	nan_or_inf		;yes, not wrap case?
+	cmp.b	#$20,d0			;is it a zero?
+	beq	nan_or_inf		;yes
+	clr.l	d0
+	rts				;then it is either a zero of norm,
+*					;check wrap case
+nan_or_inf:
+	moveq.l	#-1,d0
+	rts
+
+
+
+div_srcd:
+	bsr.l	ckinf_nd
+	bne	fix_stk
+	bfextu	FPTEMP_EX(a6){1:15},d0	;get dest exp (always pos)
+	bfexts	ETEMP_EX(a6){1:15},d1	;get src exp (always neg)
+	sub.l	d1,d0			;subtract src from dest
+	cmp.l	#$8000,d0
+	blt	fix_stk			;if less, not wrap case
+	clr.b	WBTEMP_SGN(a6)
+	move.w	ETEMP_EX(a6),d0		;find the sign of the result
+	move.w	FPTEMP_EX(a6),d1
+	eor.w	d1,d0
+	andi.w	#$8000,d0
+	beq.b	force_ovf
+	st.b	WBTEMP_SGN(a6)
+*
+* This code handles the case of the instruction resulting in 
+* an overflow condition.
+*
+force_ovf:
+	bclr.b	#E1,E_BYTE(a6)
+	or.l	#ovfl_inx_mask,USER_FPSR(a6)
+	clr.w	NMNEXC(a6)
+	lea.l	WBTEMP(a6),a0		;point a0 to memory location
+	move.w	CMDREG1B(a6),d0
+	btst.l	#6,d0			;test for forced precision
+	beq.b	frcovf_fpcr
+	btst.l	#2,d0			;check for double
+	bne.b	frcovf_dbl
+	move.l	#$1,d0			;inst is forced single
+	bra.b	frcovf_rnd
+frcovf_dbl:
+	move.l	#$2,d0			;inst is forced double
+	bra.b	frcovf_rnd
+frcovf_fpcr:
+	bfextu	FPCR_MODE(a6){0:2},d0	;inst not forced - use fpcr prec
+frcovf_rnd:
+
+* The 881/882 does not set inex2 for the following case, so the 
+* line is commented out to be compatible with 881/882
+*	tst.b	d0
+*	beq.b	frcovf_x
+*	or.l	#inex2_mask,USER_FPSR(a6) ;if prec is s or d, set inex2
+
+*frcovf_x:
+	bsr.l	ovf_res			;get correct result based on
+*					;round precision/mode.  This 
+*					;sets FPSR_CC correctly
+*					;returns in external format
+	bfclr	WBTEMP_SGN(a6){0:8}
+	beq	frcfpn
+	bset.b	#sign_bit,WBTEMP_EX(a6)
+	bra	frcfpn
+*
+* Inst is fadd.
+*
+wrap_add:
+	cmp.b	#$ff,DNRM_FLG(a6) ;if both ops denorm, 
+	beq	fix_stk		 ;restore to fpu
+*
+* One of the ops is denormalized.  Test for wrap condition
+* and complete the instruction.
+*
+	cmp.b	#$0f,DNRM_FLG(a6) ;check for dest denorm
+	bne.b	add_srcd
+add_destd:
+	bsr.l	ckinf_ns
+	bne	fix_stk
+	bfextu	ETEMP_EX(a6){1:15},d0	;get src exp (always pos)
+	bfexts	FPTEMP_EX(a6){1:15},d1	;get dest exp (always neg)
+	sub.l	d1,d0			;subtract dest from src
+	cmp.l	#$8000,d0
+	blt	fix_stk			;if less, not wrap case
+	bra	add_wrap
+add_srcd:
+	bsr.l	ckinf_nd
+	bne	fix_stk
+	bfextu	FPTEMP_EX(a6){1:15},d0	;get dest exp (always pos)
+	bfexts	ETEMP_EX(a6){1:15},d1	;get src exp (always neg)
+	sub.l	d1,d0			;subtract src from dest
+	cmp.l	#$8000,d0
+	blt	fix_stk			;if less, not wrap case
+*
+* Check the signs of the operands.  If they are unlike, the fpu
+* can be used to add the norm and 1.0 with the sign of the
+* denorm and it will correctly generate the result in extended
+* precision.  We can then call round with no sticky and the result
+* will be correct for the user's rounding mode and precision.  If
+* the signs are the same, we call round with the sticky bit set
+* and the result will be correctfor the user's rounding mode and
+* precision.
+*
+add_wrap:
+	move.w	ETEMP_EX(a6),d0
+	move.w	FPTEMP_EX(a6),d1
+	eor.w	d1,d0
+	andi.w	#$8000,d0
+	beq	add_same
+*
+* The signs are unlike.
+*
+	cmp.b	#$0f,DNRM_FLG(a6) ;is dest the denorm?
+	bne.b	add_u_srcd
+	move.w	FPTEMP_EX(a6),d0
+	andi.w	#$8000,d0
+	or.w	#$3fff,d0	;force the exponent to +/- 1
+	move.w	d0,FPTEMP_EX(a6) ;in the denorm
+	move.l	USER_FPCR(a6),d0
+	andi.l	#$30,d0
+	fmove.l	d0,fpcr		;set up users rmode and X
+	fmove.x	ETEMP(a6),fp0
+	fadd.x	FPTEMP(a6),fp0
+	lea.l	WBTEMP(a6),a0	;point a0 to wbtemp in frame
+	fmove.l	fpsr,d1
+	or.l	d1,USER_FPSR(a6) ;capture cc's and inex from fadd
+	fmove.x	fp0,WBTEMP(a6)	;write result to memory
+	lsr.l	#4,d0		;put rmode in lower 2 bits
+	move.l	USER_FPCR(a6),d1
+	andi.l	#$c0,d1
+	lsr.l	#6,d1		;put precision in upper word
+	swap	d1
+	or.l	d0,d1		;set up for round call
+	clr.l	d0		;force sticky to zero
+	bclr.b	#sign_bit,WBTEMP_EX(a6)
+	sne	WBTEMP_SGN(a6)
+	bsr.l	round		;round result to users rmode & prec
+	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
+	beq	frcfpnr
+	bset.b	#sign_bit,WBTEMP_EX(a6)
+	bra	frcfpnr
+add_u_srcd:
+	move.w	ETEMP_EX(a6),d0
+	andi.w	#$8000,d0
+	or.w	#$3fff,d0	;force the exponent to +/- 1
+	move.w	d0,ETEMP_EX(a6) ;in the denorm
+	move.l	USER_FPCR(a6),d0
+	andi.l	#$30,d0
+	fmove.l	d0,fpcr		;set up users rmode and X
+	fmove.x	ETEMP(a6),fp0
+	fadd.x	FPTEMP(a6),fp0
+	fmove.l	fpsr,d1
+	or.l	d1,USER_FPSR(a6) ;capture cc's and inex from fadd
+	lea.l	WBTEMP(a6),a0	;point a0 to wbtemp in frame
+	fmove.x	fp0,WBTEMP(a6)	;write result to memory
+	lsr.l	#4,d0		;put rmode in lower 2 bits
+	move.l	USER_FPCR(a6),d1
+	andi.l	#$c0,d1
+	lsr.l	#6,d1		;put precision in upper word
+	swap	d1
+	or.l	d0,d1		;set up for round call
+	clr.l	d0		;force sticky to zero
+	bclr.b	#sign_bit,WBTEMP_EX(a6)
+	sne	WBTEMP_SGN(a6)	;use internal format for round
+	bsr.l	round		;round result to users rmode & prec
+	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
+	beq	frcfpnr
+	bset.b	#sign_bit,WBTEMP_EX(a6)
+	bra	frcfpnr
+*
+* Signs are alike:
+*
+add_same:
+	cmp.b	#$0f,DNRM_FLG(a6) ;is dest the denorm?
+	bne.b	add_s_srcd
+add_s_destd:
+	lea.l	ETEMP(a6),a0
+	move.l	USER_FPCR(a6),d0
+	andi.l	#$30,d0
+	lsr.l	#4,d0		;put rmode in lower 2 bits
+	move.l	USER_FPCR(a6),d1
+	andi.l	#$c0,d1
+	lsr.l	#6,d1		;put precision in upper word
+	swap	d1
+	or.l	d0,d1		;set up for round call
+	move.l	#$20000000,d0	;set sticky for round
+	bclr.b	#sign_bit,ETEMP_EX(a6)
+	sne	ETEMP_SGN(a6)
+	bsr.l	round		;round result to users rmode & prec
+	bfclr	ETEMP_SGN(a6){0:8}	;convert back to IEEE ext format
+	beq.b	add_s_dclr
+	bset.b	#sign_bit,ETEMP_EX(a6)
+add_s_dclr:
+	lea.l	WBTEMP(a6),a0
+	move.l	ETEMP(a6),(a0)	;write result to wbtemp
+	move.l	ETEMP_HI(a6),4(a0)
+	move.l	ETEMP_LO(a6),8(a0)
+	tst.w	ETEMP_EX(a6)
+	bgt	add_ckovf
+	or.l	#neg_mask,USER_FPSR(a6)
+	bra	add_ckovf
+add_s_srcd:
+	lea.l	FPTEMP(a6),a0
+	move.l	USER_FPCR(a6),d0
+	andi.l	#$30,d0
+	lsr.l	#4,d0		;put rmode in lower 2 bits
+	move.l	USER_FPCR(a6),d1
+	andi.l	#$c0,d1
+	lsr.l	#6,d1		;put precision in upper word
+	swap	d1
+	or.l	d0,d1		;set up for round call
+	move.l	#$20000000,d0	;set sticky for round
+	bclr.b	#sign_bit,FPTEMP_EX(a6)
+	sne	FPTEMP_SGN(a6)
+	bsr.l	round		;round result to users rmode & prec
+	bfclr	FPTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
+	beq.b	add_s_sclr
+	bset.b	#sign_bit,FPTEMP_EX(a6)
+add_s_sclr:
+	lea.l	WBTEMP(a6),a0
+	move.l	FPTEMP(a6),(a0)	;write result to wbtemp
+	move.l	FPTEMP_HI(a6),4(a0)
+	move.l	FPTEMP_LO(a6),8(a0)
+	tst.w	FPTEMP_EX(a6)
+	bgt	add_ckovf
+	or.l	#neg_mask,USER_FPSR(a6)
+add_ckovf:
+	move.w	WBTEMP_EX(a6),d0
+	andi.w	#$7fff,d0
+	cmpi.w	#$7fff,d0
+	bne	frcfpnr
+*
+* The result has overflowed to $7fff exponent.  Set I, ovfl,
+* and aovfl, and clr the mantissa (incorrectly set by the
+* round routine.)
+*
+	or.l	#inf_mask+ovfl_inx_mask,USER_FPSR(a6)	
+	clr.l	4(a0)
+	bra	frcfpnr
+*
+* Inst is fsub.
+*
+wrap_sub:
+	cmp.b	#$ff,DNRM_FLG(a6) ;if both ops denorm, 
+	beq	fix_stk		 ;restore to fpu
+*
+* One of the ops is denormalized.  Test for wrap condition
+* and complete the instruction.
+*
+	cmp.b	#$0f,DNRM_FLG(a6) ;check for dest denorm
+	bne.b	sub_srcd
+sub_destd:
+	bsr.l	ckinf_ns
+	bne	fix_stk
+	bfextu	ETEMP_EX(a6){1:15},d0	;get src exp (always pos)
+	bfexts	FPTEMP_EX(a6){1:15},d1	;get dest exp (always neg)
+	sub.l	d1,d0			;subtract src from dest
+	cmp.l	#$8000,d0
+	blt	fix_stk			;if less, not wrap case
+	bra	sub_wrap
+sub_srcd:
+	bsr.l	ckinf_nd
+	bne	fix_stk
+	bfextu	FPTEMP_EX(a6){1:15},d0	;get dest exp (always pos)
+	bfexts	ETEMP_EX(a6){1:15},d1	;get src exp (always neg)
+	sub.l	d1,d0			;subtract dest from src
+	cmp.l	#$8000,d0
+	blt	fix_stk			;if less, not wrap case
+*
+* Check the signs of the operands.  If they are alike, the fpu
+* can be used to subtract from the norm 1.0 with the sign of the
+* denorm and it will correctly generate the result in extended
+* precision.  We can then call round with no sticky and the result
+* will be correct for the user's rounding mode and precision.  If
+* the signs are unlike, we call round with the sticky bit set
+* and the result will be correctfor the user's rounding mode and
+* precision.
+*
+sub_wrap:
+	move.w	ETEMP_EX(a6),d0
+	move.w	FPTEMP_EX(a6),d1
+	eor.w	d1,d0
+	andi.w	#$8000,d0
+	bne	sub_diff
+*
+* The signs are alike.
+*
+	cmp.b	#$0f,DNRM_FLG(a6) ;is dest the denorm?
+	bne.b	sub_u_srcd
+	move.w	FPTEMP_EX(a6),d0
+	andi.w	#$8000,d0
+	or.w	#$3fff,d0	;force the exponent to +/- 1
+	move.w	d0,FPTEMP_EX(a6) ;in the denorm
+	move.l	USER_FPCR(a6),d0
+	andi.l	#$30,d0
+	fmove.l	d0,fpcr		;set up users rmode and X
+	fmove.x	FPTEMP(a6),fp0
+	fsub.x	ETEMP(a6),fp0
+	fmove.l	fpsr,d1
+	or.l	d1,USER_FPSR(a6) ;capture cc's and inex from fadd
+	lea.l	WBTEMP(a6),a0	;point a0 to wbtemp in frame
+	fmove.x	fp0,WBTEMP(a6)	;write result to memory
+	lsr.l	#4,d0		;put rmode in lower 2 bits
+	move.l	USER_FPCR(a6),d1
+	andi.l	#$c0,d1
+	lsr.l	#6,d1		;put precision in upper word
+	swap	d1
+	or.l	d0,d1		;set up for round call
+	clr.l	d0		;force sticky to zero
+	bclr.b	#sign_bit,WBTEMP_EX(a6)
+	sne	WBTEMP_SGN(a6)
+	bsr.l	round		;round result to users rmode & prec
+	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
+	beq	frcfpnr
+	bset.b	#sign_bit,WBTEMP_EX(a6)
+	bra	frcfpnr
+sub_u_srcd:
+	move.w	ETEMP_EX(a6),d0
+	andi.w	#$8000,d0
+	or.w	#$3fff,d0	;force the exponent to +/- 1
+	move.w	d0,ETEMP_EX(a6) ;in the denorm
+	move.l	USER_FPCR(a6),d0
+	andi.l	#$30,d0
+	fmove.l	d0,fpcr		;set up users rmode and X
+	fmove.x	FPTEMP(a6),fp0
+	fsub.x	ETEMP(a6),fp0
+	fmove.l	fpsr,d1
+	or.l	d1,USER_FPSR(a6) ;capture cc's and inex from fadd
+	lea.l	WBTEMP(a6),a0	;point a0 to wbtemp in frame
+	fmove.x	fp0,WBTEMP(a6)	;write result to memory
+	lsr.l	#4,d0		;put rmode in lower 2 bits
+	move.l	USER_FPCR(a6),d1
+	andi.l	#$c0,d1
+	lsr.l	#6,d1		;put precision in upper word
+	swap	d1
+	or.l	d0,d1		;set up for round call
+	clr.l	d0		;force sticky to zero
+	bclr.b	#sign_bit,WBTEMP_EX(a6)
+	sne	WBTEMP_SGN(a6)
+	bsr.l	round		;round result to users rmode & prec
+	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
+	beq	frcfpnr
+	bset.b	#sign_bit,WBTEMP_EX(a6)
+	bra	frcfpnr
+*
+* Signs are unlike:
+*
+sub_diff:
+	cmp.b	#$0f,DNRM_FLG(a6) ;is dest the denorm?
+	bne.b	sub_s_srcd
+sub_s_destd:
+	lea.l	ETEMP(a6),a0
+	move.l	USER_FPCR(a6),d0
+	andi.l	#$30,d0
+	lsr.l	#4,d0		;put rmode in lower 2 bits
+	move.l	USER_FPCR(a6),d1
+	andi.l	#$c0,d1
+	lsr.l	#6,d1		;put precision in upper word
+	swap	d1
+	or.l	d0,d1		;set up for round call
+	move.l	#$20000000,d0	;set sticky for round
+*
+* Since the dest is the denorm, the sign is the opposite of the
+* norm sign.
+*
+	eori.w	#$8000,ETEMP_EX(a6)	;flip sign on result
+	tst.w	ETEMP_EX(a6)
+	bgt.b	sub_s_dwr
+	or.l	#neg_mask,USER_FPSR(a6)
+sub_s_dwr:
+	bclr.b	#sign_bit,ETEMP_EX(a6)
+	sne	ETEMP_SGN(a6)
+	bsr.l	round		;round result to users rmode & prec
+	bfclr	ETEMP_SGN(a6){0:8}	;convert back to IEEE ext format
+	beq.b	sub_s_dclr
+	bset.b	#sign_bit,ETEMP_EX(a6)
+sub_s_dclr:
+	lea.l	WBTEMP(a6),a0
+	move.l	ETEMP(a6),(a0)	;write result to wbtemp
+	move.l	ETEMP_HI(a6),4(a0)
+	move.l	ETEMP_LO(a6),8(a0)
+	bra	sub_ckovf
+sub_s_srcd:
+	lea.l	FPTEMP(a6),a0
+	move.l	USER_FPCR(a6),d0
+	andi.l	#$30,d0
+	lsr.l	#4,d0		;put rmode in lower 2 bits
+	move.l	USER_FPCR(a6),d1
+	andi.l	#$c0,d1
+	lsr.l	#6,d1		;put precision in upper word
+	swap	d1
+	or.l	d0,d1		;set up for round call
+	move.l	#$20000000,d0	;set sticky for round
+	bclr.b	#sign_bit,FPTEMP_EX(a6)
+	sne	FPTEMP_SGN(a6)
+	bsr.l	round		;round result to users rmode & prec
+	bfclr	FPTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
+	beq.b	sub_s_sclr
+	bset.b	#sign_bit,FPTEMP_EX(a6)
+sub_s_sclr:
+	lea.l	WBTEMP(a6),a0
+	move.l	FPTEMP(a6),(a0)	;write result to wbtemp
+	move.l	FPTEMP_HI(a6),4(a0)
+	move.l	FPTEMP_LO(a6),8(a0)
+	tst.w	FPTEMP_EX(a6)
+	bgt	sub_ckovf
+	or.l	#neg_mask,USER_FPSR(a6)
+sub_ckovf:
+	move.w	WBTEMP_EX(a6),d0
+	andi.w	#$7fff,d0
+	cmpi.w	#$7fff,d0
+	bne	frcfpnr
+*
+* The result has overflowed to $7fff exponent.  Set I, ovfl,
+* and aovfl, and clr the mantissa (incorrectly set by the
+* round routine.)
+*
+	or.l	#inf_mask+ovfl_inx_mask,USER_FPSR(a6)	
+	clr.l	4(a0)
+	bra	frcfpnr
+*
+* Inst is fcmp.
+*
+wrap_cmp:
+	cmp.b	#$ff,DNRM_FLG(a6) ;if both ops denorm, 
+	beq	fix_stk		 ;restore to fpu
+*
+* One of the ops is denormalized.  Test for wrap condition
+* and complete the instruction.
+*
+	cmp.b	#$0f,DNRM_FLG(a6) ;check for dest denorm
+	bne.b	cmp_srcd
+cmp_destd:
+	bsr.l	ckinf_ns
+	bne	fix_stk
+	bfextu	ETEMP_EX(a6){1:15},d0	;get src exp (always pos)
+	bfexts	FPTEMP_EX(a6){1:15},d1	;get dest exp (always neg)
+	sub.l	d1,d0			;subtract dest from src
+	cmp.l	#$8000,d0
+	blt	fix_stk			;if less, not wrap case
+	tst.w	ETEMP_EX(a6)		;set N to ~sign_of(src)
+	bge	cmp_setn
+	rts
+cmp_srcd:
+	bsr.l	ckinf_nd
+	bne	fix_stk
+	bfextu	FPTEMP_EX(a6){1:15},d0	;get dest exp (always pos)
+	bfexts	ETEMP_EX(a6){1:15},d1	;get src exp (always neg)
+	sub.l	d1,d0			;subtract src from dest
+	cmp.l	#$8000,d0
+	blt	fix_stk			;if less, not wrap case
+	tst.w	FPTEMP_EX(a6)		;set N to sign_of(dest)
+	blt	cmp_setn
+	rts
+cmp_setn:
+	or.l	#neg_mask,USER_FPSR(a6)
+	rts
+
+*
+* Inst is fmul.
+*
+wrap_mul:
+	cmp.b	#$ff,DNRM_FLG(a6) ;if both ops denorm, 
+	beq	force_unf	;force an underflow (really!)
+*
+* One of the ops is denormalized.  Test for wrap condition
+* and complete the instruction.
+*
+	cmp.b	#$0f,DNRM_FLG(a6) ;check for dest denorm
+	bne.b	mul_srcd
+mul_destd:
+	bsr.l	ckinf_ns
+	bne	fix_stk
+	bfextu	ETEMP_EX(a6){1:15},d0	;get src exp (always pos)
+	bfexts	FPTEMP_EX(a6){1:15},d1	;get dest exp (always neg)
+	add.l	d1,d0			;subtract dest from src
+	bgt	fix_stk
+	bra	force_unf
+mul_srcd:
+	bsr.l	ckinf_nd
+	bne	fix_stk
+	bfextu	FPTEMP_EX(a6){1:15},d0	;get dest exp (always pos)
+	bfexts	ETEMP_EX(a6){1:15},d1	;get src exp (always neg)
+	add.l	d1,d0			;subtract src from dest
+	bgt	fix_stk
+	
+*
+* This code handles the case of the instruction resulting in 
+* an underflow condition.
+*
+force_unf:
+	bclr.b	#E1,E_BYTE(a6)
+	or.l	#unfinx_mask,USER_FPSR(a6)
+	clr.w	NMNEXC(a6)
+	clr.b	WBTEMP_SGN(a6)
+	move.w	ETEMP_EX(a6),d0		;find the sign of the result
+	move.w	FPTEMP_EX(a6),d1
+	eor.w	d1,d0
+	andi.w	#$8000,d0
+	beq.b	frcunfcont
+	st.b	WBTEMP_SGN(a6)
+frcunfcont:
+	lea	WBTEMP(a6),a0		;point a0 to memory location
+	move.w	CMDREG1B(a6),d0
+	btst.l	#6,d0			;test for forced precision
+	beq.b	frcunf_fpcr
+	btst.l	#2,d0			;check for double
+	bne.b	frcunf_dbl
+	move.l	#$1,d0			;inst is forced single
+	bra.b	frcunf_rnd
+frcunf_dbl:
+	move.l	#$2,d0			;inst is forced double
+	bra.b	frcunf_rnd
+frcunf_fpcr:
+	bfextu	FPCR_MODE(a6){0:2},d0	;inst not forced - use fpcr prec
+frcunf_rnd:
+	bsr.l	unf_sub			;get correct result based on
+*					;round precision/mode.  This 
+*					;sets FPSR_CC correctly
+	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
+	beq.b	frcfpn
+	bset.b	#sign_bit,WBTEMP_EX(a6)
+	bra	frcfpn
+
+*
+* Write the result to the user's fpn.  All results must be HUGE to be
+* written; otherwise the results would have overflowed or underflowed.
+* If the rounding precision is single or double, the ovf_res routine
+* is needed to correctly supply the max value.
+*
+frcfpnr:
+	move.w	CMDREG1B(a6),d0
+	btst.l	#6,d0			;test for forced precision
+	beq.b	frcfpn_fpcr
+	btst.l	#2,d0			;check for double
+	bne.b	frcfpn_dbl
+	move.l	#$1,d0			;inst is forced single
+	bra.b	frcfpn_rnd
+frcfpn_dbl:
+	move.l	#$2,d0			;inst is forced double
+	bra.b	frcfpn_rnd
+frcfpn_fpcr:
+	bfextu	FPCR_MODE(a6){0:2},d0	;inst not forced - use fpcr prec
+	tst.b	d0
+	beq.b	frcfpn			;if extended, write what you got
+frcfpn_rnd:
+	bclr.b	#sign_bit,WBTEMP_EX(a6)
+	sne	WBTEMP_SGN(a6)
+	bsr.l	ovf_res			;get correct result based on
+*					;round precision/mode.  This 
+*					;sets FPSR_CC correctly
+	bfclr	WBTEMP_SGN(a6){0:8}	;convert back to IEEE ext format
+	beq.b	frcfpn_clr
+	bset.b	#sign_bit,WBTEMP_EX(a6)
+frcfpn_clr:
+	or.l	#ovfinx_mask,USER_FPSR(a6)
+* 
+* Perform the write.
+*
+frcfpn:
+	bfextu	CMDREG1B(a6){6:3},d0	;extract fp destination register
+	cmpi.b	#3,d0
+	ble.b	frc0123			;check if dest is fp0-fp3
+	move.l	#7,d1
+	sub.l	d0,d1
+	clr.l	d0
+	bset.l	d1,d0
+	fmovem.x WBTEMP(a6),d0
+	rts
+frc0123:
+	tst.b	d0
+	beq.b	frc0_dst
+	cmpi.b	#1,d0
+	beq.b	frc1_dst 
+	cmpi.b	#2,d0
+	beq.b	frc2_dst 
+frc3_dst:
+	move.l	WBTEMP_EX(a6),USER_FP3(a6)
+	move.l	WBTEMP_HI(a6),USER_FP3+4(a6)
+	move.l	WBTEMP_LO(a6),USER_FP3+8(a6)
+	rts
+frc2_dst:
+	move.l	WBTEMP_EX(a6),USER_FP2(a6)
+	move.l	WBTEMP_HI(a6),USER_FP2+4(a6)
+	move.l	WBTEMP_LO(a6),USER_FP2+8(a6)
+	rts
+frc1_dst:
+	move.l	WBTEMP_EX(a6),USER_FP1(a6)
+	move.l	WBTEMP_HI(a6),USER_FP1+4(a6)
+	move.l	WBTEMP_LO(a6),USER_FP1+8(a6)
+	rts
+frc0_dst:
+	move.l	WBTEMP_EX(a6),USER_FP0(a6)
+	move.l	WBTEMP_HI(a6),USER_FP0+4(a6)
+	move.l	WBTEMP_LO(a6),USER_FP0+8(a6)
+	rts
+
+*
+* Write etemp to fpn.
+* A check is made on enabled and signalled snan exceptions,
+* and the destination is not overwritten if this condition exists.
+* This code is designed to make fmoveins of unsupported data types
+* faster.
+*
+wr_etemp:
+	btst.b	#snan_bit,FPSR_EXCEPT(a6)	;if snan is set, and
+	beq.b	fmoveinc		;enabled, force restore
+	btst.b	#snan_bit,FPCR_ENABLE(a6) ;and don't overwrite
+	beq.b	fmoveinc		;the dest
+	move.l	ETEMP_EX(a6),FPTEMP_EX(a6)	;set up fptemp sign for 
+*						;snan handler
+	tst.b	ETEMP(a6)		;check for negative
+	blt.b	snan_neg
+	rts
+snan_neg:
+	or.l	#neg_bit,USER_FPSR(a6)	;snan is negative; set N
+	rts
+fmoveinc:
+	clr.w	NMNEXC(a6)
+	bclr.b	#E1,E_BYTE(a6)
+	move.b	STAG(a6),d0		;check if stag is inf
+	andi.b	#$e0,d0
+	cmpi.b	#$40,d0
+	bne.b	fminc_cnan
+	or.l	#inf_mask,USER_FPSR(a6) ;if inf, nothing yet has set I
+	tst.w	LOCAL_EX(a0)		;check sign
+	bge.b	fminc_con
+	or.l	#neg_mask,USER_FPSR(a6)
+	bra	fminc_con
+fminc_cnan:
+	cmpi.b	#$60,d0			;check if stag is NaN
+	bne.b	fminc_czero
+	or.l	#nan_mask,USER_FPSR(a6) ;if nan, nothing yet has set NaN
+	move.l	ETEMP_EX(a6),FPTEMP_EX(a6)	;set up fptemp sign for 
+*						;snan handler
+	tst.w	LOCAL_EX(a0)		;check sign
+	bge.b	fminc_con
+	or.l	#neg_mask,USER_FPSR(a6)
+	bra	fminc_con
+fminc_czero:
+	cmpi.b	#$20,d0			;check if zero
+	bne.b	fminc_con
+	or.l	#z_mask,USER_FPSR(a6)	;if zero, set Z
+	tst.w	LOCAL_EX(a0)		;check sign
+	bge.b	fminc_con
+	or.l	#neg_mask,USER_FPSR(a6)
+fminc_con:
+	bfextu	CMDREG1B(a6){6:3},d0	;extract fp destination register
+	cmpi.b	#3,d0
+	ble.b	fp0123			;check if dest is fp0-fp3
+	move.l	#7,d1
+	sub.l	d0,d1
+	clr.l	d0
+	bset.l	d1,d0
+	fmovem.x ETEMP(a6),d0
+	rts
+
+fp0123:
+	tst.b	d0
+	beq.b	fp0_dst
+	cmpi.b	#1,d0
+	beq.b	fp1_dst 
+	cmpi.b	#2,d0
+	beq.b	fp2_dst 
+fp3_dst:
+	move.l	ETEMP_EX(a6),USER_FP3(a6)
+	move.l	ETEMP_HI(a6),USER_FP3+4(a6)
+	move.l	ETEMP_LO(a6),USER_FP3+8(a6)
+	rts
+fp2_dst:
+	move.l	ETEMP_EX(a6),USER_FP2(a6)
+	move.l	ETEMP_HI(a6),USER_FP2+4(a6)
+	move.l	ETEMP_LO(a6),USER_FP2+8(a6)
+	rts
+fp1_dst:
+	move.l	ETEMP_EX(a6),USER_FP1(a6)
+	move.l	ETEMP_HI(a6),USER_FP1+4(a6)
+	move.l	ETEMP_LO(a6),USER_FP1+8(a6)
+	rts
+fp0_dst:
+	move.l	ETEMP_EX(a6),USER_FP0(a6)
+	move.l	ETEMP_HI(a6),USER_FP0+4(a6)
+	move.l	ETEMP_LO(a6),USER_FP0+8(a6)
+	rts
+
+opclass3:
+	st.b	CU_ONLY(a6)
+	move.w	CMDREG1B(a6),d0	;check if packed moveout
+	andi.w	#$0c00,d0	;isolate last 2 bits of size field
+	cmpi.w	#$0c00,d0	;if size is 011 or 111, it is packed
+	beq.w	pack_out	;else it is norm or denorm
+	bra.w	mv_out
+
+	
+*
+*	MOVE OUT
+*
+
+mv_tbl:
+	dc.l	li
+	dc.l 	sgp
+	dc.l 	xp
+	dc.l 	mvout_end	;should never be taken
+	dc.l 	wi
+	dc.l 	dp
+	dc.l 	bi
+	dc.l 	mvout_end	;should never be taken
+mv_out:
+	bfextu	CMDREG1B(a6){3:3},d1	;put source specifier in d1
+	lea.l	mv_tbl,a0
+	move.l	(a0,d1*4),a0
+	jmp	(a0)
+
+*
+* This exit is for move-out to memory.  The aunfl bit is 
+* set if the result is inex and unfl is signalled.
+*
+mvout_end:
+	btst.b	#inex2_bit,FPSR_EXCEPT(a6)
+	beq.b	no_aufl
+	btst.b	#unfl_bit,FPSR_EXCEPT(a6)
+	beq.b	no_aufl
+	bset.b	#aunfl_bit,FPSR_AEXCEPT(a6)
+no_aufl:
+	clr.w	NMNEXC(a6)
+	bclr.b	#E1,E_BYTE(a6)
+	fmove.l	#0,FPSR			;clear any cc bits from res_func
+*
+* Return ETEMP to extended format from internal extended format so
+* that gen_except will have a correctly signed value for ovfl/unfl
+* handlers.
+*
+	bfclr	ETEMP_SGN(a6){0:8}
+	beq.b	mvout_con
+	bset.b	#sign_bit,ETEMP_EX(a6)
+mvout_con:
+	rts
+*
+* This exit is for move-out to int register.  The aunfl bit is 
+* not set in any case for this move.
+*
+mvouti_end:
+	clr.w	NMNEXC(a6)
+	bclr.b	#E1,E_BYTE(a6)
+	fmove.l	#0,FPSR			;clear any cc bits from res_func
+*
+* Return ETEMP to extended format from internal extended format so
+* that gen_except will have a correctly signed value for ovfl/unfl
+* handlers.
+*
+	bfclr	ETEMP_SGN(a6){0:8}
+	beq.b	mvouti_con
+	bset.b	#sign_bit,ETEMP_EX(a6)
+mvouti_con:
+	rts
+*
+* li is used to handle a long integer source specifier
+*
+
+li:
+	moveq.l	#4,d0		;set byte count
+
+	btst.b	#7,STAG(a6)	;check for extended denorm
+	bne.w	int_dnrm	;if so, branch
+
+	fmovem.x ETEMP(a6),fp0
+	fcmp.d	#:41dfffffffc00000,fp0
+* 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec
+	fbge.w	lo_plrg	
+	fcmp.d	#:c1e0000000000000,fp0
+* c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec
+	fble.w	lo_nlrg
+*
+* at this point, the answer is between the largest pos and neg values
+*
+	move.l	USER_FPCR(a6),d1	;use user's rounding mode
+	andi.l	#$30,d1
+	fmove.l	d1,fpcr
+	fmove.l	fp0,L_SCR1(a6)	;let the 040 perform conversion
+	fmove.l fpsr,d1
+	or.l	d1,USER_FPSR(a6)	;capture inex2/ainex if set
+	bra.w	int_wrt
+
+
+lo_plrg:
+	move.l	#$7fffffff,L_SCR1(a6)	;answer is largest positive int
+	fbeq.w	int_wrt			;exact answer
+	fcmp.d	#:41dfffffffe00000,fp0
+* 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec
+	fbge.w	int_operr		;set operr
+	bra.w	int_inx			;set inexact
+
+lo_nlrg:
+	move.l	#$80000000,L_SCR1(a6)
+	fbeq.w	int_wrt			;exact answer
+	fcmp.d	#:c1e0000000100000,fp0
+* c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec
+	fblt.w	int_operr		;set operr
+	bra.w	int_inx			;set inexact
+
+*
+* wi is used to handle a word integer source specifier
+*
+
+wi:
+	moveq.l	#2,d0		;set byte count
+
+	btst.b	#7,STAG(a6)	;check for extended denorm
+	bne.w	int_dnrm	;branch if so
+
+	fmovem.x ETEMP(a6),fp0
+	fcmp.s	#:46fffe00,fp0
+* 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec
+	fbge.w	wo_plrg	
+	fcmp.s	#:c7000000,fp0
+* c7000000 in sgl prec = c00e00008000000000000000 in ext prec
+	fble.w	wo_nlrg
+
+*
+* at this point, the answer is between the largest pos and neg values
+*
+	move.l	USER_FPCR(a6),d1	;use user's rounding mode
+	andi.l	#$30,d1
+	fmove.l	d1,fpcr
+	fmove.w	fp0,L_SCR1(a6)	;let the 040 perform conversion
+	fmove.l fpsr,d1
+	or.l	d1,USER_FPSR(a6)	;capture inex2/ainex if set
+	bra.w	int_wrt
+
+wo_plrg:
+	move.w	#$7fff,L_SCR1(a6)	;answer is largest positive int
+	fbeq.w	int_wrt			;exact answer
+	fcmp.s	#:46ffff00,fp0
+* 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec
+	fbge.w	int_operr		;set operr
+	bra.w	int_inx			;set inexact
+
+wo_nlrg:
+	move.w	#$8000,L_SCR1(a6)
+	fbeq.w	int_wrt			;exact answer
+	fcmp.s	#:c7000080,fp0
+* c7000080 in sgl prec = c00e00008000800000000000 in ext prec
+	fblt.w	int_operr		;set operr
+	bra.w	int_inx			;set inexact
+
+*
+* bi is used to handle a byte integer source specifier
+*
+
+bi:
+	moveq.l	#1,d0		;set byte count
+
+	btst.b	#7,STAG(a6)	;check for extended denorm
+	bne.w	int_dnrm	;branch if so
+
+	fmovem.x ETEMP(a6),fp0
+	fcmp.s	#:42fe0000,fp0
+* 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec
+	fbge.w	by_plrg	
+	fcmp.s	#:c3000000,fp0
+* c3000000 in sgl prec = c00600008000000000000000 in ext prec
+	fble.w	by_nlrg
+
+*
+* at this point, the answer is between the largest pos and neg values
+*
+	move.l	USER_FPCR(a6),d1	;use user's rounding mode
+	andi.l	#$30,d1
+	fmove.l	d1,fpcr
+	fmove.b	fp0,L_SCR1(a6)	;let the 040 perform conversion
+	fmove.l fpsr,d1
+	or.l	d1,USER_FPSR(a6)	;capture inex2/ainex if set
+	bra.w	int_wrt
+
+by_plrg:
+	move.b	#$7f,L_SCR1(a6)		;answer is largest positive int
+	fbeq.w	int_wrt			;exact answer
+	fcmp.s	#:42ff0000,fp0
+* 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec
+	fbge.w	int_operr		;set operr
+	bra.w	int_inx			;set inexact
+
+by_nlrg:
+	move.b	#$80,L_SCR1(a6)
+	fbeq.w	int_wrt			;exact answer
+	fcmp.s	#:c3008000,fp0
+* c3008000 in sgl prec = c00600008080000000000000 in ext prec
+	fblt.w	int_operr		;set operr
+	bra.w	int_inx			;set inexact
+
+*
+* Common integer routines
+*
+* int_drnrm---account for possible nonzero result for round up with positive
+* operand and round down for negative answer.  In the first case (result = 1)
+* byte-width (store in d0) of result must be honored.  In the second case,
+* -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out).
+
+int_dnrm:
+	clr.l	L_SCR1(a6)	; initialize result to 0
+	bfextu	FPCR_MODE(a6){2:2},d1	; d1 is the rounding mode
+	cmp.b	#2,d1		
+	bmi.b	int_inx		; if RN or RZ, done
+	bne.b	int_rp		; if RP, continue below
+	tst.w	ETEMP(a6)	; RM: store -1 in L_SCR1 if src is negative
+	bpl.b	int_inx		; otherwise result is 0
+	move.l	#-1,L_SCR1(a6)
+	bra.b	int_inx
+int_rp:
+	tst.w	ETEMP(a6)	; RP: store +1 of proper width in L_SCR1 if
+*				; source is greater than 0
+	bmi.b	int_inx		; otherwise, result is 0
+	lea	L_SCR1(a6),a1	; a1 is address of L_SCR1
+	adda.l	d0,a1		; offset by destination width -1
+	suba.l	#1,a1		
+	bset.b	#0,(a1)		; set low bit at a1 address
+int_inx:
+	ori.l	#inx2a_mask,USER_FPSR(a6)
+	bra.b	int_wrt
+int_operr:
+	fmovem.x fp0,FPTEMP(a6)	;FPTEMP must contain the extended
+*				;precision source that needs to be
+*				;converted to integer this is required
+*				;if the operr exception is enabled.
+*				;set operr/aiop (no inex2 on int ovfl)
+
+	ori.l	#opaop_mask,USER_FPSR(a6)
+*				;fall through to perform int_wrt
+int_wrt: 
+	move.l	EXC_EA(a6),a1	;load destination address
+	tst.l	a1		;check to see if it is a dest register
+	beq.b	wrt_dn		;write data register 
+	lea	L_SCR1(a6),a0	;point to supervisor source address
+	bsr.l	mem_write
+	bra.w	mvouti_end
+
+wrt_dn:
+	move.l	d0,-(sp)	;d0 currently contains the size to write
+	bsr.l	get_fline	;get_fline returns Dn in d0
+	andi.w	#$7,d0		;isolate register
+	move.l	(sp)+,d1	;get size
+	cmpi.l	#4,d1		;most frequent case
+	beq.b	sz_long
+	cmpi.l	#2,d1
+	bne.b	sz_con
+	or.l	#8,d0		;add 'word' size to register#
+	bra.b	sz_con
+sz_long:
+	or.l	#$10,d0		;add 'long' size to register#
+sz_con:
+	move.l	d0,d1		;reg_dest expects size:reg in d1
+	bsr.l	reg_dest	;load proper data register
+	bra.w	mvouti_end 
+xp:
+	lea	ETEMP(a6),a0
+	bclr.b	#sign_bit,LOCAL_EX(a0)
+	sne	LOCAL_SGN(a0)
+	btst.b	#7,STAG(a6)	;check for extended denorm
+	bne.w	xdnrm
+	clr.l	d0
+	bra.b	do_fp		;do normal case
+sgp:
+	lea	ETEMP(a6),a0
+	bclr.b	#sign_bit,LOCAL_EX(a0)
+	sne	LOCAL_SGN(a0)
+	btst.b	#7,STAG(a6)	;check for extended denorm
+	bne.w	sp_catas	;branch if so
+	move.w	LOCAL_EX(a0),d0
+	lea	sp_bnds,a1
+	cmp.w	(a1),d0
+	blt.w	sp_under
+	cmp.w	2(a1),d0
+	bgt.w	sp_over
+	move.l	#1,d0		;set destination format to single
+	bra.b	do_fp		;do normal case
+dp:
+	lea	ETEMP(a6),a0
+	bclr.b	#sign_bit,LOCAL_EX(a0)
+	sne	LOCAL_SGN(a0)
+
+	btst.b	#7,STAG(a6)	;check for extended denorm
+	bne.w	dp_catas	;branch if so
+
+	move.w	LOCAL_EX(a0),d0
+	lea	dp_bnds,a1
+
+	cmp.w	(a1),d0
+	blt.w	dp_under
+	cmp.w	2(a1),d0
+	bgt.w	dp_over
+	
+	move.l	#2,d0		;set destination format to double
+*				;fall through to do_fp
+*
+do_fp:
+	bfextu	FPCR_MODE(a6){2:2},d1	;rnd mode in d1
+	swap	d0			;rnd prec in upper word
+	add.l	d0,d1			;d1 has PREC/MODE info
+	
+	clr.l	d0			;clear g,r,s 
+
+	bsr.l	round			;round 
+
+	move.l	a0,a1
+	move.l	EXC_EA(a6),a0
+
+	bfextu	CMDREG1B(a6){3:3},d1	;extract destination format
+*					;at this point only the dest
+*					;formats sgl, dbl, ext are
+*					;possible
+	cmp.b	#2,d1
+	bgt.b	ddbl			;double=5, extended=2, single=1
+	bne.b	dsgl
+*					;fall through to dext
+dext:
+	bsr.l	dest_ext
+	bra.w	mvout_end
+dsgl:
+	bsr.l	dest_sgl
+	bra.w	mvout_end
+ddbl:
+	bsr.l	dest_dbl
+	bra.w	mvout_end
+
+*
+* Handle possible denorm or catastrophic underflow cases here
+*
+xdnrm:
+	bsr.w	set_xop		;initialize WBTEMP
+	bset.b	#wbtemp15_bit,WB_BYTE(a6) ;set wbtemp15
+
+	move.l	a0,a1
+	move.l	EXC_EA(a6),a0	;a0 has the destination pointer
+	bsr.l	dest_ext	;store to memory
+	bset.b	#unfl_bit,FPSR_EXCEPT(a6)
+	bra.w	mvout_end
+	
+sp_under:
+	bset.b	#etemp15_bit,STAG(a6)
+
+	cmp.w	4(a1),d0
+	blt.b	sp_catas	;catastrophic underflow case	
+
+	move.l	#1,d0		;load in round precision
+	move.l	#sgl_thresh,d1	;load in single denorm threshold
+	bsr.l	dpspdnrm	;expects d1 to have the proper
+*				;denorm threshold
+	bsr.l	dest_sgl	;stores value to destination
+	bset.b	#unfl_bit,FPSR_EXCEPT(a6)
+	bra.w	mvout_end	;exit
+
+dp_under:
+	bset.b	#etemp15_bit,STAG(a6)
+
+	cmp.w	4(a1),d0
+	blt.b	dp_catas	;catastrophic underflow case
+		
+	move.l	#dbl_thresh,d1	;load in double precision threshold
+	move.l	#2,d0		
+	bsr.l	dpspdnrm	;expects d1 to have proper
+*				;denorm threshold
+*				;expects d0 to have round precision
+	bsr.l	dest_dbl	;store value to destination
+	bset.b	#unfl_bit,FPSR_EXCEPT(a6)
+	bra.w	mvout_end	;exit
+
+*
+* Handle catastrophic underflow cases here
+*
+sp_catas:
+* Temp fix for z bit set in unf_sub
+	move.l	USER_FPSR(a6),-(a7)
+
+	move.l	#1,d0		;set round precision to sgl
+
+	bsr.l	unf_sub		;a0 points to result
+
+	move.l	(a7)+,USER_FPSR(a6)
+
+	move.l	#1,d0
+	sub.w	d0,LOCAL_EX(a0) ;account for difference between
+*				;denorm/norm bias
+
+	move.l	a0,a1		;a1 has the operand input
+	move.l	EXC_EA(a6),a0	;a0 has the destination pointer
+	
+	bsr.l	dest_sgl	;store the result
+	ori.l	#unfinx_mask,USER_FPSR(a6)
+	bra.w	mvout_end
+	
+dp_catas:
+* Temp fix for z bit set in unf_sub
+	move.l	USER_FPSR(a6),-(a7)
+
+	move.l	#2,d0		;set round precision to dbl
+	bsr.l	unf_sub		;a0 points to result
+
+	move.l	(a7)+,USER_FPSR(a6)
+
+	move.l	#1,d0
+	sub.w	d0,LOCAL_EX(a0) ;account for difference between 
+*				;denorm/norm bias
+
+	move.l	a0,a1		;a1 has the operand input
+	move.l	EXC_EA(a6),a0	;a0 has the destination pointer
+	
+	bsr.l	dest_dbl	;store the result
+	ori.l	#unfinx_mask,USER_FPSR(a6)
+	bra.w	mvout_end
+
+*
+* Handle catastrophic overflow cases here
+*
+sp_over:
+* Temp fix for z bit set in unf_sub
+	move.l	USER_FPSR(a6),-(a7)
+
+	move.l	#1,d0
+	lea.l	FP_SCR1(a6),a0	;use FP_SCR1 for creating result
+	move.l	ETEMP_EX(a6),(a0)
+	move.l	ETEMP_HI(a6),4(a0)
+	move.l	ETEMP_LO(a6),8(a0)
+	bsr.l	ovf_res
+
+	move.l	(a7)+,USER_FPSR(a6)
+
+	move.l	a0,a1
+	move.l	EXC_EA(a6),a0
+	bsr.l	dest_sgl
+	or.l	#ovfinx_mask,USER_FPSR(a6)
+	bra.w	mvout_end
+
+dp_over:
+* Temp fix for z bit set in ovf_res
+	move.l	USER_FPSR(a6),-(a7)
+
+	move.l	#2,d0
+	lea.l	FP_SCR1(a6),a0	;use FP_SCR1 for creating result
+	move.l	ETEMP_EX(a6),(a0)
+	move.l	ETEMP_HI(a6),4(a0)
+	move.l	ETEMP_LO(a6),8(a0)
+	bsr.l	ovf_res
+
+	move.l	(a7)+,USER_FPSR(a6)
+
+	move.l	a0,a1
+	move.l	EXC_EA(a6),a0
+	bsr.l	dest_dbl
+	or.l	#ovfinx_mask,USER_FPSR(a6)
+	bra.w	mvout_end
+
+*
+* 	DPSPDNRM
+*
+* This subroutine takes an extended normalized number and denormalizes
+* it to the given round precision. This subroutine also decrements
+* the input operand's exponent by 1 to account for the fact that
+* dest_sgl or dest_dbl expects a normalized number's bias.
+*
+* Input: a0  points to a normalized number in internal extended format
+*	 d0  is the round precision (=1 for sgl; =2 for dbl)
+*	 d1  is the the single precision or double precision
+*	     denorm threshold
+*
+* Output: (In the format for dest_sgl or dest_dbl)
+*	 a0   points to the destination
+*   	 a1   points to the operand
+*
+* Exceptions: Reports inexact 2 exception by setting USER_FPSR bits
+*
+dpspdnrm:
+	move.l	d0,-(a7)	;save round precision
+	clr.l	d0		;clear initial g,r,s
+	bsr.l	dnrm_lp		;careful with d0, it's needed by round
+
+	bfextu	FPCR_MODE(a6){2:2},d1 ;get rounding mode
+	swap	d1
+	move.w	2(a7),d1	;set rounding precision 
+	swap	d1		;at this point d1 has PREC/MODE info
+	bsr.l	round		;round result, sets the inex bit in
+*				;USER_FPSR if needed
+
+	move.w	#1,d0
+	sub.w	d0,LOCAL_EX(a0) ;account for difference in denorm
+*				;vs norm bias
+
+	move.l	a0,a1		;a1 has the operand input
+	move.l	EXC_EA(a6),a0	;a0 has the destination pointer
+	addq.l	#4,a7		;pop stack
+	rts
+*
+* SET_XOP initialized WBTEMP with the value pointed to by a0
+* input: a0 points to input operand in the internal extended format
+*
+set_xop:
+	move.l	LOCAL_EX(a0),WBTEMP_EX(a6)
+	move.l	LOCAL_HI(a0),WBTEMP_HI(a6)
+	move.l	LOCAL_LO(a0),WBTEMP_LO(a6)
+	bfclr	WBTEMP_SGN(a6){0:8}
+	beq.b	sxop
+	bset.b	#sign_bit,WBTEMP_EX(a6)
+sxop:
+	bfclr	STAG(a6){5:4}	;clear wbtm66,wbtm1,wbtm0,sbit
+	rts
+*
+*	P_MOVE
+*
+p_movet:
+	dc.l	p_move
+	dc.l	p_movez
+	dc.l	p_movei
+	dc.l	p_moven
+	dc.l	p_move
+p_regd:
+	dc.l	p_dyd0
+	dc.l	p_dyd1
+	dc.l	p_dyd2
+	dc.l	p_dyd3
+	dc.l	p_dyd4
+	dc.l	p_dyd5
+	dc.l	p_dyd6
+	dc.l	p_dyd7
+
+pack_out:
+ 	lea.l	p_movet,a0	;load jmp table address
+	move.w	STAG(a6),d0	;get source tag
+	bfextu	d0{16:3},d0	;isolate source bits
+	move.l	(a0,d0.w*4),a0	;load a0 with routine label for tag
+	jmp	(a0)		;go to the routine
+
+p_write:
+	move.l	#$0c,d0 	;get byte count
+	move.l	EXC_EA(a6),a1	;get the destination address
+	bsr 	mem_write	;write the user's destination
+	clr.b	CU_SAVEPC(a6) ;set the cu save pc to all 0's
+
+*
+* Also note that the dtag must be set to norm here - this is because 
+* the 040 uses the dtag to execute the correct microcode.
+*
+        bfclr    DTAG(a6){0:3}  ;set dtag to norm
+
+	rts
+
+* Notes on handling of special case (zero, inf, and nan) inputs:
+*	1. Operr is not signalled if the k-factor is greater than 18.
+*	2. Per the manual, status bits are not set.
+*
+
+p_move:
+	move.w	CMDREG1B(a6),d0
+	btst.l	#kfact_bit,d0	;test for dynamic k-factor
+	beq.b	statick		;if clear, k-factor is static
+dynamick:
+	bfextu	d0{25:3},d0	;isolate register for dynamic k-factor
+	lea	p_regd,a0
+	move.l	(a0,d0*4),a0
+	jmp	(a0)
+statick:
+	andi.w	#$007f,d0	;get k-factor
+	bfexts	d0{25:7},d0	;sign extend d0 for bindec
+	lea.l	ETEMP(a6),a0	;a0 will point to the packed decimal
+	bsr.l	bindec		;perform the convert; data at a6
+	lea.l	FP_SCR1(a6),a0	;load a0 with result address
+	bra.l	p_write
+p_movez:
+	lea.l	ETEMP(a6),a0	;a0 will point to the packed decimal
+	clr.w	2(a0)		;clear lower word of exp
+	clr.l	4(a0)		;load second lword of ZERO
+	clr.l	8(a0)		;load third lword of ZERO
+	bra.w	p_write		;go write results
+p_movei:
+	fmove.l	#0,FPSR		;clear aiop
+	lea.l	ETEMP(a6),a0	;a0 will point to the packed decimal
+	clr.w	2(a0)		;clear lower word of exp
+	bra.w	p_write		;go write the result
+p_moven:
+	lea.l	ETEMP(a6),a0	;a0 will point to the packed decimal
+	clr.w	2(a0)		;clear lower word of exp
+	bra.w	p_write		;go write the result
+
+*
+* Routines to read the dynamic k-factor from Dn.
+*
+p_dyd0:
+	move.l	USER_D0(a6),d0
+	bra.b	statick
+p_dyd1:
+	move.l	USER_D1(a6),d0
+	bra.b	statick
+p_dyd2:
+	move.l	d2,d0
+	bra.b	statick
+p_dyd3:
+	move.l	d3,d0
+	bra.b	statick
+p_dyd4:
+	move.l	d4,d0
+	bra.b	statick
+p_dyd5:
+	move.l	d5,d0
+	bra.b	statick
+p_dyd6:
+	move.l	d6,d0
+	bra.w	statick
+p_dyd7:
+	move.l	d7,d0
+	bra.w	statick
+
+	end