1 files changed, 2192 insertions, 1959 deletions
diff --git a/sys/arch/mvme88k/mvme88k/m88100_fp.S b/sys/arch/mvme88k/mvme88k/m88100_fp.S
index 01c2c98b04a..3223ac7ee3e 100644
--- a/sys/arch/mvme88k/mvme88k/m88100_fp.S
+++ b/sys/arch/mvme88k/mvme88k/m88100_fp.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: m88100_fp.S,v 1.18 2003/11/03 06:54:26 david Exp $	*/
+/* $OpenBSD: m88100_fp.S,v 1.19 2003/12/24 22:41:45 miod Exp $	*/
 /*
  * Mach Operating System
  * Copyright (c) 1991 Carnegie Mellon University
@@ -31,81 +31,35 @@
 #include <machine/trap.h>
 #include <machine/asm.h>
 
-#define psr cr1
-#define spsr cr2
-#define ssb cr3
-#define scip cr4
-#define snip cr5
-#define sfip cr6
-#define vbr cr7	
-#define dmt0 cr8	
-#define scratch1 cr18
-#define scratch2 cr20
-#define fpecr fcr0
-#define s1hi fcr1
-#define s1lo fcr2
-#define s2hi fcr3
-#define s2lo fcr4
-#define pcr fcr5
-#define manthi fcr6
-#define mantlo fcr7
-#define impcr fcr8
-#define fpsr fcr62
-#define fpcr fcr63
-#define valid 1
-#define exception 0
-#define exc_disable 0
-#define FP_disable 3
-#define dexc 27
-#define serial 29
 #define destsize 10
 #define inexact 0
 #define overflow 1
 #define underflow 2
 #define divzero 3
 #define oper 4
+
 #define sign 31
 #define s1size 9
 #define s2size 7
 #define dsize 5
-#define full 1
-#define fault 0
+
 #define FADDop 0x05
 #define FSUBop 0x06
 #define FCMPop 0x07
 #define FMULop 0x00
 #define FDIVop 0x0e
 #define FSQRTop 0x0f
-#define FLTop 0x04
 #define INTop 0x09
 #define NINTop 0x0a
 #define TRNCop 0x0b
-#define mode 31
-#define s1sign 9
-#define s2sign 8
+
 #define s1nan 7
 #define s2nan 6
 #define s1inf 5
 #define s2inf 4
 #define s1zero 3
 #define s2zero 2
-#define s1denorm 1
-#define s2denorm 0
 #define sigbit 19
-#define sigbits 22
-#define sigbitd 19
-#define nc 0
-#define cp 1
-#define eq 2
-#define ne 3
-#define gt 4
-#define le 5
-#define lt 6
-#define ge 7
-#define ou 8
-#define ib 9
-#define in 10
-#define ob 11
 
 #define modehi 30
 #define modelo 29
@@ -115,1277 +69,1407 @@
 #define efovf 6
 #define efinx 5
 
-#define MARK	or	r21, r0, __LINE__
-	
 ASENTRY(m88100_Xfp_precise)
-	or	r29, r3, r0     /*  r29 is now the E.F. */
+	or	r29, r3,  r0		/* r29 is now the E.F. */
 	subu	r31, r31, 40
 	st	r1,  r31, 32
 	st	r29, r31, 36
-  
-	ld	r2, r29, EF_FPSR  * 4
-	ld	r3, r29, EF_FPCR  * 4
+
+	ld	r2, r29, EF_FPSR * 4
+	ld	r3, r29, EF_FPCR * 4
 	ld	r4, r29, EF_FPECR * 4
 	ld	r5, r29, EF_FPHS1 * 4
 	ld	r6, r29, EF_FPLS1 * 4
 	ld	r7, r29, EF_FPHS2 * 4
 	ld	r8, r29, EF_FPLS2 * 4
-	ld	r9, r29, EF_FPPT  * 4
-	
-	
-	/* Load into r1 the return address for the 0 handlers.  Looking */
-	/* at FPECR, branch to the appropriate 0 handler.  However, */
-	/* if none of the 0 bits are enabled, then a floating point */
-	/* instruction was issued with the floating point unit disabled.  This */
-	/* will cause an unimplemented opcode 0. */
-	
-	or.u	r1,r0,hi16(wrapup) /* load return address of function  */
+	ld	r9, r29, EF_FPPT * 4
+
+
+	/*
+	 * Load into r1 the return address for the exception handlers. Looking
+	 * at FPECR, branch to the appropriate exception handler. However, if
+	 * none of the exception bits are enabled, then a floating point
+	 * instruction was issued with the floating point unit disabled. This
+	 * will cause an unimplemented opcode exception.
+	 */
+
+	or.u	r1,r0,hi16(wrapup)	/* load return address of function */
 	or	r1,r1,lo16(wrapup)
-2:   	bb0	6,r4, 3f   	/* branch to FPunimp if bit set */
-     	br	FPuimp
-3:	bb0	7,r4, 4f 	/* branch to FPintover if bit set */
-	br	_FPintover
-4: /* 	bb0	5,r4, 5f 	;branch to FPpriviol if bit set */
-   /* 	br	_FPpriviol */
-5: 	bb0	4,r4, 6f 	/* branch to FPresoper if bit set */
-	br	_FPresoper
-6:   	bb0	3,r4, 7f 	/* branch to FPdivzero if bit set */
-    	br	_FPdivzero
-7: 
+2:
+	bb0	6,r4, 3f		/* branch to FPuimp if bit set */
+	br	FPuimp
+3:
+	bb0	7,r4, 4f		/* branch to FPintover if bit set */
+	br	FPintover
+4:
+#if 0
+	bb0	5,r4, 5f		/* branch to FPpriviol if bit set */
+	br	FPpriviol
+#endif
+5:
+	bb0	4,r4, 6f		/* branch to FPresoper if bit set */
+	br	FPresoper
+6:
+	bb0	3,r4, 7f		/* branch to FPdivzero if bit set */
+	br	FPdivzero
+7:
 	or.u	r4, r4, 0xffff
 
-FPuimp: global FPuimp
-fp_p_trap:
-	subu    r31,r31,40      /* allocate stack */
-	st      r1,r31,36       /* save return address */
-	st      r3,r31,32       /* save exception frame */
-	or      r2,r0,T_FPEPFLT /* load trap type */
+ASLOCAL(FPuimp)
+	subu	r31,r31,40		/* allocate stack */
+	st	r1,r31,36		/* save return address */
+	st	r3,r31,32		/* save exception frame */
+	or	r2,r0,T_FPEPFLT		/* load trap type */
 	or	r3, r29, r0
-	bsr     _C_LABEL(m88100_trap)	/* trap */
-	ld      r1,r31,36       /* recover return address */
-	addu    r31,r31,40      /* deallocate stack */
- 	br	fp_p_return
-
-	/* To write back the results to the user registers, disable exceptions */
-	/* and the floating point unit.  Write FPSR and FPCR and load the SNIP */
-	/* and SFIP. */
-	/* r5 will contain the upper word of the result */
-	/* r6 will contain the lower word of the result */
-	
-wrapup: global wrapup
-	tb1	0,r0,0          /* make sure all floating point operations */
-	/* have finished */
-	ldcr	r10, cr1        /* load the PSR */
-	or	r10, r10, 0x2   /* disable interrupts */
-	stcr	r10, cr1
+	bsr	_C_LABEL(m88100_trap)
+	ld	r1,r31,36		/* recover return address */
+	addu	r31,r31,40		/* deallocate stack */
+	br	fp_p_return
+
+	/*
+	 * To write back the results to the user registers, disable exceptions
+	 * and the floating point unit. Write FPSR and FPCR and load the SNIP
+	 * and SFIP.
+	 * r5 will contain the upper word of the result
+	 * r6 will contain the lower word of the result
+	 */
+
+ASLOCAL(wrapup)
+	tb1	0,r0,0		/* make sure all floating point operations */
+				/* have finished */
+	ldcr	r10, cr1	/* load the PSR */
 #if 0
-Why is this done? -jfriedl
-	or	r10, r10, 0x8 /* set SFU 1 disable bit, disable SFU 1 */
-	stcr	r10, cr1
+	set	r10, r10, 1<PSR_FPU_DISABLE_BIT>
 #endif
+	set	r10, r10, 1<PSR_INTERRUPT_DISABLE_BIT>
+	stcr	r10, cr1
+
 	ld	r1, r31, 32
 	ld	r29, r31, 36
 	addu	r31, r31, 40
-  
-	fstcr	r2, fpsr	/* write revised value of FPSR */
-	fstcr	r3, fpcr	/* write revised value of FPCR */
-	
+
+	fstcr	r2, FPSR	/* write revised value of FPSR */
+	fstcr	r3, FPCR	/* write revised value of FPCR */
+
 	/* result writeback routine */
-	addu   r3, r29, EF_R0 * 4
-	extu   r2, r9, 5<0>       /* get 5 bits of destination register */
-	bb0    5, r9, writesingle /* branch if destination is single */
-	
+	addu	r3, r29, EF_R0 * 4
+	extu	r2, r9, 5<0>		/* get 5 bits of destination register */
+	bb0	5, r9, writesingle	/* branch if destination is single */
+
 /* writedouble here */
-	st     r5, r3 [r2]       /* write high word */
-	add    r2, r2, 1          /* for double, the low word is the  */
+	st	r5, r3 [r2]		/* write high word */
+	add	r2, r2, 1		/* for double, the low word is the */
 	/* unspecified register */
-	clr    r2, r2, 27<5>      /* perform equivalent of mod 32 */
-writesingle: 
-	st     r6, r3 [r2]       /* write low word into memory */
+	clr	r2, r2, 27<5>		/* perform equivalent of mod 32 */
+ASLOCAL(writesingle)
+	st	r6, r3 [r2]		/* write low word into memory */
 
-fp_p_return:
+ASLOCAL(fp_p_return)
 	jmp	r1
 
-             text
-	     align 8
-             global _FPdivzero
-
-
-/* Check if the numerator is zero.  If the numerator is zero, then handle */
-/* this instruction as you would a 0/0 invalid operation. */
+/*
+ * Check if the numerator is zero. If the numerator is zero, then handle
+ * this instruction as you would a 0/0 invalid operation.
+ */
 
-_FPdivzero:  
-             st    r1,r31,0  /* save return address */
-	     bb1   s1size,r9,1f  /* branch if numerator double */
+ASLOCAL(FPdivzero)
+	st	r1,r31,0		/* save return address */
+	bb1	s1size,r9,1f		/* branch if numerator double */
 /* single number */
-  	     clr   r10,r5,1<sign>   /* clear sign bit */
-             extu  r11,r6,3<29>     /* grab upper bits of lower word */
-             or    r10,r10,r11      /* combine ones of mantissa */
-             bcnd  eq0,r10,resoper  /* numerator is zero, handle reserved */
-                                    /* operand  */
-             br    setbit           /* set divzero bit */
+	clr	r10,r5,1<sign>	/* clear sign bit */
+	extu	r11,r6,3<29>	/* grab upper bits of lower word */
+	or	r10,r10,r11	/* combine ones of mantissa */
+	bcnd	eq0,r10,resoper	/* numerator is zero, handle reserved operand */
+	br	setbit		/* set divzero bit */
 1:
 /* double number */
-	     clr   r10,r5,1<sign>   /* clear sign bit */
-             or    r10,r10,r6       /* or high and low words */
-             bcnd  ne0,r10,setbit   /* set divzero bit */
+	clr	r10,r5,1<sign>	/* clear sign bit */
+	or	r10,r10,r6	/* or high and low words */
+	bcnd	ne0,r10,setbit	/* set divzero bit */
 
-/* The numerator is zero, so handle the invalid operation by setting the */
-/* invalid operation bit and branching to the user handler if there is one */
-/* or writing a quiet NaN to the destination. */
+/*
+ * The numerator is zero, so handle the invalid operation by setting the
+ * invalid operation bit and branching to the user handler if there is one
+ * or writing a quiet NaN to the destination.
+ */
 
-resoper:     
-             set   r2,r2,1<oper>    /* set bit in FPSR */
+ASLOCAL(resoper)
+	set	r2,r2,1<oper>	/* set bit in FPSR */
 #ifdef HANDLER
-	     bb0   oper,r3,noreshand /* branch to execute default handling for */
-                                    /* reserved operands */
-             bsr   _handler         /* branch to user handler */
-             br    FP_div_return           /* return from function */
+	bb0	oper,r3,noreshand	/* branch to execute default handling */
+					/* for reserved operands */
+	bsr	_handler		/* branch to user handler */
+	br	FP_div_return
 #endif
-             
-noreshand:   
- 	     set   r5,r0,0<0>      /* put a NaN in high word */
-             set   r6,r0,0<0>      /* put a NaN in low word */
-             br  FP_div_return            /* return from subroutine */
-                                   /* writing to a word which may be ignored */
-                                   /* is just as quick as checking the precision */
-                                   /* of the destination */
-
-/* The operation is divide by zero, so set the divide by zero bit in the */
-/* FPSR.  If the user handler is set, then go to the user handler, else */
-/* go to the default mode. */
+
+noreshand:
+	set	r5,r0,0<0>	/* put a NaN in high word */
+	set	r6,r0,0<0>	/* put a NaN in low word */
+	br	FP_div_return
+				/* writing to a word which may be ignored */
+				/* is just as quick as checking the precision */
+				/* of the destination */
+
+/*
+ * The operation is divide by zero, so set the divide by zero bit in the
+ * FPSR. If the user handler is set, then go to the user handler, else
+ * go to the default mode.
+ */
 
 setbit:
 #ifdef HANDLER
-             set   r2,r2,1<divzero> /* set bit in FPSR */
-             bb0   divzero,r3,default /* go to default routine if no handler */
-             bsr   _handler         /* execute handler routine */
-             br    FP_div_return           /* return from subroutine */
+	set	r2,r2,1<divzero>	/* set bit in FPSR */
+	bb0	divzero,r3,default	/* go to default routine if no hdlr */
+	bsr	_handler		/* execute handler routine */
+	br	FP_div_return
 #endif
 
+/*
+ * Considering the sign of the numerator and zero, write a correctly
+ * signed infinity of the proper precision into the destination.
+ */
+
+default:
+	bb1	dsize,r9,FPzero_double	/* branch to handle double result */
+FPzero_single:
+	clr	r10,r5,31<0>	/* clear all of S1HI except sign bit */
+	xor	r10,r7,r10	/* xor the sign bits of the operands */
+	or.u	r6,r0,0x7f80	/* load single precision infinity */
+	br.n	FP_div_return
+	 or	r6,r6,r10	/* load correctly signed infinity */
+
+FPzero_double:
+	clr	r10,r5,31<0>	/* clear all of S1HI except sign bit */
+	xor	r10,r7,r10	/* xor the sign bits of the operands */
+	or.u	r5,r0,0x7ff0	/* load double precision infinity */
+	or	r5,r5,r10	/* load correctly signed infinity */
+	or	r6,r0,r0	/* clear lower word of double */
+
+FP_div_return:
+	ld	r1,r31,0	/* load return address */
+	jmp	r1
 
-/* Considering the sign of the numerator and zero, write a correctly */
-/* signed infinity of the proper precision into the destination. */
-
-default:     
-	     bb1   dsize,r9,FPzero_double  /* branch to handle double result */
-FPzero_single:    
-   	     clr   r10,r5,31<0>     /* clear all of S1HI except sign bit */
-             xor   r10,r7,r10       /* xor the sign bits of the operands */
-             or.u  r6,r0,0x7f80     /* load single precision infinity */
-             br.n  FP_div_return           /* return from subroutine */
-             or    r6,r6,r10        /* load correctly signed infinity */
-
-FPzero_double:     
-             clr   r10,r5,31<0>     /* clear all of S1HI except sign bit */
-             xor   r10,r7,r10       /* xor the sign bits of the operands */
-             or.u  r5,r0,0x7ff0     /* load double precision infinity */
-             or    r5,r5,r10        /* load correctly signed infinity */
-             or    r6,r0,r0         /* clear lower word of double */
-
-FP_div_return:      
-	     ld    r1,r31,0  /* load return address */
-             jmp   r1               /* return from subroutine */
-
-	
-	
-/* Both NINT and TRNC require a certain rounding mode, so check which */
-/* instruction caused the integer conversion overflow.  Use a substitute */
-/* FPCR in r1, and modify the rounding mode if the instruction is NINT or TRNC. */
-	text
-	align 8
-_FPintover:  global _FPintover
-	extu   r10,r9,5<11>         /* extract opcode */
-	cmp    r11,r10,INTop        /* see if instruction is INT */
-	st     r1,r31,0      /* save return address */
-	bb1.n  eq,r11,checksize     /* instruction is INT, do not modify */
-	/* rounding mode */
-	or     r1,r0,r3             /* load FPCR into r1 */
-	cmp    r11,r10,NINTop       /* see if instruction is NINT */
-	bb1    eq,r11,NINT          /* instruction is NINT */
-	
-TRNC:   clr    r1,r1,2<rndlo>          /* clear rounding mode bits, */
+/*
+ * Both NINT and TRNC require a certain rounding mode, so check which
+ * instruction caused the integer conversion overflow. Use a substitute
+ * FPCR in r1, and modify the rounding mode if the instruction is NINT
+ * or TRNC.
+ */
+ASLOCAL(FPintover)
+	extu	r10,r9,5<11>		/* extract opcode */
+	cmp	r11,r10,INTop		/* see if instruction is INT */
+	st	r1,r31,0		/* save return address */
+	bb1.n	eq,r11,checksize	/* instruction is INT, do not modify */
+					/* rounding mode */
+	 or	r1,r0,r3		/* load FPCR into r1 */
+	cmp	r11,r10,NINTop		/* see if instruction is NINT */
+	bb1	eq,r11,NINT		/* instruction is NINT */
+TRNC:
+	clr	r1,r1,2<rndlo>		/* clear rounding mode bits, */
 					/* instruction is TRNC */
-	br.n   checksize            /* branch to check size */
-	set    r1,r1,1<rndlo>          /* make rounding mode round towards zero */
-	
-NINT:   clr    r1,r1,2<rndlo>          /* make rounding mode round to nearest */
-	
-	
+	br.n	checksize		/* branch to check size */
+	 set	r1,r1,1<rndlo>		/* make rounding mode round towards */
+					/* zero */
+NINT:
+	clr	r1,r1,2<rndlo>		/* make rounding mode round to */
+					/* nearest */
+
 /* See whether the source is single or double precision. */
-	
-checksize:   bb1    s2size,r9,checkdoub  /* S2 is double, branch to see if there */
-/* is a false alarm */
-	
-	
-/* An integer has more bits than the mantissa of a single precision floating */
-/* point number, so to check for false alarms (i.e. valid conversion), simply */
-/* check the exponents.  False alarms are detected for 2**30 to (2**30) - 1 and */
-/* -2**30 to -2**31.  Only seven bits need to be looked at since an exception */
-/* will not occur for the other half of the numbering system. */
-/* To speed up the processing, first check to see if the exponent is 32 or */
-/* greater. */
-
-/* This code was originally written for the exponent in the control */
-/* register to have the most significant bit (8 - single, 11 - double)  */
-/* flipped and sign extended.  For precise exceptions, however, the most */
-/* significant bit is only sign extended.  Therefore, the code was chopped */
-/* up so that it would work for positive values of real exponent which were */
-/* only sign extended. */
-	
-checksing:   extu   r10,r7,7<20>         /* internal representation for single */
-/* precision is IEEE 8 bits sign extended */
-/* to 11 bits; for real exp. = 30, the */
-/* above instruction gives a result exp. */
-/* that has the MSB flipped and sign */
-/* extended like in the IMPCR */
-	cmp    r11,r10,31           /* compare to 32,but exp. off by 1 */
-/* these 2 instructions to speed up valid */
-/* execution of valid cases */
-	bb1    ge,r11,overflw       /* valid case, perform overflow routine */
-	bb1    sign,r7,checksingn   /* source operand is negative */
-	
-/* If the number is positve and the exponent is greater than 30, than it is */
-	/* overflow. */
-	
-checksingp:  cmp    r10,r10,29           /* compare to 30, but exp. off by 1 */
-	bb1    gt,r10,overflw       /* no false alarm, its overflow */
-	br     conversionsp         /* finish single precision conversion */
-	
-/* If the number is negative, and the exponent is 30, or 31 with a mantissa */
-/* of 0, then it is a false alarm. */
-	
-checksingn:  cmp    r11,r10,30           /* compare to 31,but exp. off by 1 */
-	bb1    lt,r11,conversionsn  /* exp. less than 31, so convert */
-	extu   r10,r8,3<29>         /* get upper three bits of lower mantissa */
-	mak    r12,r7,20<3>         /* get upper 20 bits of mantissa */
-	or     r10,r10,r12          /* form complete mantissa */
-	bcnd   eq0,r10,conversionsn /* complete conversion if mantissa is 0 */
-	br     overflw              /* no false alarm, its overflow */
-	
-	
-/* False alarms are detected for 2**30 to (2**30) - 1 and */
-/* -2**30 to -2**31.  Only seven bits need to be looked at since an exception */
-/* will not occur for the other half of the numbering system. */
-/* To speed up the processing, first check to see if the exponent is 32 or */
-/* greater.  Since there are more mantissa bits than integer bits, rounding */
-/* could cause overflow.  (2**31) - 1 needs to be checked so that it does */
-/* not round to 2**31, and -2**31 needs to be checked in case it rounds to */
-/* -((2**31) + 1). */
-	
-checkdoub:   extu   r10,r7,10<20>        /* internal representation for double */
-/* precision is the same IEEE 11 bits  */
-/* for real exp. = 30, the */
-/* above instruction gives a result exp. */
-/* that has the MSB flipped and sign */
-/* extended like in the IMPCR */
-	cmp    r11,r10,31           /* compare to 32,but exp. off by 1 */
-/* these 2 instructions to speed up valid */
-/* execution of valid cases */
-	bb1    ge,r11,overflw       /* valid case, perform overflow routine */
-	bb1    sign,r7,checkdoubn   /* source operand is negative */
-	
-/* If the exponent is not 31, then the floating point number will be rounded */
-/* before the conversion is done.  A branch table is set up with bits 4 and 3 */
-/* being the rounding mode, and bits 2, 1, and 0 are the guard, round, and  */
-/* sticky bits. */
-	
-checkdoubp:  cmp    r11,r10,30           /* compare to 31, but exponent off by 1 */
-	bb1    eq,r11,overflw       /* no false alarm, its overflow */
-	extu   r12,r8,1<22>         /* get LSB for integer with exp. = 30 */
-	mak    r12,r12,1<2>         /* start to set up field for branch table */
-	extu   r11,r8,1<21>         /* get guard bit */
-	mak    r11,r11,1<1>         /* set up field for branch table */
-	or     r12,r11,r12          /* set up field for branch table */
-	extu   r11,r8,21<0>         /* get bits for sticky bit */
-	bcnd   eq0,r11,nostickyp    /* do not set sticky */
-	set    r12,r12,1<0>         /* set sticky bit */
-nostickyp:   rot    r11,r1,0<rndlo>      /* shift rounding mode to 2 LSB''s */
-	mak    r11,r11,2<3>         /* set up field, clear other bits */
-	or     r12,r11,r12          /* set up field for branch table */
-	lda    r12,r0[r12]          /* scale r12 */
-	or.u   r12,r12,hi16(ptable) /* load pointer into table */
-	addu   r12,r12,lo16(ptable)
-	jmp    r12                  /* jump into branch table */
-	
-ptable:      br     conversiondp
-p00001:      br     conversiondp
-p00010:      br     conversiondp
-p00011:      br     paddone
-p00100:      br     conversiondp
-p00101:      br     conversiondp
-p00110:      br     paddone
-p00111:      br     paddone
-p01000:      br     conversiondp
-p01001:      br     conversiondp
-p01010:      br     conversiondp
-p01011:      br     conversiondp
-p01100:      br     conversiondp
-p01101:      br     conversiondp
-p01110:      br     conversiondp
-p01111:      br     conversiondp
-p10000:      br     conversiondp
-p10001:      br     conversiondp
-p10010:      br     conversiondp
-p10011:      br     conversiondp
-p10100:      br     conversiondp
-p10101:      br     conversiondp
-p10110:      br     conversiondp
-p10111:      br     conversiondp
-p11000:      br     conversiondp
-p11001:      br     paddone
-p11010:      br     paddone
-p11011:      br     paddone
-p11100:      br     conversiondp
-p11101:      br     paddone
-p11110:      br     paddone
-p11111:      br     paddone
-	
-/* Add one to the bit of the mantissa which corresponds to the LSB of an */
-/* integer.  If the mantissa overflows, then there is a valid integer */
-/* overflow conversion; otherwise, the mantissa can be converted to the integer. */
-	
-paddone:     or     r10,r0,r0           /* clear r10 */
-	set    r10,r10,1<22>       /* set LSB bit to 1 for adding */
-	addu.co r8,r8,r10          /* add the 1 obtained from rounding */
-	clr    r11,r7,12<20>       /* clear exponent and sign */
-	addu.ci r11,r0,r11         /* add carry */
-	bb1    20,r11,overflw      /* overflow to 2**31, abort the rest */
-	br.n   conversiondp        /* since the exp. was 30, and the exp. */
-	/* did not round up to 31, the largest */
-	/* number that S2 could become is 2**31-1 */
-	or     r7,r0,r11           /* store r11 into r7 for conversion */
-	
-/* Now check for negative double precision sources.  If the exponent is 30, */
-/* then convert the false alarm.  If the exponent is 31, then check the mantissa */
-/* bits which correspond to integer bits.  If any of them are a one, then there */
-/* is overflow.  If they are zero, then check the guard, round, and sticky bits. */
-/* Round toward zero and positive will not cause a roundup, but round toward */
-/* nearest and negative may, so perform those roundings.  If there is no overflow, */
-	/* then convert and return from subroutine. */
-	
-checkdoubn:  cmp    r11,r10,29           /* compare to 30, but exp. off by 1 */
-	bb1    eq,r11,conversiondn  /* false alarm if exp. = 30 */
-	extu   r10,r8,11<21>        /* check upper bits of lower mantissa */
-	bcnd   ne0,r10,overflw      /* one of the bits is a 1, so overflow */
-	extu   r10,r7,20<0>         /* check upper bits of upper mantissa */
-	bcnd   ne0,r10,overflw      /* one of the bits is a 1, so overflow */
-	bb0    rndlo,r1,possround      /* rounding mode is either round near or */
-	/* round negative, which may cause a */
-	/* round */
-	br.n   FPintov_return               /* round positive, which will not cause a */
-	/* round */
-	set    r6,r0,1<sign>        /* rounding mode is either round zero or */
-possround:   extu   r12,r8,1<20>         /* get guard bit */
-	extu   r11,r8,20<0>         /* get bits for sticky bit */
-	bcnd.n eq0,r11,nostickyn    /* do not set sticky */
-	mak    r12,r12,1<1>         /* set up field for branch table */
-	set    r12,r12,1<0>         /* set sticky bit */
-nostickyn:   bb1    rndhi,r1,negative    /* rounding mode is negative */
-nearest:     cmp    r12,r12,3            /* are both guard and sticky set */
-	bb1    eq,r12,overflw       /* both guard and sticky are set, */
-	/* so signal overflow */
-	or     r6,r0,r0             /* clear destination register r6 */
-	br.n   FPintov_return               /* return from subroutine */
-	set    r6,r6,1<sign>        /* set the sign bit and take care of */
-	/* this special case */
-negative:    bcnd   ne0,r12,overflw      /* -2**31 will be rounded to -(2**31+1), */
-	/* so signal overflow */
-	or     r6,r0,r0             /* clear destination register r6 */
-	br.n   FPintov_return               /* return from subroutine */
-	set    r6,r6,1<sign>        /* set the sign bit and take care of */
-	/* this special case */
-	
-	/* since the exp. was 30, and there was */
-	/* no round-up, the largest number that */
-	/* S2 could have been was 2**31 - 1 */
-	
-	
+
+checksize:
+	bb1	s2size,r9,checkdoub 	/* S2 is double, branch to see if */
+					/* there is a false alarm */
+
+/*
+ * An integer has more bits than the mantissa of a single precision floating
+ * point number, so to check for false alarms (i.e. valid conversion), simply
+ * check the exponents. False alarms are detected for 2**30 to (2**31) - 1
+ * and -2**30 to -2**31. Only seven bits need to be looked at since an
+ * exception will not occur for the other half of the numbering system.
+ * To speed up the processing, first check to see if the exponent is 32 or
+ * greater.
+ *
+ * This code was originally written for the exponent in the control
+ * register to have the most significant bit (8 - single, 11 - double)
+ * flipped and sign extended. For precise exceptions, however, the most
+ * significant bit is only sign extended. Therefore, the code was chopped
+ * up so that it would work for positive values of real exponent which were
+ * only sign extended.
+ */
+
+checksing:
+	extu	r10,r7,7<20>	/* internal representation for single */
+				/* precision is IEEE 8 bits sign extended */
+				/* to 11 bits; for real exp. = 30, the */
+				/* above instruction gives a result exp. */
+				/* that has the MSB flipped and sign */
+				/* extended like in the IMPCR */
+	cmp	r11,r10,31	/* compare to 32,but exp. off by 1 */
+				/* these 2 instructions to speed up valid */
+				/* execution of valid cases */
+	bb1	ge,r11,overflw	/* valid case, perform overflow routine */
+	bb1	sign,r7,checksingn /* source operand is negative */
+
+/*
+ * If the number is positive and the exponent is greater than 30, then it is
+ * overflow.
+ */
+checksingp:
+	cmp	r10,r10,29	/* compare to 30, but exp. off by 1 */
+	bb1	gt,r10,overflw	/* no false alarm, it's overflow */
+	br	conversionsp	/* finish single precision conversion */
+
+/*
+ * If the number is negative, and the exponent is 30, or 31 with a mantissa
+ * of 0, then it is a false alarm.
+ */
+checksingn:
+	cmp	r11,r10,30		/* compare to 31,but exp. off by 1 */
+	bb1	lt,r11,conversionsn	/* exp. less than 31, so convert */
+	extu	r10,r8,3<29>		/* get upper three bits of lower */
+					/* mantissa */
+	mak	r12,r7,20<3>		/* get upper 20 bits of mantissa */
+	or	r10,r10,r12		/* form complete mantissa */
+	bcnd	eq0,r10,conversionsn	/* complete conversion if mantissa */
+					/* is 0 */
+	br	overflw			/* no false alarm, it's overflow */
+
+/*
+ * False alarms are detected for 2**30 to (2**31) - 1 and -2**30 to -2**31.
+ * Only seven bits need to be looked at since an exception will not occur
+ * for the other half of the numbering system.
+ * To speed up the processing, first check to see if the exponent is 32 or
+ * greater. Since there are more mantissa bits than integer bits, rounding
+ * could cause overflow. (2**31) - 1 needs to be checked so that it does
+ * not round to 2**31, and -2**31 needs to be checked in case it rounds to
+ * -((2**31) + 1).
+ */
+checkdoub:
+	extu	r10,r7,10<20>	/* internal representation for double */
+				/* precision is the same IEEE 11 bits */
+				/* for real exp. = 30, the */
+				/* above instruction gives a result exp. */
+				/* that has the MSB flipped and sign */
+				/* extended like in the IMPCR */
+	cmp	r11,r10,31	/* compare to 32,but exp. off by 1 */
+				/* these 2 instructions to speed up valid */
+				/* execution of valid cases */
+	bb1	ge,r11,overflw	/* valid case, perform overflow routine */
+	bb1	sign,r7,checkdoubn /* source operand is negative */
+
+/*
+ * If the exponent is not 31, then the floating point number will be rounded
+ * before the conversion is done. A branch table is set up with bits 4 and 3
+ * being the rounding mode, and bits 2, 1, and 0 are the guard, round, and
+ * sticky bits.
+ */
+checkdoubp:
+	cmp	r11,r10,30	/* compare to 31, but exponent off by 1 */
+	bb1	eq,r11,overflw	/* no false alarm, it's overflow */
+	extu	r12,r8,1<22>	/* get LSB for integer with exp. = 30 */
+	mak	r12,r12,1<2>	/* start to set up field for branch table */
+	extu	r11,r8,1<21>	/* get guard bit */
+	mak	r11,r11,1<1>	/* set up field for branch table */
+	or	r12,r11,r12	/* set up field for branch table */
+	extu	r11,r8,21<0>	/* get bits for sticky bit */
+	bcnd	eq0,r11,nostickyp /* do not set sticky */
+	set	r12,r12,1<0>	/* set sticky bit */
+nostickyp:
+	rot	r11,r1,0<rndlo>		/* shift rounding mode to 2 LSBs */
+	mak	r11,r11,2<3>		/* set up field, clear other bits */
+	or	r12,r11,r12		/* set up field for branch table */
+	lda	r12,r0[r12]		/* scale r12 */
+	or.u	r12,r12,hi16(ptable)	/* load pointer into table */
+	addu	r12,r12,lo16(ptable)
+	jmp	r12
+
+ptable:
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	paddone
+	br	conversiondp
+	br	conversiondp
+	br	paddone
+	br	paddone
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	conversiondp
+	br	paddone
+	br	paddone
+	br	paddone
+	br	conversiondp
+	br	paddone
+	br	paddone
+	br	paddone
+
+/*
+ * Add one to the bit of the mantissa which corresponds to the LSB of an
+ * integer. If the mantissa overflows, then there is a valid integer
+ * overflow conversion; otherwise, the mantissa can be converted to the
+ * integer.
+ */
+paddone:
+	or	r10,r0,r0	/* clear r10 */
+	set	r10,r10,1<22>	/* set LSB bit to 1 for adding */
+	addu.co	r8,r8,r10	/* add the 1 obtained from rounding */
+	clr	r11,r7,12<20>	/* clear exponent and sign */
+	addu.ci	r11,r0,r11	/* add carry */
+	bb1	20,r11,overflw	/* overflow to 2**31, abort the rest */
+	br.n	conversiondp	/* since the exp. was 30, and the exp. */
+				/* did not round up to 31, the largest */
+				/* number that S2 could become is 2**31-1 */
+	 or	r7,r0,r11	/* store r11 into r7 for conversion */
+
+/*
+ * Now check for negative double precision sources. If the exponent is 30,
+ * then convert the false alarm. If the exponent is 31, then check the
+ * mantissa bits which correspond to integer bits. If any of them are a one,
+ * then there is overflow. If they are zero, then check the guard, round,
+ * and sticky bits.
+ * Round toward zero and positive will not cause a roundup, but round toward
+ * nearest and negative may, so perform those roundings. If there is no
+ * overflow, then convert and return.
+ */
+checkdoubn:
+	cmp	r11,r10,29		/* compare to 30, but exp. off by 1 */
+	bb1	eq,r11,conversiondn	/* false alarm if exp. = 30 */
+	extu	r10,r8,11<21>		/* check upper bits of lower mantissa */
+	bcnd	ne0,r10,overflw		/* one of the bits is a 1, so oflow */
+	extu	r10,r7,20<0>		/* check upper bits of upper mantissa */
+	bcnd	ne0,r10,overflw		/* one of the bits is a 1, so oflow */
+	bb0	rndlo,r1,possround	/* rounding mode is either round near */
+					/* or round negative, which may cause */
+					/* a round */
+	br.n	FPintov_return		/* round zero or positive, which */
+					/* will not cause a round */
+	 set	r6,r0,1<sign>
+possround:
+	extu	r12,r8,1<20>		/* get guard bit */
+	extu	r11,r8,20<0>		/* get bits for sticky bit */
+	bcnd.n	eq0,r11,nostickyn	/* do not set sticky */
+	 mak	r12,r12,1<1>		/* set up field for branch table */
+	set	r12,r12,1<0>		/* set sticky bit */
+nostickyn:
+	bb1	rndhi,r1,negative	/* rounding mode is negative */
+nearest:
+	cmp	r12,r12,3		/* are both guard and sticky set */
+	bb1	eq,r12,overflw		/* both guard and sticky are set, */
+					/* so signal overflow */
+	or	r6,r0,r0		/* clear destination register r6 */
+	br.n	FPintov_return
+	 set	r6,r6,1<sign>		/* set the sign bit and take care of */
+					/* this special case */
+negative:
+	bcnd	ne0,r12,overflw		/* -2**31 will be rounded to */
+					/* -(2**31+1), so signal overflow */
+	or	r6,r0,r0		/* clear destination register r6 */
+	br.n	FPintov_return
+	 set	r6,r6,1<sign>		/* set the sign bit and take care of */
+					/* this special case */
+
+	/*
+	 * Since the exp. was 30, and there was no round-up, the largest
+	 * number that S2 could have been was 2**31 - 1
+	 */
+
+
 	/* Convert the single precision positive floating point number. */
-	
-conversionsp: extu  r6,r8,3<29>         /* extract lower bits of integer */
-	mak   r6,r6,3<7>           /* shift left to correct place in integer */
-	mak   r10,r7,20<10>        /* shift left upper bits of integer */
-	or    r6,r6,r10            /* form most of integer */
-	br.n  FPintov_return               /* return from subroutine */
-	set   r6,r6,1<30>          /* set hidden one */
-	
-	
+
+conversionsp:
+	extu	r6,r8,3<29>	/* r6 = top 3 bits of the low mantissa word */
+	mak	r6,r6,3<7>	/* move them to bits 9-7 of the integer */
+	mak	r10,r7,20<10>	/* low 20 bits of r7 go to bits 29-10 */
+	or	r6,r6,r10	/* form most of integer */
+	br.n	FPintov_return	/* return with the result in r6 */
+	 set	r6,r6,1<30>	/* (delay slot) set hidden one */
+
 	/* Convert the single precision negative floating point number. */
-	
-conversionsn: bb1   eq,r11,exp31s       /* use old r11 to see if exp. is 31 */
-	extu  r6,r8,3<29>          /* extract lower bits of mantissa */
-	mak   r6,r6,3<7>           /* shift left to correct place in integer */
-	mak   r10,r7,20<10>        /* shift left upper bits of integer */
-	or    r6,r6,r10            /* form most of integer */
-	set   r6,r6,1<30>          /* set hidden one */
-	or.c  r6,r0,r6             /* negate result */
-	br.n  FPintov_return               /* return from subroutine */
-	addu  r6,r6,1              /* add 1 to get 2''s complement */
-exp31s:      or    r6,r0,r0             /* clear r6 */
-	br.n  FPintov_return               /* return from subroutine */
-	set   r6,r6,1<sign>        /* set sign bit */
-	
-	
+
+conversionsn:
+	bb1	eq,r11,exp31s	/* use old r11 to see if exp. is 31 */
+	extu	r6,r8,3<29>	/* r6 = top 3 bits of the low mantissa word */
+	mak	r6,r6,3<7>	/* move them to bits 9-7 of the integer */
+	mak	r10,r7,20<10>	/* low 20 bits of r7 go to bits 29-10 */
+	or	r6,r6,r10	/* form most of integer */
+	set	r6,r6,1<30>	/* set hidden one */
+	or.c	r6,r0,r6	/* complement the magnitude */
+	br.n	FPintov_return	/* return with the result in r6 */
+	 addu	r6,r6,1		/* add 1 to get 2''s complement */
+exp31s:
+	or	r6,r0,r0	/* clear r6 */
+	br.n	FPintov_return	/* return with only the sign bit set */
+	 set	r6,r6,1<sign>	/* set sign bit */
+
 	/* Convert the double precision positive floating point number. */
-	
-conversiondp: extu r6,r8,10<22>         /* extract lower bits of integer */
-	mak   r10,r7,20<10>        /* shift left upper bits of integer */
-	or    r6,r6,r10            /* form most of integer */
-	br.n  FPintov_return               /* return from subroutine */
-	set   r6,r6,1<30>          /* set hidden one */
-	
-	
-	/* Convert the double precision negative floating point number.  The number, */
-	/* whose exponent is 30, must be rounded before converting.  Bits 4 and 3 are */
-	/* the rounding mode, and bits 2, 1, and 0 are the guard, round, and sticky */
-	/* bits for the branch table. */
-	
-conversiondn: extu   r12,r8,1<22>       /* get LSB for integer with exp. = 30 */
-	mak    r12,r12,1<2>        /* start to set up field for branch table */
-	extu   r11,r8,1<21>        /* get guard bit */
-	mak    r11,r11,1<1>        /* set up field for branch table */
-	or     r12,r11,r12         /* set up field for branch table */
-	extu   r11,r8,21<0>        /* get bits for sticky bit */
-	bcnd   eq0,r11,nostkyn     /* do not set sticky */
-	set    r12,r12,1<0>        /* set sticky bit */
-nostkyn:     rot    r11,r1,0<rndlo>     /* shift rounding mode to 2 LSB''s */
-	mak    r11,r11,2<3>        /* set up field, clear other bits */
-	or     r12,r11,r12         /* set up field for branch table */
-	lda    r12,r0[r12]         /* scale r12 */
-	or.u   r12,r12,hi16(ntable)/* load pointer into table */
-	addu   r12,r12,lo16(ntable)
-	jmp    r12                 /* jump into branch table */
-	
-ntable:      br     nnoaddone
-n00001:      br     nnoaddone
-n00010:      br     nnoaddone
-n00011:      br     naddone
-n00100:      br     nnoaddone
-n00101:      br     nnoaddone
-n00110:      br     naddone
-n00111:      br     naddone
-n01000:      br     nnoaddone
-n01001:      br     nnoaddone
-n01010:      br     nnoaddone
-n01011:      br     nnoaddone
-n01100:      br     nnoaddone
-n01101:      br     nnoaddone
-n01110:      br     nnoaddone
-n01111:      br     nnoaddone
-n10000:      br     nnoaddone
-n10001:      br     naddone
-n10010:      br     naddone
-n10011:      br     naddone
-n10100:      br     nnoaddone
-n10101:      br     naddone
-n10110:      br     naddone
-n10111:      br     naddone
-n11000:      br     nnoaddone
-n11001:      br     nnoaddone
-n11010:      br     nnoaddone
-n11011:      br     nnoaddone
-n11100:      br     nnoaddone
-n11101:      br     nnoaddone
-n11110:      br     nnoaddone
-n11111:      br     nnoaddone
-	
-	
-	/* Add one to the mantissa, and check to see if it overflows to -2**31. */
-/* The conversion is done in nnoaddone:. */
-	
-naddone:     or     r10,r0,r0           /* clear r10 */
-	set    r10,r10,1<22>       /* set LSB bit to 1 for adding */
-	add.co r8,r8,r10           /* add the 1 obtained from rounding */
-	clr    r7,r7,12<20>        /* clear exponent and sign */
-	add.ci r7,r0,r7            /* add carry */
-	bb1    20,r7,maxneg        /* rounded to -2**31,handle separately */
-	/* the exponent was originally 30 */
-nnoaddone:   extu   r6,r8,11<22>        /* extract lower bits of integer */
-	mak    r10,r7,20<10>       /* shift left upper bits of integer */
-	or     r6,r6,r10           /* form most of integer */
-	set    r6,r6,1<30>         /* set hidden one */
-	or.c   r6,r0,r6            /* negate integer */
-	br.n   FPintov_return              /* return from subroutine */
-	addu   r6,r6,1             /* add 1 to get 2''s complement */
-	
-maxneg:      or     r6,r0,r0            /* clear integer */
-	br.n   FPintov_return              /* return from subroutine */
-	set    r6,r6,1<sign>       /* set sign bit */
-	
-	
-	/* For valid overflows, check to see if the integer overflow user handler is  */
-	/* set.  If it is set, then go to user handler, else write the correctly */
-	/* signed largest integer. */
-	
-overflw:  
+
+conversiondp:
+	extu	r6,r8,10<22>	/* bits 31-22 of r8 are integer bits 9-0 */
+	mak	r10,r7,20<10>	/* low 20 bits of r7 go to bits 29-10 */
+	or	r6,r6,r10	/* form most of integer */
+	br.n	FPintov_return	/* return with the result in r6 */
+	 set	r6,r6,1<30>	/* (delay slot) set hidden one */
+
+	/*
+	 * Convert the double precision negative floating point number.
+	 * The number, whose exponent is 30, must be rounded before converting.
+	 * Bits 4 and 3 of the branch table index are the rounding mode, and
+	 * bits 2, 1, and 0 are the integer LSB, guard, and sticky bits.
+	 */
+
+conversiondn:
+	extu	r12,r8,1<22>	/* get LSB for integer with exp. = 30 */
+	mak	r12,r12,1<2>	/* LSB becomes bit 2 of the table index */
+	extu	r11,r8,1<21>	/* get guard bit */
+	mak	r11,r11,1<1>	/* guard becomes bit 1 of the table index */
+	or	r12,r11,r12	/* combine LSB and guard */
+	extu	r11,r8,21<0>	/* get bits for sticky bit */
+	bcnd	eq0,r11,nostkyn	/* do not set sticky */
+	set	r12,r12,1<0>	/* sticky becomes bit 0 of the table index */
+nostkyn:
+	rot	r11,r1,0<rndlo>	/* rotate rounding mode down to bits 1-0 */
+	mak	r11,r11,2<3>	/* move it to bits 4-3, clear other bits */
+	or	r12,r11,r12	/* r12 = complete 5-bit table index */
+	lda	r12,r0[r12]	/* scale r12 to a word offset */
+	or.u	r12,r12,hi16(ntable) /* load pointer into table */
+	addu	r12,r12,lo16(ntable)
+	jmp	r12		/* jump into branch table */
+
+ntable:				/* index: mode<4:3> LSB<2> guard<1> sticky<0> */
+	br	nnoaddone	/* 00 000 */
+	br	nnoaddone	/* 00 001 */
+	br	nnoaddone	/* 00 010 */
+	br	naddone		/* 00 011 */
+	br	nnoaddone	/* 00 100 */
+	br	nnoaddone	/* 00 101 */
+	br	naddone		/* 00 110 */
+	br	naddone		/* 00 111 */
+	br	nnoaddone	/* 01 000 */
+	br	nnoaddone	/* 01 001 */
+	br	nnoaddone	/* 01 010 */
+	br	nnoaddone	/* 01 011 */
+	br	nnoaddone	/* 01 100 */
+	br	nnoaddone	/* 01 101 */
+	br	nnoaddone	/* 01 110 */
+	br	nnoaddone	/* 01 111 */
+	br	nnoaddone	/* 10 000 */
+	br	naddone		/* 10 001 */
+	br	naddone		/* 10 010 */
+	br	naddone		/* 10 011 */
+	br	nnoaddone	/* 10 100 */
+	br	naddone		/* 10 101 */
+	br	naddone		/* 10 110 */
+	br	naddone		/* 10 111 */
+	br	nnoaddone	/* 11 000 */
+	br	nnoaddone	/* 11 001 */
+	br	nnoaddone	/* 11 010 */
+	br	nnoaddone	/* 11 011 */
+	br	nnoaddone	/* 11 100 */
+	br	nnoaddone	/* 11 101 */
+	br	nnoaddone	/* 11 110 */
+	br	nnoaddone	/* 11 111 */
+
+/*
+ * Add one to the mantissa, and check to see if it overflows to -2**31.
+ * The conversion is done in nnoaddone.
+ */
+
+naddone:
+	or	r10,r0,r0	/* clear r10 */
+	set	r10,r10,1<22>	/* set LSB bit to 1 for adding */
+	addu.co	r8,r8,r10	/* add rounding 1; unsigned, so no ovf trap */
+	clr	r7,r7,12<20>	/* clear exponent and sign */
+	addu.ci	r7,r0,r7	/* add carry (unsigned, as in paddone) */
+	bb1	20,r7,maxneg	/* rounded to -2**31,handle separately */
+				/* the exponent was originally 30 */
+nnoaddone:
+	extu	r6,r8,11<22>	/* extract lower bits of integer */
+	mak	r10,r7,20<10>	/* shift left upper bits of integer */
+	or	r6,r6,r10	/* form most of integer */
+	set	r6,r6,1<30>	/* set hidden one */
+	or.c	r6,r0,r6	/* complement the magnitude */
+	br.n	FPintov_return	/* return with the result in r6 */
+	 addu	r6,r6,1		/* add 1 to get 2''s complement */
+
+maxneg:
+	or	r6,r0,r0	/* clear integer */
+	br.n	FPintov_return	/* return -2**31 */
+	 set	r6,r6,1<sign>	/* set sign bit */
+
+	/*
+	 * For valid overflows, check to see if the integer overflow user
+	 * handler is set. If it is set, then go to user handler, else write
+	 * the correctly signed largest integer.
+	 */
+
+overflw:
 #ifdef HANDLER
-	bb0.n  oper,r3,nohandler   /* do not go to user handler routine */
-	set    r2,r2,1<oper>       /* set invalid operand bit */
-	bsr    _handler            /* go to user handler routine */
-	br     FPintov_return              /* return from subroutine */
-nohandler:   
+	bb0.n	oper,r3,nohandler	/* do not go to user handler routine */
+	 set	r2,r2,1<oper>		/* set invalid operand bit */
+	bsr	_handler		/* go to user handler routine */
+	br	FPintov_return
+nohandler:
 #endif
-	bb0.n  sign,r7,FPintov_return      /* if positive then return from subroutine */
-	set    r6,r6,31<0>         /* set result to largest positive integer */
-	or.c   r6,r0,r6            /* negate r6,giving largest negative int. */
-	
-FPintov_return:      ld     r1,r31,0     /* load return address from memory */
-	jmp    r1                  /* return from subroutine */
-	
-	data
+	bb0.n	sign,r7,FPintov_return	/* if positive then return */
+	 set	r6,r6,31<0>		/* set result to largest positive int */
+	or.c	r6,r0,r6		/* negate r6, giving largest negative */
+					/* integer */
+
+FPintov_return:
+	ld	r1,r31,0		/* load return address from memory */
+	jmp	r1			/* return from subroutine */
 
-/* Some instructions only have the S2 operations, so clear S1HI and S1LO */
-/* for those instructions so that the previous contents of S1HI and S1LO */
-/* do not influence this instruction. */
-
-             text
-GLOBAL(FPresoper)
-	     st	    r1, r31, 0
-	     extu   r10,r9,5<11>   /* extract opcode */
-/*             cmp    r11,r10,FSQRTop ;compare to FSQRT */
-/*             bb1    eq,r11,S1clear ;clear S1 if instruction only had S2 operand */
-             cmp    r11,r10,INTop  /* compare to INT */
-             bb1    eq,r11,S1clear /* clear S1 if instruction only had S2 operand */
-             cmp    r11,r10,NINTop /* compare to NINT */
-             bb1    eq,r11,S1clear /* clear S1 if instruction only had S2 operand */
-             cmp    r11,r10,TRNCop /* compare to TRNC */
-             bb0    eq,r11,opercheck /* check for reserved operands */
-
-ASGLOBAL(S1clear)
-	     or     r5,r0,r0       /* clear any NaN''s, denorms, or infinities */
-             or     r6,r0,r0       /* that may be left in S1HI,S1LO from a  */
-                                   /* previous instruction */
-
-/* r12 contains the following flags: */
-/* 		bit 9 -- s1sign */
-/* 		bit 8 -- s2sign */
-/* 		bit 7 -- s1nan */
-/* 		bit 6 -- s2nan */
-/* 		bit 5 -- s1inf */
-/* 		bit 4 -- s2inf */
-/* 		bit 3 -- s1zero */
-/* 		bit 2 -- s2zero */
-/* 		bit 1 -- s1denorm */
-/* 		bit 0 -- s2denorm */
-
-/* Using code for both single and double precision, check if S1 is either */
-/* a NaN or infinity and set the appropriate flags in r12.  Then check if */
-/* S2 is a NaN or infinity.  If it is a NaN, then branch to the NaN routine. */
-
-             
-ASGLOBAL(opercheck)
-	     extu   r10,r5,11<20>        /* internal representation for double */
-             bb1.n  s1size,r9,S1NaNdoub /* S1 is double precision */
-             or     r12,r0,r0      /* clear operand flag register */
-ASGLOBAL(S1NaNsing)
-	     xor    r10,r10,0x0080       /* internal representation for single */
-             ext    r10,r10,8<0>         /* precision is IEEE 8 bits sign extended */
-                                         /* to 11 bits; for real exp. > 0, the */
-                                         /* above instructions gives a result exp. */
-                                         /* that has the MSB flipped and sign */
-                                         /* extended like in the IMPCR */
-             cmp    r11,r10,127    /* Is exponent equal to IEEE 255 (internal 127) */
-             bb1    ne,r11,S2NaN   /* source 1 is not a NaN or infinity */
-             mak    r10,r5,20<0>   /* load r10 with upper bits of S1 mantissa */
-             extu   r11,r6,3<29>   /* get 3 upper bits of lower word */
-             or     r11,r10,r11    /* combine any existing 1''s */
-             bcnd   eq0,r11,noS1NaNs /* since r11 can only hold 0 or a positive */
-                                   /* number, branch to noS1NaN when eq0 */
-             br.n   S2NaN          /* see if S2 has a NaN */
-             set    r12,r12,1<s1nan> /* indicate that S1 has a NaN */
-ASGLOBAL(noS1NaNs)
-	     br.n   S2NaN          /* check contents of S2 */
-             set    r12,r0,1<s1inf> /* indicate that S1 has an infinity */
-
-ASGLOBAL(S1NaNdoub)
-	     xor    r10,r10,0x0400       /* precision is the same IEEE 11 bits  */
-                                         /* The */
-                                         /* above instructions gives a result exp. */
-                                         /* that has the MSB flipped and sign */
-                                         /* extended like in the IMPCR */
-             cmp    r11,r10,1023   /* Is exp. equal to IEEE 2047 (internal 1023) */
-             bb1    ne,r11,S2NaN   /* source 1 is not a NaN or infinity */
-             mak    r10,r5,20<0>   /* load r10 with upper bits of S1 mantissa */
-             or     r11,r6,r10     /* combine existing 1''s of mantissa */
-             bcnd   eq0,r11,noS1NaNd /* since r11 can only hold 0 or a positive */
-                                   /* number, branch to noS1NaN when eq0 */
-             br.n   S2NaN          /* see if S2 has a NaN */
-             set    r12,r12,1<s1nan> /* indicate that S1 has a NaN */
-ASGLOBAL(noS1NaNd)
-	     set    r12,r0,1<s1inf> /* indicate that S1 has an infinity */
-
-ASGLOBAL(S2NaN)
-	     bb1.n  s2size,r9,S2NaNdoub /* S1 is double precision */
-             extu   r10,r7,11<20>        /* internal representation for double */
-ASGLOBAL(S2NaNsing)
-	     xor    r10,r10,0x0080       /* internal representation for single */
-             ext    r10,r10,8<0>         /* precision is IEEE 8 bits sign extended */
-                                         /* to 11 bits; for real exp. > 0, the */
-                                         /* above instruction gives a result exp. */
-                                         /* that has the MSB flipped and sign */
-                                         /* extended like in the IMPCR */
-             cmp    r11,r10,127    /* Is exponent equal to IEEE 255 (internal 127) */
-             bb1    ne,r11,inf     /* source 2 is not a NaN or infinity */
-             mak    r10,r7,20<0>   /* load r10 with upper bits of S1 mantissa */
-             extu   r11,r8,3<29>   /* get 3 upper bits of lower word */
-             or     r11,r10,r11    /* combine any existing 1''s */
-             bcnd   eq0,r11,noS2NaNs /* since r11 can only hold 0 or a positive */
-                                   /* number, branch to noS2NaNs when eq0 */
-             br.n   _NaN           /* branch to NaN routine */
-             set    r12,r12,1<s2nan> /* indicate that s2 has a NaN */
-ASGLOBAL(noS2NaNs)
-	     bb0    s1nan,r12, 1f /* branch to NaN if S1 is a NaN */
-	     br	    _NaN
-1:           br.n   _infinity      /* If S1 had a NaN we would have already */
-                                   /* branched, and S2 does not have a NaN, but */
-                                   /* it does have an infinity, so branch to  */
-                                   /* handle the finity */
-             set    r12,r12,1<s2inf> /* indicate that S2 has an infinity */
-
-ASGLOBAL(S2NaNdoub)
-	     xor    r10,r10,0x0400       /* precision is the same IEEE 11 bits  */
-                                         /* The */
-                                         /* above instruction gives a result exp. */
-                                         /* that has the MSB flipped and sign */
-                                         /* extended like in the IMPCR */
-             cmp    r11,r10,1023   /* Is exp. equal to IEEE 2047 (internal 1023) */
-             bb1    ne,r11,inf     /* source 2 is not a NaN or infinity */
-             mak    r10,r7,20<0>   /* load r10 with upper bits of S2 mantissa */
-             or     r11,r8,r10     /* combine existing 1''s of mantissa */
-             bcnd   eq0,r11,noS2NaNd /* since r11 can only hold 0 or a positive */
-                                   /* number, branch to noS2NaNd when eq0 */
-             br.n   _NaN           /* branch to NaN routine */
-             set    r12,r12,1<s2nan> /* indicate that s2 has a NaN */
-ASGLOBAL(noS2NaNd)
-	     bb0    s1nan,r12,1f /* branch to NaN if S1 is a NaN */
-	     br	    _NaN
-1:           br.n   _infinity      /* If S1 had a NaN we would have already */
-                                   /* branched, and S2 does not have a NaN, but */
-                                   /* it does have an infinity, so branch to  */
-                                   /* handle the finity */
-             set    r12,r12,1<s2inf> /* indicate that S2 has an infinity */
-
-
-/* If S2 was a NaN, the routine would have already branched to NaN.  If S1 */
-/* is a NaN, then branch to NaN.  If S1 is not a NaN and S2 is infinity, then */
-/* we would have already branched to infinity.  If S1 is infinity, then branch. */
-/* If the routine still has not branched, then branch to denorm, the only  */
-/* reserved operand left. */
-
-ASGLOBAL(inf)
-	     bb0    s1nan,r12,1f  /* branch if S1 has a NaN and S2 does not */
-	     br	    _NaN
-1:           bb0    s1inf,r12,2f    /* Neither S1 or S2 has a NaN, and we would */
-                                    /* have branched already if S2 had an  */
-                                    /* infinity, so branch if S1 is infinity */
 /*
- * The above "bb0 s1inf, r12,2f" had been a "bb1", but it just didn't make
- * sense (and didn't work, either), so I changed it.
- * 	jfriedl Dec 1, 1989.
+ * Some instructions only have an S2 operand, so clear S1HI and S1LO
+ * for those instructions so that the previous contents of S1HI and S1LO
+ * do not influence this instruction.
+ */
+
+ASLOCAL(FPresoper)
+	st	r1, r31, 0	/* save return address */
+	extu	r10,r9,5<11>	/* extract opcode (bits 15-11 of r9) */
+#if 0
+	cmp	r11,r10,FSQRTop	/* compare to FSQRT */
+	bb1	eq,r11,S1clear	/* clear S1 if instruction only had S2 operand */
+#endif
+	cmp	r11,r10,INTop	/* compare to INT */
+	bb1	eq,r11,S1clear	/* clear S1 if instruction only had S2 operand */
+	cmp	r11,r10,NINTop	/* compare to NINT */
+	bb1	eq,r11,S1clear	/* clear S1 if instruction only had S2 operand */
+	cmp	r11,r10,TRNCop	/* compare to TRNC */
+	bb0	eq,r11,opercheck /* not TRNC either: keep S1, check operands */
+
+ASLOCAL(S1clear)
+	or	r5,r0,r0	/* clear any NaN''s, denorms, or infinities */
+	or	r6,r0,r0	/* that may be left in S1HI,S1LO from a */
+				/* previous instruction */
+
+/*
+ * r12 contains the following flags:
+ *   bit 9 -- s1sign
+ *   bit 8 -- s2sign
+ *   bit 7 -- s1nan
+ *   bit 6 -- s2nan
+ *   bit 5 -- s1inf
+ *   bit 4 -- s2inf
+ *   bit 3 -- s1zero
+ *   bit 2 -- s2zero
+ *   bit 1 -- s1denorm
+ *   bit 0 -- s2denorm
+ */
+
+/*
+ * Using code for both single and double precision, check if S1 is either
+ * a NaN or infinity and set the appropriate flags in r12. Then check if
+ * S2 is a NaN or infinity. If it is a NaN, then branch to the NaN routine.
  */
-	     br	    _infinity
-2:
 
-	br     _denorm        /* branch to denorm, the only remaining */
-                                   /* alternative */
+ASLOCAL(opercheck)
+	extu	r10,r5,11<20>	/* internal representation for double */
+	bb1.n	s1size,r9,S1NaNdoub /* S1 is double precision */
+	 or	r12,r0,r0	/* clear operand flag register */
+ASLOCAL(S1NaNsing)
+	xor	r10,r10,0x0080	/* internal representation for single */
+	ext	r10,r10,8<0>	/* precision is IEEE 8 bits sign extended */
+				/* to 11 bits; for real exp. > 0, the */
+				/* above instructions give a result exp. */
+				/* that has the MSB flipped and sign */
+				/* extended like in the IMPCR */
+	cmp	r11,r10,127	/* Is exponent equal to IEEE 255 (here 127) */
+	bb1	ne,r11,S2NaN	/* source 1 is not a NaN or infinity */
+	mak	r10,r5,20<0>	/* load r10 with upper bits of S1 mantissa */
+	extu	r11,r6,3<29>	/* get 3 upper bits of lower word */
+	or	r11,r10,r11	/* combine any set mantissa bits */
+	bcnd	eq0,r11,noS1NaNs /* since r11 can only hold 0 or a */
+				/* > 0 number, branch to noS1NaNs when eq0 */
+	br.n	S2NaN		/* see if S2 has a NaN */
+	 set	r12,r12,1<s1nan> /* indicate that S1 has a NaN */
+ASLOCAL(noS1NaNs)
+	br.n	S2NaN		/* check contents of S2 */
+	 set	r12,r0,1<s1inf>	/* indicate that S1 has an infinity */
+
+ASLOCAL(S1NaNdoub)
+	xor	r10,r10,0x0400	/* precision is the same IEEE 11 bits */
+				/* The above instruction gives a result exp. */
+				/* that has the MSB flipped and sign */
+				/* extended like in the IMPCR */
+	cmp	r11,r10,1023	/* Is exp. equal to IEEE 2047 (internal 1023) */
+	bb1	ne,r11,S2NaN	/* source 1 is not a NaN or infinity */
+	mak	r10,r5,20<0>	/* load r10 with upper bits of S1 mantissa */
+	or	r11,r6,r10	/* combine existing 1''s of mantissa */
+	bcnd	eq0,r11,noS1NaNd /* since r11 can only hold 0 or a > 0 */
+				/* number, branch to noS1NaNd when eq0 */
+	br.n	S2NaN		/* see if S2 has a NaN */
+	 set	r12,r12,1<s1nan> /* indicate that S1 has a NaN */
+ASLOCAL(noS1NaNd)
+	set	r12,r0,1<s1inf>	/* S1 has an infinity; fall into S2NaN */
+
+ASLOCAL(S2NaN)
+	bb1.n	s2size,r9,S2NaNdoub /* S2 is double precision */
+	 extu	r10,r7,11<20>	/* internal representation for double */
+ASLOCAL(S2NaNsing)
+	xor	r10,r10,0x0080	/* internal representation for single */
+	ext	r10,r10,8<0>	/* precision is IEEE 8 bits sign extended */
+				/* to 11 bits; for real exp. > 0, the */
+				/* above instructions give a result exp. */
+				/* that has the MSB flipped and sign */
+				/* extended like in the IMPCR */
+	cmp	r11,r10,127	/* Is exponent equal to IEEE 255 (here 127) */
+	bb1	ne,r11,inf	/* source 2 is not a NaN or infinity */
+	mak	r10,r7,20<0>	/* load r10 with upper bits of S2 mantissa */
+	extu	r11,r8,3<29>	/* get 3 upper bits of lower word */
+	or	r11,r10,r11	/* combine any existing 1''s */
+	bcnd	eq0,r11,noS2NaNs /* since r11 can only hold 0 or a > 0 */
+				/* number, branch to noS2NaNs when eq0 */
+	br.n	_ASM_LABEL(NaN)	/* branch to NaN routine */
+	 set	r12,r12,1<s2nan> /* indicate that s2 has a NaN */
+ASLOCAL(noS2NaNs)
+	bb0	s1nan,r12, 1f	/* skip the NaN branch if S1 is not a NaN */
+	br	_ASM_LABEL(NaN)	/* S1 is a NaN */
+1:
+	br.n	_ASM_LABEL(infinity) /* If S1 had a NaN we would have */
+				/* already branched, and S2 does not have a */
+				/* NaN, but it does have an infinity, so */
+				/* branch to handle the infinity */
+	 set	r12,r12,1<s2inf> /* indicate that S2 has an infinity */
+
+ASLOCAL(S2NaNdoub)
+	xor	r10,r10,0x0400	/* precision is the same IEEE 11 bits */
+				/* The above instruction gives a result exp. */
+				/* that has the MSB flipped and sign */
+				/* extended like in the IMPCR */
+	cmp	r11,r10,1023	/* Is exp. equal to IEEE 2047 (internal 1023) */
+	bb1	ne,r11,inf	/* source 2 is not a NaN or infinity */
+	mak	r10,r7,20<0>	/* load r10 with upper bits of S2 mantissa */
+	or	r11,r8,r10	/* combine existing 1''s of mantissa */
+	bcnd	eq0,r11,noS2NaNd /* since r11 can only hold 0 or a > 0 */
+				/* number, branch to noS2NaNd when eq0 */
+	br.n	_ASM_LABEL(NaN)	/* branch to NaN routine */
+	 set	r12,r12,1<s2nan> /* indicate that s2 has a NaN */
+ASLOCAL(noS2NaNd)
+	bb0	s1nan,r12,1f	/* skip the NaN branch if S1 is not a NaN */
+	br	_ASM_LABEL(NaN)	/* S1 is a NaN */
+1:
+	br.n	_ASM_LABEL(infinity) /* If S1 had a NaN we would have */
+				/* already branched, and S2 does not have a */
+				/* NaN, but it does have an infinity, so */
+				/* branch to handle the infinity */
+	 set	r12,r12,1<s2inf> /* indicate that S2 has an infinity */
 
-/* function _FPunderflow --       */
-/* The documentation for this release give an overall description of this code. */
+/*
+ * If S2 was a NaN, the routine would have already branched to NaN. If S1
+ * is a NaN, then branch to NaN. If S1 is not a NaN and S2 is infinity, then
+ * we would have already branched to infinity. If S1 is infinity, then branch.
+ * If the routine still has not branched, then branch to denorm, the only
+ * reserved operand left.
+ */
 
-                text
-                global _FPunderflow
+ASLOCAL(inf)
+	bb0	s1nan,r12,1f	/* skip ahead unless S1 has a NaN (S2 does not) */
+	br	_ASM_LABEL(NaN)	/* S1 is a NaN */
+1:
+	bb0	s1inf,r12,2f	/* Neither S1 nor S2 has a NaN, and we would */
+				/* have branched already if S2 had an */
+				/* infinity, so skip unless S1 is infinity */
+	br	_ASM_LABEL(infinity)	/* S1 is an infinity */
+2:
+	br	_ASM_LABEL(denorm)	/* branch to denorm, the only */
+					/* remaining alternative */
 
-/* First check for an underflow user handler.  If there is not one, then */
-/* branch to the routine to make a denormalized number.  Before branching */
-/* to the underflow user handler, add 192 to a single precision exponent */
-/* and 1536 to a double precision exponent. */
+/*
+ * First check for an underflow user handler. If there is not one, then
+ * branch to the routine to make a denormalized number. Before branching
+ * to the underflow user handler, add 192 to a single precision exponent
+ * and 1536 to a double precision exponent.
+ */
 
-_FPunderflow:   st    r1,r31,0 /* save return address */
+ASLOCAL(FPunderflow)
+	st	r1,r31,0	/* save return address */
 #ifdef HANDLER
-		bb0 efunf,r12,denorm /* jump to default procedure */
-                bb1.n destsize,r12,doubleprec /* double precision destination */
-                set   r2,r2,1<underflow>  /* set underflow flag in FPSR */
-singleprec:     or.u  r6,r0,0x0c00 /* load exponent adjust 192 */
-                br.n  callundhand  /* branch to call handler for user handler */
-                add   r12,r6,r12   /* adjust single precision exponent */
-doubleprec:     or.u  r6,r0,0x6000 /* load exponent adjust 1536 */
-                add   r12,r6,r12   /* adjust double precision exponent */
-callundhand:    bsr   _handler     /* call handler for user handler */
-                br    Ureturn       /* return from subroutine */
+	bb0	efunf,r12,FPU_denorm	/* jump to default procedure */
+	bb1.n	destsize,r12,doubleprec	/* double precision destination */
+	 set	r2,r2,1<underflow>	/* set underflow flag in FPSR */
+singleprec:
+	or.u	r6,r0,0x0c00	/* load exponent adjust 192 */
+	br.n	callundhand	/* branch to call handler for user handler */
+	 add	r12,r6,r12	/* adjust single precision exponent */
+doubleprec:
+	or.u	r6,r0,0x6000	/* load exponent adjust 1536 */
+	add	r12,r6,r12	/* adjust double precision exponent */
+callundhand:
+	bsr	_handler	/* call handler for user handler */
+	br	Ureturn
 #endif
 
-/* Now the floating point number, which has an exponent smaller than what */
-/* IEEE allows, must be denormalized.  Denormalization is done by calculating */
-/* the difference between a denormalized exponent and an underflow exponent and */
-/* shifting the mantissa by that amount.  A one may need to be subtracted from  */
-/* the LSB if a one was added during rounding. */
-/* r9 is used to contain the guard, round, sticky, and an inaccuracy bit in */
-/* case some bits were shifted off the mantissa during denormalization. */
-/* r9 will contain: bit 4 -- new addone if one added during rounding  */
-/*                           after denormalization */
-/*                  bit 3 -- inaccuracy flag caused by denormalization */
-/* 			   or pre-denormalization inexactness */
-/*                  bit 2 -- guard bit of result */
-/*                  bit 1 -- round bit of result */
-/*                  bit 0 -- sticky bit of result */
-
-denorm:         bb1.n destsize,r12,Udouble /* denorm for double */
-                extu  r9,r10,3<26>   /* load r9 with grs */
-Usingle:         mak   r5,r10,21<3> /* extract high 21 bits of mantissa */
-                extu  r6,r11,3<29> /* extract low 3 bits of mantissa */
-                or    r11,r5,r6     /* form 24 bits of mantissa */
+/*
+ * Now the floating point number, which has an exponent smaller than what
+ * IEEE allows, must be denormalized. Denormalization is done by calculating
+ * the difference between a denormalized exponent and an underflow exponent
+ * and shifting the mantissa by that amount. A one may need to be subtracted
+ * from the LSB if a one was added during rounding.
+ * r9 is used to contain the guard, round, sticky, and an inaccuracy bit in
+ * case some bits were shifted off the mantissa during denormalization.
+ * r9 will contain:
+ *   bit 4 -- new addone if one added during rounding after denormalization
+ *   bit 3 -- inaccuracy flag caused by denormalization or pre-denormalization
+ *            inexactness
+ *   bit 2 -- guard bit of result
+ *   bit 1 -- round bit of result
+ *   bit 0 -- sticky bit of result
+ */
+
+FPU_denorm:
+	bb1.n	destsize,r12,Udouble 	/* denorm for double */
+	 extu	r9,r10,3<26>	/* load r9 with grs */
+Usingle:
+	mak	r5,r10,21<3>	/* extract high 21 bits of mantissa */
+	extu	r6,r11,3<29>	/* extract low 3 bits of mantissa */
+	or	r11,r5,r6	/* form 24 bits of mantissa */
 
 /* See if the addone bit is set and unround if it is. */
-                bb0.n 25,r10,nounrounds /* do not unround if addone bit clear */
-                extu  r6,r12,12<20>  /* extract signed exponent from IMPCR */
-unrounds:       subu  r11,r11,1      /* subtract 1 from mantissa */
-/* If the hidden bit is cleared after subtracting the one, then the one added */
-/* during the rounding must have propagated through the mantissa.  The exponent */
-/* will need to be decremented. */
-                bb1   23,r11,nounrounds /* if hidden bit is set,then exponent does */
-                                   /* not need to be decremented */
-decexps:        sub   r6,r6,1      /* decrement exponent 1 */
-                set   r11,r11,1<23>  /* set the hidden bit */
-
-/* For both single and double precision, there are cases where it is easier */
-/* and quicker to make a special case.  Examples of this are if the shift  */
-/* amount is only 1 or 2, or all the mantissa is shifted off, or all the */
-/* mantissa is shifted off and it is still shifting, or, in the case of  */
-/* doubles, if the shift amount is around the boundary of MANTLO and MANTHI. */
-
-nounrounds:     or    r8,r0,lo16(0x00000f81)  /* load r8 with -127 in decimal  */
-					    /* for lowest 12 bits */
-                sub   r7,r8,r6     /* find difference between two exponents, */
-                                   /* this amount is the shift amount */
-                cmp   r6,r7,3      /* check to see if r7 contains 3 or more */
-                bb1   ge,r6,threesing /* br to code that handles shifts of >=3 */
-                cmp   r6,r7,2      /* check to see if r7 contains 2 */
-                bb1   eq,r6,twosing /* br to code that handles shifts of 2 */
-one:            rot   r9,r9,0<1>   /* rotate roundoff register once, this places */
-                                   /* guard in round and round in sticky */
-                bb0   31,r9,nosticky1s/* do not or round and sticky if sticky is */
-                                   /* 0, this lost bit will be cleared later */
-                set   r9,r9,1<0>   /* or round and sticky */
-nosticky1s:     bb0   0,r11,guardclr1s /* do not set guard bit if LSB = 0 */
-                set   r9,r9,1<2>   /* set guard bit        */
-guardclr1s:     extu  r11,r11,31<1> /* shift mantissa right 1 */
-                br.n  round        /* round result */
-                mak   r9,r9,3<0>   /* clear bits lost during rotation */
-
-twosing:        rot   r9,r9,0<2>   /* rotate roundff register twice, this places */
-                                   /* guard in sticky */
-                bb0   30,r9,nosticky2s /* do not or guard and sticky if stick is 0 */
-                                   /* this lost bit will be cleared later */
-                br.n  noround2s    /* skip or old guard and old round if old */
-                                   /* sticky set */
-                set   r9,r9,1<0>   /* or guard and sticky */
-nosticky2s:     bb0   31,r9,noround2s /* do not or guard and round if round is 0 */
-                                   /* this lost bit will be cleared later */
-                set   r9,r9,1<0>   /* or guard and round */
-noround2s:      bb0   0,r11,roundclr2s /* do not set round bit if LSB = 0 */
-                set   r9,r9,1<1>   /* set round bit */
-roundclr2s:     bb0   1,r11,guardclr2s /* do not set guard bit if LSB + 1 = 0 */
-                set   r9,r9,1<2>   /* set guard bit */
-guardclr2s:     extu  r11,r11,30<2>  /* shift mantissa right 2 */
-                br.n  round        /* round result */
-                mak   r9,r9,3<0>   /* clear bits lost during rotation */
-
-threesing:      bb1   0,r9,noguard3s /* check sticky initially */
-                                     /* sticky is set, forget most of the oring */
-nosticky3s:     bb0   1,r9,noround3s  /* check round initially, do not set sticky */
-                br.n  noguard3s    /* forget most of the rest of oring */
-                set   r9,r9,1<0>      /* if round is clear,set sticky if round set */
-noround3s:      bb0.n 2,r9,noguard3s  /* check guard initially, do not set sticky */
-                clr   r9,r9,2<1>   /* clear the original guard and round for when */
-                                      /* you get to round section */
-                set   r9,r9,1<0>      /* if guard is clear,set sticky if guard set */
-noguard3s:      cmp   r6,r7,23     /* check if # of shifts is <=23 */
-                bb1   gt,r6,s24    /* branch to see if shifts = 24 */
-                sub   r6,r7,2      /* get number of bits to check for sticky */
-                mak   r6,r6,5<5>   /* shift width into width field */
-                mak   r8,r11,r6     /* mask off shifted bits -2 */
-                ff1   r8,r8        /* see if r8 has any ones */
-                bb1   5,r8,nostky23 /* do not set sticky if no ones found */
-                set   r9,r9,1<0>   /* set sticky bit */
-nostky23:       or    r8,r0,34     /* start code to get new mantissa plus two */
-                                   /* extra bits for new round and new guard bits */
-                subu  r8,r8,r7     
-                mak   r8,r8,5<5>   /* shift field width into second five bits */
-                extu  r6,r6,5<5>   /* shift previous shifted -2 into offset field */
-                or    r6,r6,r8     /* complete field */
-                extu  r11,r11,r6     /* form new mantissa with two extra bits */
-
-                bb0   0,r11,nornd3s /* do not set new round bit */
-                set   r9,r9,1<1>   /* set new round bit */
-nornd3s:        bb0   1,r11,nogrd3s /* do not set new guard bit */
-                set   r9,r9,1<2>   /* set new guard bit */
-nogrd3s:        br.n  round        /* round mantissa */
-                extu  r11,r11,30<2>  /* shift off remaining two bits */
-
-s24:            cmp   r6,r7,24     /* check to see if # of shifts is 24 */
-                bb1   gt,r6,s25    /* branch to see if shifts = 25 */
-                bb1   0,r9,nostky24 /* skip checking if old sticky set */
-                extu  r8,r11,22<0>  /* prepare to check bits that will be shifted */
-                                   /* into the sticky */
-                ff1   r8,r8        /* see if there are any 1''s */
-                bb1   5,r8,nostky24 /* do not set sticky if no ones found */
-                set   r9,r9,1<0>   /* set sticky bit */
-nostky24:       bb0   22,r11,nornd24 /* do not set new round bit */
-                set   r9,r9,1<1>   /* set new round bit */
-nornd24:        set   r9,r9,1<2>   /* set new guard bit,this is hidden bit */
-                br.n  round        /* round mantissa */
-                or    r11,r0,r0     /* clear r11, all of mantissa shifted off */
-                
-s25:            cmp   r6,r7,25     /* check to see if # of shifts is 25 */
-                bb1   gt,r6,s26    /* branch to execute for shifts => 26 */
-                bb1   0,r9,nostky25 /* skip checking if old sticky set */
-                extu  r8,r11,23<0> /* prepare to check bits that will be shifted */
-                                   /* into the sticky */
-                ff1   r8,r8        /* see if there are any 1''s */
-                bb1   5,r8,nostky25 /* do not set sticky if no ones found */
-                set   r9,r9,1<0>   /* set sticky bit */
-nostky25:       set   r9,r9,1<1>   /* set new round bit,this is hidden bit */
-                clr   r9,r9,1<2>   /* clear guard bit since nothing shifted in */
-                br.n  round        /* round and assemble result */
-                or    r11,r0,r0    /* clear r11, all of mantissa shifted off */
-
-s26:            set   r9,r9,1<0>   /* set sticky bit,this contains hidden bit */
-                clr   r9,r9,2<1>   /* clear guard and round bits since nothing  */
-                                   /* shifted in  */
-                br.n  round        /* round and assemble result */
-                or    r11,r0,r0    /* clear mantissa */
-
-Udouble:         mak   r5,r10,21<0> /* extract upper bits of mantissa */
-                bb0.n 25,r10,nounroundd /* do not unround if addone bit clear */
-                extu  r6,r12,12<20>/* extract signed exponenet from IMPCR */
-unroundd:       or    r8,r0,1
-		subu.co  r11,r11,r8     /* subtract 1 from mantissa */
-                subu.ci  r5,r5,r0       /* subtract borrow from upper word */
-                bb1   20,r5,nounroundd /* if hidden bit is set, then exponent does */
-                                       /* not need to be decremented */
-decexpd:        sub   r6,r6,1      /* decrement exponent 1 */
-                set   r5,r5,1<20>  /* set the hidden bit */
-
-nounroundd:     or    r8,r0,lo16(0x00000c01) /* load r8 with -1023 in decimal  */
-					     /* for lowest 12 bits  */
-                sub   r7,r8,r6     /* find difference between two exponents, */
-                                   /* this amount is the shift amount */
-                cmp   r6,r7,3      /* check to see if r7 contains 3 or more */
-                bb1   ge,r6,threedoub /* br to code that handles shifts of >=3 */
-                cmp   r6,r7,2      /* check to see if r7 contains 2 */
-                bb1   eq,r6,twodoub /* br to code that handles shifts of 2 */
-
-onedoub:        rot   r9,r9,0<1>   /* rotate roundoff register once, this places */
-                                   /* guard in round and round in sticky */
-                bb0   31,r9,nosticky1d/* do not or round and sticky if sticky is 0 */
-                                   /* this lost bit will be cleared later */
-                set   r9,r9,1<0>   /* or old round and old sticky into new sticky */
-nosticky1d:     bb0   0,r11,guardclr1d /* do not set new guard bit if old LSB = 0 */
-                set   r9,r9,1<2>   /* set new guard bit */
-guardclr1d:     extu  r11,r11,31<1> /* shift lower mantissa over 1 */
-                mak   r6,r5,1<31>  /* shift off low bit of high mantissa */
-                or    r11,r6,r11   /* load high bit onto lower mantissa */
-                extu  r5,r5,20<1>  /* shift right once upper 20 bits of mantissa */
-                br.n  round        /* round mantissa and assemble result */
-                mak   r9,r9,3<0>   /* clear bits lost during rotation */
-
-twodoub:        rot   r9,r9,0<2>   /* rotate roundoff register twice, this places */
-                                   /* old guard into sticky */
-                bb0   30,r9,nosticky2d /* do not or old guard and old sticky if  */
-                                       /* old sticky is 0 */
-                br.n  noround2d    /* skip or of old guard and old round if old */
-                                   /* sticky set */
-                set   r9,r9,1<0>   /* or old guard and old sticky into new sticky */
-nosticky2d:     bb0   31,r9,noround2d /* do not or old guard and old round if */
-                                      /* old round is 0 */
-                set   r9,r9,1<0>   /* or old guard and old round into new sticky */
-noround2d:      bb0   0,r11,roundclr2d /* do not set round bit if old LSB = 0 */
-                set   r9,r9,1<1>   /* set new round bit */
-roundclr2d:     bb0   1,r11,guardclr2d /* do not set guard bit if old LSB + 1 = 0 */
-                set   r9,r9,1<2>   /* set new guard bit */
-guardclr2d:     extu  r11,r11,30<2> /* shift lower mantissa over 2 */
-                mak   r6,r5,2<30>  /* shift off low bits of high mantissa */
-                or    r11,r6,r11   /* load high bit onto lower mantissa */
-                extu  r5,r5,19<2>  /* shift right twice upper 19 bits of mantissa */
-                br.n  round        /* round mantissa and assemble result */
-                mak   r9,r9,3<0>   /* clear bits lost during rotation */
-
-threedoub:      bb1   0,r9,noguard3d /* checky sticky initially */
-                                    /* sticky is set, forget most of rest of oring */
-nosticky3d:     bb0   1,r9,noround3d /* check old round, do not set sticky if  */
-                                     /* old round is clear, set otherwise */
-                br.n  noguard3d    /* sticky is set, forget most of rest of oring */
-                set   r9,r9,1<0>   /* set sticky if old round is set */
-noround3d:      bb0   2,r9,noguard3d /* check old guard, do not set sticky if 0 */
-                clr   r9,r9,2<1>   /* clear the original guard and round for when */
-                                      /* you get to round section */
-                set   r9,r9,1<0>   /* set sticky if old guard is set */
-noguard3d:      cmp   r6,r7,32     /* do I need to work with a 1 or 2 word mant. */
-                                   /* when forming sticky, round and guard */
-                bb1   gt,r6,d33    /* jump to code that handles 2 word mantissas */
-                sub   r6,r7,2      /* get number of bits to check for sticky */
-                mak   r6,r6,5<5>   /* shift width into width field */
-                mak   r8,r11,r6    /* mask off shifted bits -2 */
-                ff1   r8,r8        /* see if r8 has any ones */
-                bb1   5,r8,nostky32 /* do not set sticky if no ones found */
-                set   r9,r9,1<0>   /* set sticky bit */
-nostky32:       or    r8,r0,34     /* start code to get new mantissa plus two */
-                                   /* extra bits for new round and new guard bits, */
-                                   /* the upper word bits will be shifted after */
-                                   /* the round and guard bits are handled */
-                subu  r8,r8,r7     
-                mak   r8,r8,5<5>   /* shift field width into second five bits */
-                extu  r6,r6,5<5>   /* shift previous shifted -2 into offset field */
-                or    r6,r6,r8     /* complete bit field */
-                extu  r11,r11,r6   /* partially form new low mantissa with 2 more  */
-                                   /* bits */
-                bb0   0,r11,nornd32d /* do not set new round bit */
-                set   r9,r9,1<1>   /* set new round bit */
-nornd32d:       bb0   1,r11,nogrd32d /* do not set new guard bit */
-                set   r9,r9,1<2>   /* set new guard bit */
-nogrd32d:       extu  r11,r11,30<2> /* shift off remaining two bits */
-                mak   r6,r7,5<5>   /* shift field width into second 5 bits, if the */
-                                   /* width is 32, then these bits will be 0 */
-                or    r8,r0,32     /* load word length into r8 */
-                sub   r8,r8,r7     /* form offset for high bits moved to low word */
-                or    r6,r6,r8     /* form complete bit field */
-                mak   r6,r5,r6     /* get shifted bits of high word */
-                or    r11,r6,r11   /* form new low word of mantissa */
-		bcnd  ne0,r8,regular33 /* do not adjust for special case of r8 */
-		br.n  round	       /* containing zeros, which would cause */
-		or    r5,r0,r0         /* all of the bits to be extracted under */
-				       /* the regular method */
-regular33:      mak   r6,r7,5<0>   /* place lower 5 bits of shift into r6 */
-                mak   r8,r8,5<5>   /* shift r8 into width field */
-                or    r6,r6,r8     /* form field for shifting of upper bits */
-                br.n  round        /* round and assemble result */
-                extu  r5,r5,r6     /* form new high word mantissa */
-
-d33:            cmp   r6,r7,33     /* is the number of bits to be shifted is 33? */
-                bb1   gt,r6,d34    /* check to see if # of bits is 34 */
-                bb1   0,r9,nostky33 /* skip checking if old sticky set */
-                mak   r6,r11,31<0> /* check bits that will be shifted into sticky */
-                ff1   r8,r8        /* check for ones */
-                bb1   5,r8,nostky33 /* do not set sticky if there are no ones */
-                set   r9,r9,1<0>   /* set new sticky bit */
-nostky33:       bb0   31,r11,nornd33 /* do not set round if bit is not a 1 */
-                set   r9,r9,1<1>   /* set new round bit */
-nornd33:        bb0   0,r5,nogrd33 /* do not set guard bit if bit is not a 1 */
-                set   r9,r9,1<2>   /* set new guard bit */
-nogrd33:        extu  r11,r5,31<1> /* shift high bits into low word */
-                br.n  round        /* round and assemble result */
-                or    r5,r0,r0     /* clear high word */
-
-d34:            cmp   r6,r7,34     /* is the number of bits to be shifted 34? */
-                bb1   gt,r6,d35    /* check to see if # of bits is >= 35 */
-                bb1   0,r9,nostky34 /* skip checking if old sticky set */
-                ff1   r8,r11       /* check bits that will be shifted into sticky */
-                bb1   5,r8,nostky34 /* do not set sticky if there are no ones */
-                set   r9,r9,1<0>   /* set new sticky bit */
-nostky34:       bb0   0,r5,nornd34 /* do not set round if bit is not a 1 */
-                set   r9,r9,1<1>   /* set new round bit */
-nornd34:        bb0   1,r5,nogrd34 /* do not set guard bit if bit is not a 1 */
-                set   r9,r9,1<2>   /* set new guard bit */
-nogrd34:        extu  r11,r5,30<2> /* shift high bits into low word */
-                br.n  round        /* round and assemble result */
-                or    r5,r0,r0     /* clear high word */
-
-d35:            cmp   r6,r7,52     /* see if # of shifts is 35 <= X <= 52 */
-                bb1   gt,r6,d53    /* check to see if # of shifts is 52 */
-                bb1.n 0,r9,nostky35 /* skip checking if old sticky set */
-                sub   r7,r7,34     /* subtract 32 from # of shifts so that opera- */
-                                   /* tions can be done on the upper word, and  */
-                                   /* then subtract two more checking guard and */
-                                   /* sticky bits */
-                ff1   r8,r11       /* see if lower word has a bit for sticky */
-                bb1   5,r8,stkycheck35 /* see if upper word has any sticky bits    */
-                br.n  nostky35     /* quit checking for sticky */
-                set   r9,r9,1<0>   /* set sticky bit */
-stkycheck35:    mak   r6,r7,5<5>   /* place width into width field */
-                mak   r8,r5,r6     /* mask off shifted bits - 2 */
-                ff1   r8,r8        /* see if r8 has any ones */
-                bb1   5,r8,nostky35 /* do not set sticky if no ones found */
-                set   r9,r9,1<0>   /* set sticky bit */
-nostky35:       or    r8,r0,32     /* look at what does not get shifted off plus */
-                                   /* round and sticky, remember that the r7 value */
-                                   /* was adjusted so that it did not include */
-                                   /* new round or new sticky in shifted off bits */
-                subu  r8,r8,r7     /* complement width  */
-                mak   r8,r8,5<5>   /* shift width into width field */
-                or    r8,r7,r8     /* add offset field */
-                extu  r11,r5,r8    /* extract upper bits into low word */
-                bb0   0,r11,nornd35 /* do not set new round bit */
-                set   r9,r9,1<1>   /* set new round bit */
-nornd35:        bb0   1,r11,nogrd35 /* do not set new guard bit */
-                set   r9,r9,1<2>   /* set new guard bit */
-nogrd35:        extu  r11,r11,30<2> /* shift off remaining guard and round bits */
-                br.n  round         /* round and assemble result */
-                or    r5,r0,r0      /* clear high word */
-
-d53:            cmp   r6,r7,53     /* check to see if # of shifts is 53 */
-                bb1   gt,r6,d54    /* branch to see if shifts = 54 */
-                bb1   0,r9,nostky53 /* skip checking if old sticky set */
-                ff1   r8,r11       /* see if lower word has a bit for sticky */
-                bb1   5,r8,stkycheck53 /* see if upper word has any sticky bits    */
-                br.n  nostky53     /* quit checking for sticky */
-                set   r9,r9,1<0>   /* set sticky bit */
-stkycheck53:    mak   r6,r5,19<0>  /* check bits that are shifted into sticky */
-                ff1   r8,r6        /* see if r6 has any ones */
-                bb1   5,r8,nostky53 /* do not set sticky if no ones found */
-                set   r9,r9,1<0>   /* set sticky bit */
-nostky53:       bb0   19,r5,nornd53 /* do not set new round bit */
-                set   r9,r9,1<1>   /* set new round bit */
-nornd53:        set   r9,r9,1<2>   /* set new guard bit,this is hidden bit */
-                or    r5,r0,r0     /* clear high word */
-                br.n  round        /* round and assemble result */
-                or    r11,r0,r0    /* clear low word */
-
-d54:            cmp   r6,r7,54     /* check to see if # of shifts is 54 */
-                bb1   gt,r6,d55    /* branch to execute for shifts =>55 */
-                bb1   0,r9,nostky54 /* skip checking if old sticky set */
-                ff1   r8,r11       /* see if lower word has a bit for sticky */
-                bb1   5,r8,stkycheck54 /* see if upper word has any sticky bits    */
-                br.n  nostky54     /* quit checking for sticky */
-                set   r9,r9,1<0>   /* set sticky bit */
-stkycheck54:    mak   r6,r5,20<0>  /* check bits that are shifted into sticky */
-                ff1   r8,r6        /* see if r6 has any ones */
-                bb1   5,r8,nostky54 /* do not set sticky if no ones found */
-                set   r9,r9,1<0>   /* set sticky bit */
-nostky54:       set   r9,r9,1<1>   /* set new round bit,this is hidden bit */
-                clr   r9,r9,1<2>   /* clear guard bit since nothing shifted in */
-                or    r5,r0,r0     /* clear high word */
-                br.n  round        /* round and assemble result */
-                or    r11,r0,r0    /* clear low word */
-
-d55:            set   r9,r9,1<0>   /* set new sticky bit,this contains hidden bit */
-                clr   r9,r9,2<1>   /* clear guard and round bits since nothing */
-                                   /* shifted in */
-                or    r5,r0,r0     /* clear high word */
-                or    r11,r0,r0    /* clear low word */
+	bb0.n	25,r10,nounrounds /* do not unround if addone (bit 25) clear */
+	 extu	r6,r12,12<20>	/* delay slot: r6 = 12-bit exponent from IMPCR */
+unrounds:
+	subu	r11,r11,1	/* undo the rounding: subtract 1 from mantissa */
+
+/*
+ * If the hidden bit is cleared after subtracting the one, then the one added
+ * during the rounding must have propagated through the mantissa. The exponent
+ * will need to be decremented.
+ */
+	bb1	23,r11,nounrounds /* if hidden bit is set, then exponent */
+				/* does not need to be decremented */
+decexps:
+	sub	r6,r6,1		/* decrement exponent by 1 */
+	set	r11,r11,1<23>	/* set the hidden bit again */
+
+/*
+ * For both single and double precision, there are cases where it is easier
+ * and quicker to make a special case. Examples of this are if the shift
+ * amount is only 1 or 2, or all the mantissa is shifted off, or all the
+ * mantissa is shifted off and it is still shifting, or, in the case of
+ * doubles, if the shift amount is around the boundary of MANTLO and MANTHI.
+ */
+
+nounrounds:
+	or	r8,r0,lo16(0x00000f81)	/* load r8 with -127 in decimal */
+					/* (as a 12-bit value) */
+	sub	r7,r8,r6	/* r7 = r8 - r6: difference between the two */
+				/* exponents, this is the shift amount */
+	cmp	r6,r7,3		/* check to see if r7 contains 3 or more */
+	bb1	ge,r6,threesing	/* br to code that handles shifts of >=3 */
+	cmp	r6,r7,2		/* check to see if r7 contains 2 */
+	bb1	eq,r6,twosing	/* br to code that handles shifts of 2 */
+one:
+	rot	r9,r9,0<1>	/* rotate roundoff register once: guard goes */
+				/* to round, round to sticky, sticky to bit 31 */
+	bb0	31,r9,nosticky1s /* old sticky clear: nothing to OR in */
+				/* (bit 31 is cleared by the mak below) */
+	set	r9,r9,1<0>	/* new sticky = old round | old sticky */
+nosticky1s:
+	bb0	0,r11,guardclr1s /* do not set guard bit if LSB = 0 */
+	set	r9,r9,1<2>	/* new guard = old mantissa LSB */
+guardclr1s:
+	extu	r11,r11,31<1>	/* shift mantissa right 1 */
+	br.n	round		/* round result */
+	 mak	r9,r9,3<0>	/* delay slot: keep only bits 2-0 of r9 */
+
+twosing:
+	rot	r9,r9,0<2>	/* rotate roundoff register twice: guard goes */
+				/* to sticky, round to bit 31, sticky to bit 30 */
+	bb0	30,r9,nosticky2s /* old sticky clear: go check old round */
+				/* (bits 31-30 are cleared by the mak below) */
+	br.n	noround2s	/* old sticky set: skip the old round */
+				/* check */
+	 set	r9,r9,1<0>	/* delay slot: new sticky = 1 */
+nosticky2s:
+	bb0	31,r9,noround2s /* old round clear: sticky stays old guard */
+				/* this lost bit will be cleared later */
+	set	r9,r9,1<0>	/* new sticky = old guard | old round */
+noround2s:
+	bb0	0,r11,roundclr2s /* do not set round bit if LSB = 0 */
+	set	r9,r9,1<1>	/* set round bit */
+roundclr2s:
+	bb0	1,r11,guardclr2s /* do not set guard bit if LSB + 1 = 0 */
+	set	r9,r9,1<2>	/* set guard bit */
+guardclr2s:
+	extu	r11,r11,30<2>	/* shift mantissa right 2 */
+	br.n	round		/* round result */
+	 mak	r9,r9,3<0>	/* delay slot: keep only bits 2-0 of r9 */
+
+threesing:
+/*
+ * Shift of 3 or more: fold old guard/round/sticky into sticky and always
+ * clear old guard and round; the code below only ever sets the new ones,
+ * so stale bits would otherwise survive into rounding.
+ */
+	mask	r6,r9,7		/* r6 = old g/r/s; r6 is reloaded below */
+nosticky3s:
+noround3s:
+	bcnd.n	eq0,r6,noguard3s /* none set: sticky stays clear */
+	 clr	r9,r9,3<0>	/* delay slot: always drop old g/r/s */
+	set	r9,r9,1<0>	/* some old bit was set: sticky = 1 */
+noguard3s:
+	cmp	r6,r7,23	/* check if # of shifts is <=23 */
+	bb1	gt,r6,s24	/* more than 23: go to the 24/25/26+ cases */
+	sub	r6,r7,2		/* get number of bits to check for sticky */
+	mak	r6,r6,5<5>	/* shift width into width field */
+	mak	r8,r11,r6	/* r8 = the low (shift - 2) mantissa bits */
+	ff1	r8,r8		/* see if r8 has any ones */
+	bb1	5,r8,nostky23	/* do not set sticky if no ones found */
+	set	r9,r9,1<0>	/* set sticky bit */
+nostky23:
+	or	r8,r0,34	/* start code to get new mantissa plus two */
+				/* extra bits for new round and new guard */
+				/* bits */
+	subu	r8,r8,r7	/* r8 = 34 - shift amount */
+	mak	r8,r8,5<5>	/* shift field width into second five bits */
+	extu	r6,r6,5<5>	/* move (shift - 2) down into the offset field */
+	or	r6,r6,r8	/* complete field */
+	extu	r11,r11,r6	/* form new mantissa with two extra bits */
+
+	bb0	0,r11,nornd3s	/* do not set new round bit */
+	set	r9,r9,1<1>	/* set new round bit */
+nornd3s:
+	bb0	1,r11,nogrd3s	/* do not set new guard bit */
+	set	r9,r9,1<2>	/* set new guard bit */
+nogrd3s:
+	br.n	round		/* round mantissa */
+	 extu	r11,r11,30<2>	/* delay slot: shift off remaining two bits */
+
+s24:
+	cmp	r6,r7,24	/* check to see if # of shifts is 24 */
+	bb1	gt,r6,s25	/* branch to see if shifts = 25 */
+	bb1	0,r9,nostky24	/* skip checking if old sticky set */
+	extu	r8,r11,22<0>	/* r8 = low 22 mantissa bits, the ones that */
+				/* will be shifted into the sticky */
+	ff1	r8,r8		/* see if there are any 1's */
+	bb1	5,r8,nostky24	/* do not set sticky if no ones found */
+	set	r9,r9,1<0>	/* set sticky bit */
+nostky24:
+	bb0	22,r11,nornd24	/* do not set new round bit */
+	set	r9,r9,1<1>	/* new round = mantissa bit 22 */
+nornd24:
+	set	r9,r9,1<2>	/* set new guard bit, this is hidden bit */
+	br.n	round		/* round mantissa */
+	 or	r11,r0,r0	/* delay slot: all of mantissa shifted off */
+
+s25:
+	cmp	r6,r7,25	/* check to see if # of shifts is 25 */
+	bb1	gt,r6,s26	/* branch to execute for shifts => 26 */
+	bb1	0,r9,nostky25	/* skip checking if old sticky set */
+	extu	r8,r11,23<0>	/* r8 = low 23 mantissa bits, the ones that */
+				/* will be shifted into the sticky */
+	ff1	r8,r8		/* see if there are any 1's */
+	bb1	5,r8,nostky25	/* do not set sticky if no ones found */
+	set	r9,r9,1<0>	/* set sticky bit */
+nostky25:
+	set	r9,r9,1<1>	/* set new round bit, this is hidden bit */
+	clr	r9,r9,1<2>	/* clear guard bit since nothing shifted in */
+	br.n	round		/* round and assemble result */
+	 or	r11,r0,r0	/* delay slot: all of mantissa shifted off */
+
+s26:
+	set	r9,r9,1<0>	/* set sticky bit, this contains hidden bit */
+	clr	r9,r9,2<1>	/* clear guard and round bits since nothing */
+				/* shifted in */
+	br.n	round		/* round and assemble result */
+	 or	r11,r0,r0	/* delay slot: clear mantissa */
+
+Udouble:
+	mak	r5,r10,21<0>	/* r5 = low 21 bits of r10: upper mantissa */
+	bb0.n	25,r10,nounroundd /* do not unround if addone (bit 25) clear */
+	 extu	r6,r12,12<20>	/* delay slot: r6 = 12-bit exponent from IMPCR */
+unroundd:
+	or	r8,r0,1		/* r8 = 1, the amount to subtract */
+	subu.co	r11,r11,r8	/* subtract 1 from low mantissa word */
+	subu.ci	r5,r5,r0	/* subtract borrow from upper word */
+	bb1	20,r5,nounroundd /* if hidden bit is set, then exponent does */
+				/* not need to be decremented */
+decexpd:
+	sub	r6,r6,1		/* decrement exponent by 1 */
+	set	r5,r5,1<20>	/* set the hidden bit again */
+
+nounroundd:
+	or	r8,r0,lo16(0x00000c01) /* load r8 with -1023 in decimal */
+				/* (as a 12-bit value) */
+	sub	r7,r8,r6	/* r7 = r8 - r6: difference between the two */
+				/* exponents, this is the shift amount */
+	cmp	r6,r7,3		/* check to see if r7 contains 3 or more */
+	bb1	ge,r6,threedoub	/* br to code that handles shifts of >=3 */
+	cmp	r6,r7,2		/* check to see if r7 contains 2 */
+	bb1	eq,r6,twodoub	/* br to code that handles shifts of 2 */
+
+onedoub:
+	rot	r9,r9,0<1>	/* rotate roundoff register once: guard goes */
+				/* to round, round to sticky, sticky to bit 31 */
+	bb0	31,r9,nosticky1d /* old sticky clear: nothing to OR in */
+				/* (bit 31 is cleared by the mak below) */
+	set	r9,r9,1<0>	/* or old round and old sticky into new sticky */
+nosticky1d:
+	bb0	0,r11,guardclr1d /* do not set new guard bit if old LSB = 0 */
+	set	r9,r9,1<2>	/* set new guard bit */
+guardclr1d:
+	extu	r11,r11,31<1>	/* shift lower mantissa over 1 */
+	mak	r6,r5,1<31>	/* low bit of high mantissa moved to bit 31 */
+	or	r11,r6,r11	/* load high bit onto lower mantissa */
+	extu	r5,r5,20<1>	/* shift right once upper 20 bits of mantissa */
+	br.n	round		/* round mantissa and assemble result */
+	 mak	r9,r9,3<0>	/* delay slot: keep only bits 2-0 of r9 */
+
+twodoub:
+	rot	r9,r9,0<2>	/* rotate roundoff register twice: guard goes */
+				/* to sticky, round to bit 31, sticky to bit 30 */
+	bb0	30,r9,nosticky2d /* do not or old guard and old sticky if */
+				/* old sticky is 0 */
+	br.n	noround2d	/* skip or of old guard and old round if old */
+				/* sticky set */
+	 set	r9,r9,1<0>	/* delay slot: new sticky = 1 */
+nosticky2d:
+	bb0	31,r9,noround2d	/* do not or old guard and old round if */
+				/* old round is 0 */
+	set	r9,r9,1<0>	/* or old guard and old round into new sticky */
+noround2d:
+	bb0	0,r11,roundclr2d /* do not set round bit if old LSB = 0 */
+	set	r9,r9,1<1>	/* set new round bit */
+roundclr2d:
+	bb0	1,r11,guardclr2d /* do not set guard bit if old LSB + 1 = 0 */
+	set	r9,r9,1<2>	/* set new guard bit */
+guardclr2d:
+	extu	r11,r11,30<2>	/* shift lower mantissa over 2 */
+	mak	r6,r5,2<30>	/* low 2 bits of high mantissa to bits 31-30 */
+	or	r11,r6,r11	/* load high bits onto lower mantissa */
+	extu	r5,r5,19<2>	/* shift right twice upper 19 bits of mantissa */
+	br.n	round		/* round mantissa and assemble result */
+	 mak	r9,r9,3<0>	/* delay slot: keep only bits 2-0 of r9 */
+
+threedoub:
+/*
+ * Shift of 3 or more: fold old guard/round/sticky into sticky and always
+ * clear old guard and round.  The code below only ever sets the new guard
+ * and round bits, so stale ones would otherwise survive into rounding
+ * whenever old sticky or old round was set on entry.
+ */
+	mask	r6,r9,7		/* r6 = old g/r/s; r6 is reloaded below */
+nosticky3d:
+noround3d:
+	bcnd.n	eq0,r6,noguard3d /* none set: sticky stays clear */
+	 clr	r9,r9,3<0>	/* delay slot: always drop old g/r/s */
+	set	r9,r9,1<0>	/* some old bit was set: sticky = 1 */
+noguard3d:
+	cmp	r6,r7,32	/* do I need to work with a 1 or 2 word mant. */
+				/* when forming sticky, round and guard */
+	bb1	gt,r6,d33	/* jump to code that handles 2 word mantissas */
+	sub	r6,r7,2		/* get number of bits to check for sticky */
+	mak	r6,r6,5<5>	/* shift width into width field */
+	mak	r8,r11,r6	/* r8 = the low (shift - 2) bits of low word */
+	ff1	r8,r8		/* see if r8 has any ones */
+	bb1	5,r8,nostky32	/* do not set sticky if no ones found */
+	set	r9,r9,1<0>	/* set sticky bit */
+nostky32:
+	or	r8,r0,34	/* start code to get new mantissa plus two */
+				/* extra bits for new round and new guard bits, */
+				/* the upper word bits will be shifted after */
+				/* the round and guard bits are handled */
+	subu	r8,r8,r7	/* r8 = 34 - shift amount */
+	mak	r8,r8,5<5>	/* shift field width into second five bits */
+	extu	r6,r6,5<5>	/* move (shift - 2) down into the offset field */
+	or	r6,r6,r8	/* complete bit field */
+	extu	r11,r11,r6	/* partially form new low mantissa with 2 more */
+				/* bits */
+	bb0	0,r11,nornd32d	/* do not set new round bit */
+	set	r9,r9,1<1>	/* set new round bit */
+nornd32d:
+	bb0	1,r11,nogrd32d	/* do not set new guard bit */
+	set	r9,r9,1<2>	/* set new guard bit */
+nogrd32d:
+	extu	r11,r11,30<2>	/* shift off remaining two bits */
+	mak	r6,r7,5<5>	/* shift field width into second 5 bits, if the */
+				/* width is 32, then these bits will be 0 */
+	or	r8,r0,32	/* load word length into r8 */
+	sub	r8,r8,r7	/* form offset for high bits moved to low word */
+	or	r6,r6,r8	/* form complete bit field */
+	mak	r6,r5,r6	/* get shifted bits of high word */
+	or	r11,r6,r11	/* form new low word of mantissa */
+	bcnd	ne0,r8,regular33 /* do not adjust for special case of r8 */
+	br.n	round		/* containing zeros, which would cause */
+	 or	r5,r0,r0	/* all of the bits to be extracted under */
+				/* the regular method (delay slot: r5 = 0) */
+regular33:
+	mak	r6,r7,5<0>	/* place lower 5 bits of shift into r6 */
+	mak	r8,r8,5<5>	/* shift r8 into width field */
+	or	r6,r6,r8	/* form field for shifting of upper bits */
+	br.n	round		/* round and assemble result */
+	 extu	r5,r5,r6	/* delay slot: form new high word mantissa */
+
+d33:
+	cmp	r6,r7,33	/* is the number of bits to be shifted 33? */
+	bb1	gt,r6,d34	/* more than 33: try the 34-bit case */
+	bb1	0,r9,nostky33	/* skip checking if old sticky set */
+	mak	r6,r11,31<0>	/* r6 = low 31 bits of r11, which fall into sticky */
+	ff1	r8,r6		/* check those bits for ones (was testing stale r8) */
+	bb1	5,r8,nostky33	/* do not set sticky if there are no ones */
+	set	r9,r9,1<0>	/* set new sticky bit */
+nostky33:
+	bb0	31,r11,nornd33	/* top bit of the low word is the new round bit */
+	set	r9,r9,1<1>	/* set new round bit */
+nornd33:
+	bb0	0,r5,nogrd33	/* bit 0 of the high word is the new guard bit */
+	set	r9,r9,1<2>	/* set new guard bit */
+nogrd33:
+	extu	r11,r5,31<1>	/* shift high bits into low word */
+	br.n	round		/* round and assemble result */
+	 or	r5,r0,r0	/* (delay slot) clear high word */
+
+d34:
+	cmp	r6,r7,34	/* is the number of bits to be shifted 34? */
+	bb1	gt,r6,d35	/* more than 34: try the 35..52 case */
+	bb1	0,r9,nostky34	/* skip checking if old sticky set */
+	ff1	r8,r11		/* the whole low word falls into sticky */
+	bb1	5,r8,nostky34	/* do not set sticky if there are no ones */
+	set	r9,r9,1<0>	/* set new sticky bit */
+nostky34:
+	bb0	0,r5,nornd34	/* bit 0 of the high word is the new round bit */
+	set	r9,r9,1<1>	/* set new round bit */
+nornd34:
+	bb0	1,r5,nogrd34	/* bit 1 of the high word is the new guard bit */
+	set	r9,r9,1<2>	/* set new guard bit */
+nogrd34:
+	extu	r11,r5,30<2>	/* shift high bits into low word */
+	br.n	round		/* round and assemble result */
+	 or	r5,r0,r0	/* (delay slot) clear high word */
+
+d35:
+	cmp	r6,r7,52	/* see if # of shifts is 35 <= X <= 52 */
+	bb1	gt,r6,d53	/* more than 52: try the 53-bit case */
+	bb1.n	0,r9,nostky35	/* skip checking if old sticky set */
+	 sub	r7,r7,34	/* (delay slot, runs on both paths) subtract */
+				/* 32 from # of shifts so that operations can */
+				/* be done on the upper word, and two more to */
+				/* keep the new round and guard bits */
+	ff1	r8,r11		/* see if lower word has a bit for sticky */
+	bb1	5,r8,stkycheck35 /* low word is all zero: check the upper word */
+	br.n	nostky35	/* quit checking for sticky */
+	 set	r9,r9,1<0>	/* (delay slot) set sticky bit */
+stkycheck35:
+	mak	r6,r7,5<5>	/* place adjusted shift count into width field */
+	mak	r8,r5,r6	/* r8 = the low (shift - 34) bits of the high word */
+	ff1	r8,r8		/* see if r8 has any ones */
+	bb1	5,r8,nostky35	/* do not set sticky if no ones found */
+	set	r9,r9,1<0>	/* set sticky bit */
+nostky35:
+	or	r8,r0,32	/* look at what does not get shifted off plus */
+				/* round and guard, remember that the r7 value */
+				/* was adjusted so that it did not include */
+				/* new round or new guard in shifted off bits */
+	subu	r8,r8,r7	/* complement width */
+	mak	r8,r8,5<5>	/* shift width into width field */
+	or	r8,r7,r8	/* add offset field */
+	extu	r11,r5,r8	/* extract upper bits into low word */
+	bb0	0,r11,nornd35	/* bit 0 of the extract is the new round bit */
+	set	r9,r9,1<1>	/* set new round bit */
+nornd35:
+	bb0	1,r11,nogrd35	/* bit 1 of the extract is the new guard bit */
+	set	r9,r9,1<2>	/* set new guard bit */
+nogrd35:
+	extu	r11,r11,30<2>	/* shift off remaining guard and round bits */
+	br.n	round		/* round and assemble result */
+	 or	r5,r0,r0	/* (delay slot) clear high word */
+
+d53:
+	cmp	r6,r7,53	/* check to see if # of shifts is 53 */
+	bb1	gt,r6,d54	/* more than 53: try the 54-bit case */
+	bb1	0,r9,nostky53	/* skip checking if old sticky set */
+	ff1	r8,r11		/* see if lower word has a bit for sticky */
+	bb1	5,r8,stkycheck53 /* low word is all zero: check the upper word */
+	br.n	nostky53	/* quit checking for sticky */
+	 set	r9,r9,1<0>	/* (delay slot) set sticky bit */
+stkycheck53:
+	mak	r6,r5,19<0>	/* r6 = low 19 bits of the high word (sticky) */
+	ff1	r8,r6		/* see if r6 has any ones */
+	bb1	5,r8,nostky53	/* do not set sticky if no ones found */
+	set	r9,r9,1<0>	/* set sticky bit */
+nostky53:
+	bb0	19,r5,nornd53	/* bit 19 of the high word is the new round bit */
+	set	r9,r9,1<1>	/* set new round bit */
+nornd53:
+	set	r9,r9,1<2>	/* set new guard bit,this is hidden bit */
+	or	r5,r0,r0	/* clear high word */
+	br.n	round		/* round and assemble result */
+	 or	r11,r0,r0	/* (delay slot) clear low word */
+
+d54:
+	cmp	r6,r7,54	/* check to see if # of shifts is 54 */
+	bb1	gt,r6,d55	/* more than 54: everything ends up in sticky */
+	bb1	0,r9,nostky54	/* skip checking if old sticky set */
+	ff1	r8,r11		/* see if lower word has a bit for sticky */
+	bb1	5,r8,stkycheck54 /* low word is all zero: check the upper word */
+	br.n	nostky54	/* quit checking for sticky */
+	 set	r9,r9,1<0>	/* (delay slot) set sticky bit */
+stkycheck54:
+	mak	r6,r5,20<0>	/* r6 = low 20 bits of the high word (sticky) */
+	ff1	r8,r6		/* see if r6 has any ones */
+	bb1	5,r8,nostky54	/* do not set sticky if no ones found */
+	set	r9,r9,1<0>	/* set sticky bit */
+nostky54:
+	set	r9,r9,1<1>	/* set new round bit,this is hidden bit */
+	clr	r9,r9,1<2>	/* clear guard bit since nothing shifted in */
+	or	r5,r0,r0	/* clear high word */
+	br.n	round		/* round and assemble result */
+	 or	r11,r0,r0	/* (delay slot) clear low word */
+
+d55:
+	set	r9,r9,1<0>	/* set new sticky bit,this contains hidden bit */
+	clr	r9,r9,2<1>	/* clear guard and round bits since nothing */
+				/* shifted in */
+	or	r5,r0,r0	/* clear high word */
+	or	r11,r0,r0	/* clear low word, then fall through into round */
 
 
 /* The first item that the rounding code does is see if either guard, round, */
-/* or sticky is set.  If all are clear, then there is no denormalization loss */
+/* or sticky is set. If all are clear, then there is no denormalization loss */
 /* and no need to round, then branch to assemble answer. */
-/* For rounding, a branch table is set up.  The left two most bits are the  */
-/* rounding mode.  The third bit is either the LSB of the mantissa or the */
-/* sign bit, depending on the rounding mode.  The three LSB''s are the guard, */
+/* For rounding, a branch table is set up. The two leftmost index bits are */
+/* the rounding mode. The third bit is either the LSB of the mantissa or the */
+/* sign bit, depending on the rounding mode. The three LSBs are the guard, */
 /* round and sticky bits. */
 
-round:          ff1   r8,r9         /* see if there is denormalization loss */
-                bb1   5,r8,assemble /* no denormalization loss or inexactness */
-                extu  r6,r10,2<modelo>   /* extract rounding mode */
-                bb1.n modehi,r10,signext /* use sign bit instead of LSB */
-                mak   r6,r6,2<4>    /* shift over rounding mode */
-                extu  r7,r11,1<0>   /* extract LSB */
-                br.n  grs           /* skip sign extraction */
-                mak   r7,r7,1<3>    /* shift over LSB */
-signext:        extu  r7,r10,1<31>  /* extract sign bit */
-                mak   r7,r7,1<3>    /* shift sign bit over */
-grs:            or    r6,r6,r7
-                or    r6,r6,r9      /* or in guard, round, and sticky */
-                or.u  r1,r0,hi16(roundtable) /* form address of branch table */
-                or    r1,r1,lo16(roundtable)
-                lda   r6,r1[r6]     /* scale offset into branch table */
-                jmp.n r6            /* jump to branch table */
-                set   r9,r9,1<3>    /* set inexact flag in r9 */
-
-roundtable:     br    noaddone
-r000001:        br    noaddone
-r000010:        br    noaddone
-r000011:        br    noaddone
-r000100:        br    noaddone
-r000101:        br    addone
-r000110:        br    addone
-r000111:        br    addone
-r001000:        br    noaddone
-r001001:        br    noaddone
-r001010:        br    noaddone
-r001011:        br    noaddone
-r001100:        br    addone
-r001101:        br    addone
-r001110:        br    addone
-r001111:        br    addone
-r010000:        br    noaddone
-r010001:        br    noaddone
-r010010:        br    noaddone
-r010011:        br    noaddone
-r010100:        br    noaddone
-r010101:        br    noaddone
-r010110:        br    noaddone
-r010111:        br    noaddone
-r011000:        br    noaddone
-r011001:        br    noaddone
-r011010:        br    noaddone
-r011011:        br    noaddone
-r011100:        br    noaddone
-r011101:        br    noaddone
-r011110:        br    noaddone
-r011111:        br    noaddone
-r100000:        br    noaddone
-r100001:        br    noaddone
-r100010:        br    noaddone
-r100011:        br    noaddone
-r100100:        br    noaddone
-r100101:        br    noaddone
-r100110:        br    noaddone
-r100111:        br    noaddone
-r101000:        br    noaddone
-r101001:        br    addone
-r101010:        br    addone
-r101011:        br    addone
-r101100:        br    addone
-r101101:        br    addone
-r101110:        br    addone
-r101111:        br    addone
-r110000:        br    noaddone
-r110001:        br    addone
-r110010:        br    addone
-r110011:        br    addone
-r110100:        br    addone
-r110101:        br    addone
-r110110:        br    addone
-r110111:        br    addone
-r111000:        br    noaddone
-r111001:        br    noaddone
-r111010:        br    noaddone
-r111011:        br    noaddone
-r111100:        br    noaddone
-r111101:        br    noaddone
-r111110:        br    noaddone
-r111111:        br    noaddone
+round:
+	ff1	r8,r9		/* see if there is denormalization loss */
+	bb1	5,r8,assemble	/* no bit set in r9: exact, nothing to round */
+	extu	r6,r10,2<modelo> /* extract rounding mode (modehi:modelo) */
+	bb1.n	modehi,r10,signext /* use sign bit instead of LSB */
+	 mak	r6,r6,2<4>	/* (delay slot) mode becomes index bits 5-4 */
+	extu	r7,r11,1<0>	/* extract LSB */
+	br.n	grs		/* skip sign extraction */
+	 mak	r7,r7,1<3>	/* (delay slot) LSB becomes index bit 3 */
+signext:
+	extu	r7,r10,1<31>	/* extract sign bit */
+	mak	r7,r7,1<3>	/* sign becomes index bit 3 */
+grs:
+	or	r6,r6,r7	/* combine mode and LSB/sign */
+	or	r6,r6,r9	/* or in guard, round, and sticky */
+	or.u	r1,r0,hi16(roundtable) /* form address of branch table; r1 is reloaded from 0(r31) at Ureturn */
+	or	r1,r1,lo16(roundtable)
+	lda	r6,r1[r6]	/* scale offset into branch table */
+	jmp.n	r6		/* jump to branch table */
+	 set	r9,r9,1<3>	/* (delay slot) set inexact flag in r9 */
+
+roundtable:
+	br	noaddone	/* 000000: mode 00 (nearest), LSB 0, grs 000 */
+	br	noaddone	/* 000001 */
+	br	noaddone	/* 000010 */
+	br	noaddone	/* 000011 */
+	br	noaddone	/* 000100: exactly half, LSB already even */
+	br	addone		/* 000101 */
+	br	addone		/* 000110 */
+	br	addone		/* 000111 */
+	br	noaddone	/* 001000: mode 00 (nearest), LSB 1 */
+	br	noaddone	/* 001001 */
+	br	noaddone	/* 001010 */
+	br	noaddone	/* 001011 */
+	br	addone		/* 001100: exactly half, LSB odd */
+	br	addone		/* 001101 */
+	br	addone		/* 001110 */
+	br	addone		/* 001111 */
+	br	noaddone	/* 010000: mode 01 (zero), LSB 0 - never add */
+	br	noaddone	/* 010001 */
+	br	noaddone	/* 010010 */
+	br	noaddone	/* 010011 */
+	br	noaddone	/* 010100 */
+	br	noaddone	/* 010101 */
+	br	noaddone	/* 010110 */
+	br	noaddone	/* 010111 */
+	br	noaddone	/* 011000: mode 01 (zero), LSB 1 - never add */
+	br	noaddone	/* 011001 */
+	br	noaddone	/* 011010 */
+	br	noaddone	/* 011011 */
+	br	noaddone	/* 011100 */
+	br	noaddone	/* 011101 */
+	br	noaddone	/* 011110 */
+	br	noaddone	/* 011111 */
+	br	noaddone	/* 100000: mode 10 (negative), sign 0 - never add */
+	br	noaddone	/* 100001 */
+	br	noaddone	/* 100010 */
+	br	noaddone	/* 100011 */
+	br	noaddone	/* 100100 */
+	br	noaddone	/* 100101 */
+	br	noaddone	/* 100110 */
+	br	noaddone	/* 100111 */
+	br	noaddone	/* 101000: mode 10 (negative), sign 1 */
+	br	addone		/* 101001 */
+	br	addone		/* 101010 */
+	br	addone		/* 101011 */
+	br	addone		/* 101100 */
+	br	addone		/* 101101 */
+	br	addone		/* 101110 */
+	br	addone		/* 101111 */
+	br	noaddone	/* 110000: mode 11 (positive), sign 0 */
+	br	addone		/* 110001 */
+	br	addone		/* 110010 */
+	br	addone		/* 110011 */
+	br	addone		/* 110100 */
+	br	addone		/* 110101 */
+	br	addone		/* 110110 */
+	br	addone		/* 110111 */
+	br	noaddone	/* 111000: mode 11 (positive), sign 1 - never add */
+	br	noaddone	/* 111001 */
+	br	noaddone	/* 111010 */
+	br	noaddone	/* 111011 */
+	br	noaddone	/* 111100 */
+	br	noaddone	/* 111101 */
+	br	noaddone	/* 111110 */
+	br	noaddone	/* 111111 */
 
 /* Round by adding a one to the LSB of the mantissa. */
-addone:         or    r6,r0,1      /* load a 1 into r6 so that add.co can be used */
-                add.co r11,r11,r6  /* add a one to the lower word of result */
-                bb0.n destsize,r12,noaddone /* single result,forget carry */
-                set   r9,r9,1<4>   /* indicate that a 1 has been added */
-                add.ci r5,r5,r0    /* propagate carry into high word */
+addone:
+	or	r6,r0,1		/* load a 1 into r6 so that add.co can be used */
+	add.co	r11,r11,r6	/* add a one to the lower word, keep carry out */
+	bb0.n	destsize,r12,noaddone /* single result,forget carry */
+	 set	r9,r9,1<4>	/* (delay slot) r9 bit 4: a 1 has been added */
+	add.ci	r5,r5,r0	/* double only: propagate carry into high word */
 
 
 /* Branch to inexact user handler if there is one. */
 
-noaddone:       
+noaddone:
 #ifdef HANDLER
-		bb1.n efinx,r12,modformdef /* branch to modify form for user  */
-                                             /* handler */
-                or    r2,r2,5      /* set inexact and underflow flags */
+	bb1.n	efinx,r12,modformdef	/* branch to modify form for user */
+					/* handler */
+	 or	r2,r2,5			/* (delay slot) r2 |= 5: inexact (bit 0) and underflow (bit 2) flags */
 #endif
 
 
-/* Assemble the result of the denormalization routine for writeback to the  */
-/* destination register.  The exponent of a denormalized number is zero, */
+/* Assemble the result of the denormalization routine for writeback to the */
+/* destination register. The exponent of a denormalized number is zero, */
 /* so simply assemble the sign and the new mantissa. */
 
-assemble:       bb1   destsize,r12,doubassem /* assemble double result */
-                bb0   sign,r10,exassems /* exit assemble if sign is zero */
-                set   r11,r11,1<sign>  /* make result negative */
-exassems:       br    Ureturn       /* return from subroutine */
+assemble:
+	bb1	destsize,r12,doubassem	/* assemble double result */
+	bb0	sign,r10,exassems	/* exit assemble if sign is zero */
+	set	r11,r11,1<sign>		/* single: sign goes into r11 */
+exassems:
+	br	Ureturn	/* single result is assembled in r11 */
+
+doubassem:
+	bb0.n	sign,r10,signclr	/* sign bit of old r10 is tested before the delay slot overwrites it */
+	 or	r10,r5,r0		/* (delay slot) load high word from r5 into r10 */
+	set	r10,r10,1<sign>		/* high word with sign loaded */
+signclr:
+	br	Ureturn	/* double result is assembled in r10:r11 */
 
-doubassem:      bb0.n sign,r10,signclr /* do not set sign in r10 */
-		or    r10,r5,r0    /* load high word from r5 into r10 */
-                set   r10,r10,1<sign> /* high word with sign loaded */
-signclr:        br    Ureturn       /* return from subroutine */
-                
 
 /* modfordef modifies the result of denormalization to the input format of */
-/* the inexact user handler.  This input format is the same format that  */
+/* the inexact user handler. This input format is the same format that */
 /* MANTHI, MANTLO, and IMPCR were initially loaded with. */
 
 #ifdef HANDLER
-modformdef:     clr   r12,r12,12<20> /* clear result exponent,IMPCR complete */
-                clr   r10,r10,4<25>  /* clear old guard,round,sticky,and addone */
-                mak   r5,r9,3<26>    /* make grs field */
-                bb0.n 4,r9,newaddone /* do not set new addone in MANTHI */
-                or    r10,r5,r10     /* or in new grs field */
-                set   r10,r10,1<25>  /* set new addone */
-newaddone:      bb1.n destsize,r12,moddefd /* branch to handle double precision */
-                clr   r10,r10,21<0>  /* clear upper bits of old mantissa */
-moddefs:        extu  r5,r11,20<3>   /* extract upper bits */
-                or    r10,r5,r10     /* MANTHI complete */
-                bsr.n _handler       /* execute user handler for inexact */
-                rot   r11,r11,0<3>   /* MANTLO complete */
-                br    Ureturn         /* return from subroutine */
-moddefd:        bsr.n _handler       /* execute user handler for inexact */
-                or    r10,r5,r10     /* MANTHI complete,r5 should be set to OR */
+modformdef:
+	clr	r12,r12,12<20>	/* clear result exponent,IMPCR complete */
+	clr	r10,r10,4<25>	/* clear old guard,round,sticky,and addone */
+	mak	r5,r9,3<26>	/* r5 = new grs (r9 bits 2-0) moved to bits 28-26 */
+	bb0.n	4,r9,newaddone	/* r9 bit 4 clear: no addone to record in MANTHI */
+	 or	r10,r5,r10	/* (delay slot) or in new grs field */
+	set	r10,r10,1<25>	/* set new addone */
+newaddone:
+	bb1.n	destsize,r12,moddefd /* branch to handle double precision */
+	 clr	r10,r10,21<0>	/* (delay slot) clear the low 21 bits of r10, the old mantissa */
+moddefs:
+	extu	r5,r11,20<3>	/* r5 = bits 22-3 of the single mantissa */
+	or	r10,r5,r10	/* MANTHI complete */
+	bsr.n	_handler	/* execute user handler for inexact */
+	 rot	r11,r11,0<3>	/* (delay slot) MANTLO complete */
+	br	Ureturn
+moddefd:
+	bsr.n	_handler	/* execute user handler for inexact */
+	 or	r10,r5,r10	/* (delay slot) MANTHI complete; NOTE(review): r5 was overwritten with the grs field above, so the high mantissa word is not what gets ORed in - confirm */
 #endif
 
-
 /* Return to fpui. */
 
-Ureturn:         ld    r1,r31,0 /* load return address */
-                jmp   r1           /* return from subroutine */
-  
-                data
-
-/* function _FPoverflow --       */
-/* The documentation for this release gives an overall description of this code. */
-data
-align 4
-msg2: string "here at line %d, r1 is %x\n\0"
-text
-
-#line 23
+Ureturn:
+	ld	r1,r31,0	/* reload return address from 0(r31) */
+	jmp	r1		/* return to caller */
 
+/*
+ * FPoverflow
+ */
 /* If the overflow user handler bit is not set, then the inexact bit in the */
-/* FPSR is set, and the inexact user handler bit is checked.  If it is set, */
+/* FPSR is set, and the inexact user handler bit is checked. If it is set, */
 /* then the inexact user handler is executed, else the default routine for */
 /* overflow is executed. */
-              text				
-	      align 8
-              global _FPoverflow
-_FPoverflow:  
-	      st     r1,r31,0 /* save return address */
+
+ASLOCAL(FPoverflow)
+	st	r1,r31,0	/* save return address at 0(r31); reloaded at return */
 #ifdef HANDLER
-              set    r2,r2,1<overflow> /* set overflow bit in r2 which holds FPSR */
-              bb1    efovf,r12,hand  /* go to user handler if bit set for overflow */
-              set    r2,r2,1<inexact> /* set inexact bit in r2 since overflow bit */
-                                /* in FPCR is not set */
-              bb0  efinx,r12,nohandler/* if userhandler for inexact not set,then */
-                                        /* round result */
-              br     callhandler /* branch to user handler for inexact */
+	set	r2,r2,1<overflow> /* set overflow bit in r2 which holds FPSR */
+	bb1	efovf,r12,hand	/* go to user handler if bit set for overflow */
+	set	r2,r2,1<inexact> /* set inexact bit in r2 since overflow bit */
+				/* in FPCR is not set */
+	bb0	efinx,r12,nohandler/* no user handler for inexact either: */
+				/* produce the default rounded result */
+	br	callhandler	/* branch to user handler for inexact */
 
 /* Before the overflow user handler is executed, the exponent is modified */
 /* by subtracting 192 for single precision and 1536 for double precision. */
- 
-hand:         bb1    10,r12,doubleprec /* double precision result */
-singleprec:   or.u   r5,r0,0x0c00 /* load exponent adjust */
-              br.n   callhandler  /* prepare to call user handler */
-              subu   r12,r12,r5 /* adjust single precision exponent */
-doubleprec:   or.u   r5,r0,0x6000 /* load exponent adjust */
-              subu   r12,r12,r5 /* adjust double precision exponent */
-callhandler:  bsr    _handler   /* branch to common handler routine */
-              br     return     /* return from overflow subroutine */
+
+hand:
+	bb1	destsize,r12,doubleprec /* double precision result */
+singleprec:
+	or.u	r5,r0,0x0c00	/* r5 = 192 << 20, aligned with the exponent field */
+	br.n	callhandler	/* prepare to call user handler */
+	 subu	r12,r12,r5	/* (delay slot) adjust single precision exponent */
+doubleprec:
+	or.u	r5,r0,0x6000	/* r5 = 1536 << 20, aligned with the exponent field */
+	subu	r12,r12,r5	/* adjust double precision exponent */
+callhandler:
+	bsr	_handler	/* branch to common handler routine */
+	br	return		/* reload return address and leave */
 #endif
 
 /* Determine which rounding mode to use for the default procedure. */
 
-nohandler:    bb1    modehi,r10,signed /* mode is either round toward pos. or neg. */
-              bb0    modelo,r10,OFnearest /* rounding mode is round nearest */
-              br     OFzero            /* rounding mode is round zero */
-signed:       bb0    modelo,r10,OFnegative /* rounding mode is round negative */
-              br     positive        /* rounding mode is round positive */
+nohandler:
+	bb1	modehi,r10,signed /* modehi set: round toward pos. or neg. */
+	bb0	modelo,r10,OFnearest /* mode 00: round to nearest */
+	br	OFzero		/* mode 01: round toward zero */
+signed:
+	bb0	modelo,r10,OFnegative /* mode 10: round toward negative */
+	br	positive	/* mode 11: round toward positive */
 
 
 /* In the round toward nearest mode, positive values are rounded to */
 /* positive infinity and negative values are loaded toward negative infinity. */
 /* The value for single or double precision is loaded from a data table. */
 
-OFnearest:      
-		bb1.n  destsize,r12,neardouble /* branch to neardouble of  */
-                                             /* double result */
-              mask.u r5,r10,0x8000  /* mask off sign bit from MANTHI */
-              or.u   r11,r0,hi16(0x7f800000)  /* load single infinity constant */
-              or     r11,r11,lo16(0x7f800000)  
-              br.n   return     /* return with result */
-              or     r11,r5,r11 /* adjust sign */
+OFnearest:
+	bb1.n	destsize,r12,neardouble	/* branch to neardouble of */
+					/* double result */
+	 mask.u	r5,r10,0x8000		/* (delay slot) r5 = sign bit of r10 only */
+	or.u	r11,r0,hi16(0x7f800000)	/* load single infinity constant */
+	or	r11,r11,lo16(0x7f800000)
+	br.n	return			/* return with result */
+	 or	r11,r5,r11		/* (delay slot) merge sign into the infinity */
 neardouble:
-	     or     r11,r0,r0           /* load lower word of infinity */
-              or.u   r10,r0,hi16(0x7ff00000)  /* load upper word of infinity */
-              or     r10,r10,lo16(0x7ff00000)  
-              br.n   return     /* return with result */
-              or     r10,r5,r10 /* adjust sign */
+	or	r11,r0,r0		/* low word of double infinity is zero */
+	or.u	r10,r0,hi16(0x7ff00000)	/* load upper word of infinity */
+	or	r10,r10,lo16(0x7ff00000)
+	br.n	return			/* return with result */
+	 or	r10,r5,r10		/* (delay slot) merge sign (r5) into the high word */
 
 
 /* In the round toward zero mode, positive values are rounded to the largest */
@@ -1393,494 +1477,520 @@ neardouble:
 /* negative finite number. */
 /* The value for single or double precision is loaded from a data table. */
 
-OFzero:         
-		bb1.n  destsize,r12,zerodouble /* branch to zerodouble of  */
-                                             /* double result */
-              mask.u r5,r10,0x8000  /* mask off sign bit from MANTHI */
-              or.u   r11,r0,hi16(0x7f7fffff)  /* load single finite number constant */
-              or     r11,r11,lo16(0x7f7fffff)  
-              br.n   return     /* return with result */
-              or     r11,r5,r11 /* adjust sign */
-zerodouble:   
-	     set    r11,r0,0<0>          /* load lower word of finite number */
-              or.u   r10,r0,hi16(0x7fefffff)  /* load upper word of finite number */
-              or     r10,r10,lo16(0x7fefffff)  
-              br.n   return     /* return with result */
-              or     r10,r5,r10 /* adjust sign */
-
-
-/* In the round toward positve mode, positive values are rounded to  */
+OFzero:
+	bb1.n	destsize,r12,zerodouble	/* branch to zerodouble of */
+					/* double result */
+	 mask.u	r5,r10,0x8000		/* (delay slot) r5 = sign bit of r10 only */
+	or.u	r11,r0,hi16(0x7f7fffff)	/* load largest finite single constant */
+	or	r11,r11,lo16(0x7f7fffff)
+	br.n	return			/* return with result */
+	 or	r11,r5,r11		/* (delay slot) merge sign into the result */
+zerodouble:
+	set	r11,r0,0<0>		/* width 0 means 32 bits: low word = all ones */
+	or.u	r10,r0,hi16(0x7fefffff)	/* load upper word of largest finite double */
+	or	r10,r10,lo16(0x7fefffff)
+	br.n	return			/* return with result */
+	 or	r10,r5,r10		/* (delay slot) merge sign into the high word */
+
+
+/* In the round toward positive mode, positive values are rounded to */
 /* postive infinity and negative values are loaded toward the largest */
 /* negative finite number. */
 /* The value for single or double precision is loaded from a data table. */
 
-positive:     
-              bb1    destsize,r12,posdouble /* branch to section for double result */
-possingle:    
-  	     bb1    sign,r10,possingleneg /* branch to section for negatives */
-possinglepos: 
-		or.u   r11,r0,hi16(0x7f800000)  /* load single infinity constant */
-              br.n   return     /* return with result */
-              or     r11,r11,lo16(0x7f800000)  
+positive:
+	bb1	destsize,r12,posdouble	/* branch to section for double result */
+possingle:
+	bb1	sign,r10,possingleneg	/* branch to section for negatives */
+possinglepos:
+	or.u	r11,r0,hi16(0x7f800000)	/* positive single: +infinity */
+	br.n	return			/* return with result */
+	 or	r11,r11,lo16(0x7f800000) /* (delay slot) low half of the constant */
 possingleneg:
-	     or.u   r11,r0,hi16(0x7f7fffff)  /* load single finite number constant */
-              or     r11,r11,lo16(0x7f7fffff)  
-              br.n   return     /* return with result */
-              set    r11,r11,1<sign> /* set sign for negative */
-posdouble:    
-	     bb1    sign,r10,posdoubleneg /* branch to negative double results */
-posdoublepos: 
-		or     r11,r0,r0  /* load lower word of double infinity */
-              or.u   r10,r0,hi16(0x7ff00000)  /* load upper word of infinity */
-              br.n   return     /* return with result */
-              or     r10,r10,lo16(0x7ff00000)  
-posdoubleneg: 
-	      set    r11,r0,0<0>          /* load lower word of finite number */
-              or.u   r10,r0,hi16(0x7fefffff)  /* load upper word of finite number */
-              or     r10,r10,lo16(0x7fefffff)  
-              br.n   return     /* return with result */
-              set    r10,r10,1<sign> /* set sign for negative */
-
-
-/* In the round toward negative mode, positive values are rounded to the largest  */
+	or.u	r11,r0,hi16(0x7f7fffff)	/* negative single: largest finite magnitude */
+	or	r11,r11,lo16(0x7f7fffff)
+	br.n	return			/* return with result */
+	 set	r11,r11,1<sign>		/* (delay slot) set sign for negative */
+posdouble:
+	bb1	sign,r10,posdoubleneg	/* branch to negative double results */
+posdoublepos:
+	or	r11,r0,r0		/* positive double: low word of +infinity */
+	or.u	r10,r0,hi16(0x7ff00000)	/* load upper word of infinity */
+	br.n	return			/* return with result */
+	 or	r10,r10,lo16(0x7ff00000) /* (delay slot) low half of the constant */
+posdoubleneg:
+	set	r11,r0,0<0>		/* width 0 means 32 bits: low word = all ones */
+	or.u	r10,r0,hi16(0x7fefffff)	/* load upper word of largest finite double */
+	or	r10,r10,lo16(0x7fefffff)
+	br.n	return			/* return with result */
+	 set	r10,r10,1<sign>		/* (delay slot) set sign for negative */
+
+
+/* In the round toward negative mode, positive values are rounded to the largest */
 /* postive finite number and negative values are rounded to negative infinity. */
 /* The value for single or double precision is loaded from a data table. */
 
-OFnegative: 
-	      bb1    destsize,r12,negdouble /* branch to section for double result */
-negsingle:    
-	      bb1    sign,r10,negsingleneg /* branch to section for negatives */
-negsinglepos: 
-	      or.u   r11,r0,hi16(0x7f7fffff)  /* load single finite number constant */
-              br.n   return     /* return with result */
-              or     r11,r11,lo16(0x7f7fffff)  
-negsingleneg: 
-	      or.u   r11,r0,hi16(0x7f800000)  /* load single infinity constant */
-              or     r11,r11,lo16(0x7f800000)  
-              br.n   return     /* return with result */
-              set    r11,r11,1<sign> /* set sign for negative */
-negdouble:    
-	   bb1    sign,r10,negdoubleneg /* branch to negative double results */
-negdoublepos: 
-	      set    r11,r0,0<0>          /* load lower word of finite number */
-              or.u   r10,r0,hi16(0x7fefffff)  /* load upper word of finite number */
-              br.n   return     /* return with result */
-              or     r10,r10,lo16(0x7fefffff)  
-negdoubleneg: 
-	      or     r11,r0,r0  /* load lower word of double infinity */
-              or.u   r10,r0,hi16(0x7ff00000)  /* load upper word of infinity */
-              or     r10,r10,lo16(0x7ff00000)  
-              set    r10,r10,1<sign> /* set sign for negative */
-
-return:       
-	      ld     r1,r31,0 /* ld return address */
-              jmp    r1         /* return from subroutine */
-
-              data
+OFnegative:
+	bb1	destsize,r12,negdouble	/* branch to section for double result */
+negsingle:
+	bb1	sign,r10,negsingleneg	/* branch to section for negatives */
+negsinglepos:
+	or.u	r11,r0,hi16(0x7f7fffff)	/* positive single: largest finite number */
+	br.n	return			/* return with result */
+	 or	r11,r11,lo16(0x7f7fffff) /* (delay slot) low half of the constant */
+negsingleneg:
+	or.u	r11,r0,hi16(0x7f800000)	/* negative single: infinity magnitude */
+	or	r11,r11,lo16(0x7f800000)
+	br.n	return			/* return with result */
+	 set	r11,r11,1<sign>		/* (delay slot) set sign for negative */
+negdouble:
+	bb1	sign,r10,negdoubleneg	/* branch to negative double results */
+negdoublepos:
+	set	r11,r0,0<0>		/* width 0 means 32 bits: low word = all ones */
+	or.u	r10,r0,hi16(0x7fefffff)	/* load upper word of largest finite double */
+	br.n	return			/* return with result */
+	 or	r10,r10,lo16(0x7fefffff) /* (delay slot) low half of the constant */
+negdoubleneg:
+	or	r11,r0,r0		/* negative double: low word of infinity */
+	or.u	r10,r0,hi16(0x7ff00000)	/* load upper word of infinity */
+	or	r10,r10,lo16(0x7ff00000)
+	set	r10,r10,1<sign>		/* set sign, then fall through into return */
+
+return:
+	ld	r1,r31,0		/* reload the return address saved at FPoverflow entry */
+	jmp	r1			/* return to caller */
+
+	data
 
 
 /* If either S1 or S2 is a signalling NaN, then set the invalid operation */
-/* bit of the FPSR.  If the invalid operation user handler flag is set and */
+/* bit of the FPSR. If the invalid operation user handler flag is set and */
 /* then NaN is signalling, then branch to the handler routine to go to the */
 /* user handler. */
 /* If S1 is the only NaN or one of two NaN''s, then write */
-/* a quiet S1 to the result.  A signalling NaN must be made quiet before */
+/* a quiet S1 to the result. A signalling NaN must be made quiet before */
 /* it can be written, but a signalling S2 is not modified in this routine */
 /* if S1 is a NaN. */
-             text
-GLOBAL(NaN)
-	        bb0.n	s1nan,r12,S2sigcheck /* S1 is not a NaN */
-		st	r1,r31,0 /* save return address */
-		bb1	sigbit,r5,S2sigcheck /* S1 is not a signaling NaN */
-		set	r2,r2,1<oper>  /* set invalid operation bit in FPSR */
-#ifdef JEFF_DEBUGxxxxxxx
-		/*
-		 * Generate a signal to the offending process.
-		 * This uses hardcoded constants from mach/exception.h
-		 * and mach/machine/exception.h.
-		 */
-		ldcr	 r2, cr17	/* first arg: current_thread() */
-		or	 r3,  r0, 3	/* second arg: EXC_ARITHMETIC  */
-		or	 r4,  r0, 3	/* third arg: EXC_M88K_FLOAT_P */
-		or	 r5,  r0, r0
-		subu	r31, r31, 48
-		bsr.n    _thread_doexception
-		st	 r1, r31, 44
-		ld	 r1, r31, 44
-		br.n	FPnan_return
-		addu	r31, r31, 48
-#endif
+	text
+ASLOCAL(NaN)
+	bb0.n	s1nan,r12,S2sigcheck	/* S1 is not a NaN */
+	 st	r1,r31,0		/* save return address */
+	bb1	sigbit,r5,S2sigcheck	/* S1 is not a signaling NaN */
+	set	r2,r2,1<oper>		/* set invalid operation bit in FPSR */
 #ifdef HANDLER
-		bb0	oper,r3,S1nohandler /* branch if no user handler */
-		bsr	_handler       /* branch to handler */
-		br	FPnan_return
-ASGLOBAL(S1nohandler)
+	bb0	oper,r3,S1nohandler	/* branch if no user handler */
+	bsr	_handler		/* branch to handler */
+	br	FPnan_return
+ASLOCAL(S1nohandler)
 #endif
-	        br.n	S1write        /* FPSR bit already set, S1 is made quiet, */
-                                   /* and since we always write S1 if it is a */
-                                   /* NaN, write S1 and skip rest of routine */
-		set	r5,r5,1<sigbit> /* make S1 a quiet NaN */
-
-ASGLOBAL(S2sigcheck)
-		bb0	s2nan,r12,S1write /* S2 is not a NaN */
-		bb1	sigbit,r7,S1write /* S2 is not a signaling NaN */
-		set	r2,r2,1<oper>  /* set invalid operation bit in FPSR */
+	br.n	S1write		/* FPSR bit already set, S1 is made quiet, */
+				/* and since we always write S1 if it is a */
+				/* NaN, write S1 and skip rest of routine */
+	 set	r5,r5,1<sigbit>	/* make S1 a quiet NaN */
+
+ASLOCAL(S2sigcheck)
+	bb0	s2nan,r12,S1write	/* S2 is not a NaN */
+	bb1	sigbit,r7,S1write	/* S2 is not a signaling NaN */
+	set	r2,r2,1<oper>		/* set invalid operation bit in FPSR */
 #ifdef HANDLER
-		bb0	oper,r3,S2nohandler	/* branch if no user handler */
-		bsr	_handler       /* branch to handler */
-		br	FPnan_return
+	bb0	oper,r3,S2nohandler	/* branch if no user handler */
+	bsr	_handler		/* branch to handler */
+	br	FPnan_return
 #endif
 
-ASGLOBAL(S2nohandler)
-		set    r7,r7,1<sigbit> /* make S2 a quiet NaN */
+ASLOCAL(S2nohandler)
+	set	r7,r7,1<sigbit>	/* make S2 a quiet NaN */
 
 
 /* Write a single or double precision quiet NaN unless the opeation is FCMP. */
 /* If the operation is FCMP, then set the not comparable bit in the result. */
 
-ASGLOBAL(S1write)
-	     bb0    s1nan,r12,S2write /* do not write S1 if it is not a NaN */
-             extu   r10,r9,5<11>      /* extract opcode */
-             cmp    r11,r10,FCMPop    /* compare to FCMP */
-             bb1    ne,r11,S1noFCMP   /* operation is not FCMP */
-             set    r6,r0,1<nc>       /* set the not comparable bit */
-             br.n   FPnan_return            /* return from subroutine */
-             set    r6,r6,1<ne>       /* set the not equal bit */
-ASGLOBAL(S1noFCMP)
-	     bb1.n  dsize,r9,wrdoubS1 /* double destination */
-             set    r5,r5,11<20>      /* set all exponent bits to 1 */
+ASLOCAL(S1write)
+	bb0	s1nan,r12,S2write /* S1 is not a NaN: go write S2 instead */
+	extu	r10,r9,5<11>	/* r10 = 5-bit opcode field of r9 */
+	cmp	r11,r10,FCMPop	/* is the operation FCMP? */
+	bb1	ne,r11,S1noFCMP	/* no: write a quiet NaN result */
+	set	r6,r0,1<nc>	/* FCMP result: start with only the nc bit */
+	br.n	FPnan_return
+	 set	r6,r6,1<ne>	/* (delay slot) and add the ne bit */
+ASLOCAL(S1noFCMP)
+	bb1.n	dsize,r9,wrdoubS1 /* double destination */
+	 set	r5,r5,11<20>	/* (delay slot) set all 11 exponent bits of r5 */
 /* The single result will be formed the same way whether S1 is a single or double */
-ASGLOBAL(wrsingS1)
-	     mak    r10,r5,28<3>      /* wipe out extra exponent bits */
-             extu   r11,r6,3<29>      /* get lower three bits of mantissa */
-             or     r10,r10,r11       /* combine all of result except sign */
-             clr    r6,r5,31<0>       /* clear all but sign */
-             br.n   FPnan_return            /* return from function */
-             or     r6,r6,r10         /* form result */
-
-ASGLOBAL(wrdoubS1)
-/* ;;;;;	     bb1    s1size,r9,wrdoubS1d ;write double source to double dest. */
-/* took out the above instruction -- don't see why it's there.... jfriedl */
-ASGLOBAL(wrdoubS1s)
-	     set    r6,r6,29<0>       /* set extra bits of lower word */
-ASGLOBAL(wrdoubS1d)
-	     br     FPnan_return      /* no modification necessary for writing */
-                                      /* double to double, so return from function */
-
-ASGLOBAL(S2write)
-	     extu   r10,r9,5<11>      /* extract opcode */
-             cmp    r11,r10,FCMPop    /* compare to FCMP */
-             bb1.n  ne,r11,S2noFCMP   /* operation is not FCMP */
-             set    r7,r7,11<20>      /* set all exponent bits to 1 */
-             set    r6,r0,1<nc>       /* set the not comparable bit */
-             br.n   FPnan_return            /* return from subroutine */
-             set    r6,r6,1<ne>       /* set the not equal bit */
-ASGLOBAL(S2noFCMP)
-	     bb1.n  dsize,r9,wrdoubS2 /* double destination */
-	/*
-	 * In the original, the ".n" above and the "set r5..." below
-	 * were omitted here.  Since they're in the S1 stuff above,
-	 * and since this isn't working right now (r5 isn't being set
-	 * to it's part of the nan), I'll try this...
-	 *	jfriedl Dec 1, 1989
-	 */
-             set    r5,r5,11<20>      /* set all exponent bits to 1 */
+ASLOCAL(wrsingS1)
+	mak	r10,r5,28<3>	/* low 28 bits of r5, shifted left by 3 */
+	extu	r11,r6,3<29>	/* top three bits of the low word r6 */
+	or	r10,r10,r11	/* combine all of result except sign */
+	clr	r6,r5,31<0>	/* r6 = r5 with bits 0-30 cleared (sign only) */
+	br.n	FPnan_return
+	 or	r6,r6,r10	/* (delay slot) merge sign with the rest */
+
+ASLOCAL(wrdoubS1)
+	set	r6,r6,29<0>	/* force the low 29 bits of the low word to ones */
+	br	FPnan_return	/* r5 (high word) is not touched here; */
+				/* return with the result in r5:r6 */
+
+ASLOCAL(S2write)
+	extu	r10,r9,5<11>	/* r10 = 5-bit opcode field of r9 */
+	cmp	r11,r10,FCMPop	/* is the operation FCMP? */
+	bb1.n	ne,r11,S2noFCMP	/* no: write a quiet NaN result */
+	 set	r7,r7,11<20>	/* (delay slot) set all 11 exponent bits of r7 */
+	set	r6,r0,1<nc>	/* FCMP result: start with only the nc bit */
+	br.n	FPnan_return
+	 set	r6,r6,1<ne>	/* (delay slot) and add the ne bit */
+ASLOCAL(S2noFCMP)
+	bb1.n	dsize,r9,wrdoubS2 /* double destination */
+	 set	r5,r5,11<20>	/* (delay slot) sets exponent bits of r5, not r7; NOTE(review): wrdoubS2 takes the low word from r8 but nothing visible copies r7 into r5 -- confirm */
 /* The single result will be formed the same way whether S1 is a single or double */
-ASGLOBAL(wrsingS2)
-	     mak    r10,r7,28<3>      /* wipe out extra exponent bits */
-             extu   r11,r8,3<29>      /* get lower three bits of mantissa */
-             or     r10,r10,r11       /* combine all of result except sign */
-             clr    r6,r7,31<0>       /* clear all but sign */
-             br.n   FPnan_return            /* return from function */
-             or     r6,r6,r10         /* form result */
-
-ASGLOBAL(wrdoubS2)
-	
-/* ;;;	     bb1  s2size,r9,FPnan_return  ;write double source to double dest. */
-	/*
-	 * I took out the above branch because I just don't see how it
-	 * makes sense. jfriedl Dec 1, '89
-	 */
-ASGLOBAL(wrdoubS2s)
-	     set    r6,r8,29<0>       /* set extra bits of lower word */
+ASLOCAL(wrsingS2)
+	mak	r10,r7,28<3>	/* wipe out extra exponent bits */
+	extu	r11,r8,3<29>	/* get lower three bits of mantissa */
+	or	r10,r10,r11	/* combine all of result except sign */
+	clr	r6,r7,31<0>	/* clear all but sign */
+	br.n	FPnan_return
+	 or	r6,r6,r10	/* form result */
 
+ASLOCAL(wrdoubS2)
+	set	r6,r8,29<0>	/* set extra bits of lower word */
 
 /* Return from this subroutine with the result. */
 
-ASGLOBAL(FPnan_return)
-	     /* no modification necessary for writing */
-                                      /* double to double, so return from function */
-             ld     r1,r31,	 0   /* retrieve return address */
-             jmp    r1                /* return from function */
-
-             data
+ASLOCAL(FPnan_return)
+				/* no modification necessary for writing */
+				/* double to double, so return */
+	ld	r1,r31, 0	/* retrieve return address */
+	jmp	r1
 
-/* function _infinity --       */
-/* See the documentation of this release for an overall description of this */
-/* code. */
+	data
 
+/*
+ * infinity
+ */
 
 /* Extract the opcode, compare to a constant, and branch to the code */
 /* for the instruction. */
 
-             text
-	     align 8
-             global _infinity
-_infinity:   extu   r10,r9,5<11>   /* extract opcode */
-             cmp    r11,r10,FADDop /* compare to FADD */
-             bb1.n  eq,r11,FADD    /* operation is FADD */
-             st     r1,r31,0       /* save return address */
-             cmp    r11,r10,FSUBop /* compare to FSUB */
-             bb1    eq,r11,FSUB    /* operation is FSUB */
-             cmp    r11,r10,FCMPop /* compare to FCMP */
-             bb1    eq,r11,FCMP    /* operation is FCMP */
-             cmp    r11,r10,FMULop /* compare to FMUL */
-             bb1    eq,r11,FMUL    /* operation is FMUL */
-             cmp    r11,r10,FDIVop /* compare to FDIV */
-             bb1    eq,r11,FDIV    /* operation is FDIV */
-/*              cmp    r11,r10,FSQRTop;compare to FSQRT */
-/*              bb1    eq,r11,FSQRT   ;operation is FSQRT */
-             cmp    r11,r10,INTop  /* compare to INT */
-             bb1    eq,r11,FP_inf_overflw /* operation is INT */
-             cmp    r11,r10,NINTop /* compare to NINT */
-             bb1    eq,r11,FP_inf_overflw /* operation is NINT */
-             cmp    r11,r10,TRNCop /* compare to TRNC */
-             bb1    eq,r11,FP_inf_overflw /* operation is TRNC */
+ASLOCAL(infinity)	/* NOTE(review): the last section directive before this label is `data' and the old `text' was removed -- confirm the intended section */
+	extu	r10,r9,5<11>	/* r10 = 5-bit opcode field of r9 */
+	cmp	r11,r10,FADDop	/* compare to FADD */
+	bb1.n	eq,r11,FADD	/* operation is FADD */
+	 st	r1,r31,0	/* (delay slot) save return address at r31+0 */
+	cmp	r11,r10,FSUBop	/* compare to FSUB */
+	bb1	eq,r11,FSUB	/* operation is FSUB */
+	cmp	r11,r10,FCMPop	/* compare to FCMP */
+	bb1	eq,r11,FCMP	/* operation is FCMP */
+	cmp	r11,r10,FMULop	/* compare to FMUL */
+	bb1	eq,r11,FMUL	/* operation is FMUL */
+	cmp	r11,r10,FDIVop	/* compare to FDIV */
+	bb1	eq,r11,FDIV	/* operation is FDIV */
+#if 0
+	cmp	r11,r10,FSQRTop	/* compare to FSQRT */
+	bb1	eq,r11,FSQRT	/* operation is FSQRT */
+#endif
+	cmp	r11,r10,INTop	/* compare to INT */
+	bb1	eq,r11,FP_inf_overflw /* operation is INT */
+	cmp	r11,r10,NINTop	/* compare to NINT */
+	bb1	eq,r11,FP_inf_overflw /* operation is NINT */
+	cmp	r11,r10,TRNCop	/* compare to TRNC */
+	bb1	eq,r11,FP_inf_overflw /* operation is TRNC; any other opcode falls through into FADD */
 
 
 /* Adding infinities of opposite signs will cause an exception, */
 /* but all other operands will result in a correctly signed infinity. */
 
-FADD:        bb0    s1inf,r12,addS2write /* branch if S1 not infinity */
-             bb0    s2inf,r12,addS1write /* S2 is not inf., so branch to write S1 */
-             bb1    sign,r5,addS1neg   /* handle case of S1 negative */
-addS1pos:    bb1    sign,r7,excpt      /* adding infinities of different signs */
-                                       /* causes an exception */
-             br     poswrinf           /* branch to write positive infinity */
-addS1neg:    bb0    sign,r7,excpt      /* adding infinities of different signs */
-                                       /* causes an exception */
-             br     negwrinf           /* branch to write negative infinity */
-addS1write:  bb0    sign,r5,poswrinf   /* branch to write positive infinity */
-             br     negwrinf           /* branch to write negative infinity */
-addS2write:  bb0    sign,r7,poswrinf   /* branch to write positive infinity */
-             br     negwrinf           /* branch to write negative infinity */
+FADD:
+	bb0	s1inf,r12,addS2write	/* S1 is not infinity: result takes S2's sign */
+	bb0	s2inf,r12,addS1write	/* S2 is not infinity: result takes S1's sign */
+	bb1	sign,r5,addS1neg	/* both are infinity: split on S1's sign */
+addS1pos:
+	bb1	sign,r7,excpt		/* (+inf) + (-inf) is an invalid */
+					/* operation */
+	br	poswrinf		/* (+inf) + (+inf) = +inf */
+addS1neg:
+	bb0	sign,r7,excpt		/* (-inf) + (+inf) is an invalid */
+					/* operation */
+	br	negwrinf		/* (-inf) + (-inf) = -inf */
+addS1write:
+	bb0	sign,r5,poswrinf	/* S1 = +inf: write +inf */
+	br	negwrinf		/* S1 = -inf: write -inf */
+addS2write:
+	bb0	sign,r7,poswrinf	/* S2 = +inf: write +inf */
+	br	negwrinf		/* S2 = -inf: write -inf */
 
 
 /* Subtracting infinities of the same sign will cause an exception, */
 /* but all other operands will result in a correctly signed infinity. */
 
-FSUB:        bb0    s1inf,r12,subS2write /* branch if S1 not infinity */
-             bb0    s2inf,r12,subS1write /* S2 is not inf., so branch to write S1 */
-             bb1    sign,r5,subS1neg   /* handle case of S1 negative */
-subS1pos:    bb0    sign,r7,excpt      /* subtracting infinities of the same sign */
-                                       /* causes an exception */
-             br     poswrinf           /* branch to write positive infinity */
-subS1neg:    bb1    sign,r7,excpt      /* subtracting infinities of the same sign */
-                                       /* causes an exception */
-             br     negwrinf           /* branch to write negative infinity */
-subS1write:  bb0    sign,r5,poswrinf   /* branch to write positive infinity */
-             br     negwrinf           /* branch to write negative infinity */
-subS2write:  bb1    sign,r7,poswrinf   /* branch to write positive infinity */
-             br     negwrinf           /* branch to write negative infinity */
+FSUB:
+	bb0	s1inf,r12,subS2write	/* S1 is not infinity: result is S2 negated */
+	bb0	s2inf,r12,subS1write	/* S2 is not infinity: result takes S1's sign */
+	bb1	sign,r5,subS1neg	/* both are infinity: split on S1's sign */
+subS1pos:
+	bb0	sign,r7,excpt		/* (+inf) - (+inf) is an invalid */
+					/* operation */
+	br	poswrinf		/* (+inf) - (-inf) = +inf */
+subS1neg:
+	bb1	sign,r7,excpt		/* (-inf) - (-inf) is an invalid */
+					/* operation */
+	br	negwrinf		/* (-inf) - (+inf) = -inf */
+subS1write:
+	bb0	sign,r5,poswrinf	/* S1 = +inf: write +inf */
+	br	negwrinf		/* S1 = -inf: write -inf */
+subS2write:
+	bb1	sign,r7,poswrinf	/* S2 = -inf: write +inf */
+	br	negwrinf		/* S2 = +inf: write -inf */
 
 
 /* Compare the operands, at least one of which is infinity, and set the */
 /* correct bits in the destination register. */
 
-FCMP:        bb0.n  s1inf,r12,FCMPS1f  /* branch for finite S1 */
-             set    r4,r0,1<cp>        /* since neither S1 or S2 is a NaN, set cp */
-FCMPS1i:     bb1    sign,r5,FCMPS1ni   /* branch to negative S1i */
-FCMPS1pi:    bb0    s2inf,r12,FCMPS1piS2f /* branch to finite S2 with S1pi */
-FCMPS1piS2i: bb1    sign,r7,FCMPS1piS2ni /* branch to negative S2i with S1pi */
-FCMPS1piS2pi: set   r4,r4,1<eq>        /* set eq bit  */
-             set    r4,r4,1<le>        /* set le bit */
-             set    r4,r4,1<ge>        /* set ge bit */
-             set    r4,r4,1<ib>        /* set ib bit */
-             br.n   move               /* return from subroutine */
-             set    r4,r4,1<ob>        /* set ob bit */
-FCMPS1piS2ni: set   r4,r4,1<ne>        /* set ne bit */
-             set    r4,r4,1<gt>        /* set gt bit */
-             br.n   move               /* return from subroutine */
-             set    r4,r4,1<ge>        /* set ge bit */
-FCMPS1piS2f: set    r4,r4,1<ne>        /* set ne bit */
-             set    r4,r4,1<gt>        /* set gt bit */
-             bsr.n  _zero              /* see if any of the operands are zero */
-             set    r4,r4,1<ge>        /* set ge bit */
-             bb0    s2zero,r12,FCMPS1piS2nz /* check for negative if s2 not zero */
-	     set    r4,r4,1<ou>        /* set ou bit */
-	     br.n   move
-             set    r4,r4,1<ob>        /* set ob bit */
-FCMPS1piS2nz: bb1    sign,r7,move     /* return from subroutine if s2 is neg. */
-FCMPS1piS2pf: set   r4,r4,1<ou>        /* set ou bit */
-             br.n   move             /* return from subroutine */
-             set    r4,r4,1<ob>        /* set ob bit */
-FCMPS1ni:    bb0    s2inf,r12,FCMPS1niS2f /* branch to finite S2 with S1ni */
-FCMPS1niS2i: bb1    sign,r7,FCMPS1niS2ni /* branch to negative S2i with S1ni */
-FCMPS1niS2pi: set   r4,r4,1<ne>        /* set eq bit  */
-             set    r4,r4,1<le>        /* set le bit */
-             set    r4,r4,1<lt>        /* set lt bit */
-             set    r4,r4,1<ou>        /* set ou bit */
-             br.n   move             /* return from subroutine */
-             set    r4,r4,1<ob>        /* set ob bit */
-FCMPS1niS2ni: set   r4,r4,1<eq>        /* set eq bit  */
-             set    r4,r4,1<le>        /* set le bit */
-             br.n   move             /* return from subroutine */
-             set    r4,r4,1<ge>        /* set ge bit */
-FCMPS1niS2f: set    r4,r4,1<ne>        /* set eq bit  */
-             set    r4,r4,1<le>        /* set le bit */
-             bsr.n  _zero              /* see if any of the operands are zero */
-             set    r4,r4,1<lt>        /* set lt bit */
-             bb0    s2zero,r12,FCMPS1niS2nz /* branch if s2 is not zero */
-	     set    r4,r4,1<ou>        /* set ou bit */
-	     br.n   move
-             set    r4,r4,1<ob>        /* set ob bit */
-FCMPS1niS2nz: bb1    sign,r7,move     /* return from subroutine if s2 is neg. */
-             set    r4,r4,1<ou>        /* set ou bit */
-             br.n   move             /* return from subroutine */
-             set    r4,r4,1<ob>        /* set ob bit */
-FCMPS1f:     bb1    sign,r5,FCMPS1nf   /* branch to negative S1f */
-FCMPS1pf:    bb1.n  sign,r7,FCMPS1pfS2ni /* branch to negative S2i with S1pf */
-             set   r4,r4,1<ne>        /* set ne bit    */
-FCMPS1pfS2pi: set   r4,r4,1<le>        /* set le bit */
-             set    r4,r4,1<lt>        /* set lt bit */
-	     bsr.n  _zero
-             set    r4,r4,1<ib>        /* set ib bit */
-             bb0    s1zero,r12,FCMPS1pfS2pinozero
-FCMPS1pfS2pizero: br.n   move
-             set    r4,r4,1<ob>        /* set ob bit */
-FCMPS1pfS2pinozero: br.n move
-             set    r4,r4,1<in>        /* set in bit */
-FCMPS1pfS2ni: set    r4,r4,1<gt>        /* set gt bit */
-             br.n   move             /* return from subroutine */
-             set    r4,r4,1<ge>        /* set ge bit */
-FCMPS1nf:    bb1.n    sign,r7,FCMPS1nfS2ni /* branch to negative S2i with S1nf */
-             set    r4,r4,1<ne>        /* set ne bit */
-             set    r4,r4,1<le>        /* set gt bit */
-             set    r4,r4,1<lt>        /* set ge bit */
-             bsr.n  _zero              /* see which of the operands are zero */
-             set    r4,r4,1<ob>        /* set ob bit */
-             bb0    s1zero,r12,FCMPS1nfS2pinozero /* no ls and lo */
-FCMPS1nfS2pizero: br.n move
-             set    r4,r4,1<ib>        /* set ib bit */
-FCMPS1nfS2pinozero: br.n move
-             set    r4,r4,1<ou>        /* set ou bit */
-FCMPS1nfS2ni: set    r4,r4,1<gt>        /* set gt bit */
-             set    r4,r4,1<ge>        /* set ge bit */
-
-move:	     br.n   inf_return		/* return from subroutine */
-             or	    r6,r0,r4            /* transfer answer to r6 */
+FCMP:	/* result bits are built in r4 and copied to r6 at `move' */
+	bb0.n	s1inf,r12,FCMPS1f	/* branch for finite S1 */
+	 set	r4,r0,1<cp>		/* (delay slot) neither S1 nor S2 is a */
+					/* NaN here, so start with only cp set */
+FCMPS1i:
+	bb1	sign,r5,FCMPS1ni	/* S1 is -inf */
+FCMPS1pi:
+	bb0	s2inf,r12,FCMPS1piS2f	/* S1 is +inf, S2 is finite */
+FCMPS1piS2i:
+	bb1	sign,r7,FCMPS1piS2ni	/* S1 is +inf, S2 is -inf */
+FCMPS1piS2pi:
+	set	r4,r4,1<eq>		/* +inf == +inf: set eq bit */
+	set	r4,r4,1<le>		/* set le bit */
+	set	r4,r4,1<ge>		/* set ge bit */
+	set	r4,r4,1<ib>		/* set ib bit */
+	br.n	move
+	 set	r4,r4,1<ob>		/* (delay slot) set ob bit */
+FCMPS1piS2ni:
+	set	r4,r4,1<ne>		/* +inf > -inf: set ne bit */
+	set	r4,r4,1<gt>		/* set gt bit */
+	br.n	move
+	 set	r4,r4,1<ge>		/* (delay slot) set ge bit */
+FCMPS1piS2f:
+	set	r4,r4,1<ne>		/* +inf > finite S2: set ne bit */
+	set	r4,r4,1<gt>		/* set gt bit */
+	bsr.n	_ASM_LABEL(zero)	/* see if any of the operands are zero */
+	 set	r4,r4,1<ge>		/* (delay slot) set ge bit */
+	bb0	s2zero,r12,FCMPS1piS2nz	/* check for negative if s2 not zero */
+	set	r4,r4,1<ou>		/* S2 is zero: set ou bit */
+	br.n	move
+	 set	r4,r4,1<ob>		/* (delay slot) set ob bit */
+FCMPS1piS2nz:
+	bb1	sign,r7,move		/* return if s2 is negative */
+FCMPS1piS2pf:
+	set	r4,r4,1<ou>		/* S2 is positive: set ou bit */
+	br.n	move
+	 set	r4,r4,1<ob>		/* (delay slot) set ob bit */
+FCMPS1ni:
+	bb0	s2inf,r12,FCMPS1niS2f	/* S1 is -inf, S2 is finite */
+FCMPS1niS2i:
+	bb1	sign,r7,FCMPS1niS2ni	/* S1 is -inf, S2 is -inf */
+FCMPS1niS2pi:
+	set	r4,r4,1<ne>		/* -inf < +inf: set ne bit */
+	set	r4,r4,1<le>		/* set le bit */
+	set	r4,r4,1<lt>		/* set lt bit */
+	set	r4,r4,1<ou>		/* set ou bit */
+	br.n	move
+	 set	r4,r4,1<ob>		/* (delay slot) set ob bit */
+FCMPS1niS2ni:
+	set	r4,r4,1<eq>		/* -inf == -inf: set eq bit */
+	set	r4,r4,1<le>		/* set le bit */
+	br.n	move
+	 set	r4,r4,1<ge>		/* (delay slot) set ge bit */
+FCMPS1niS2f:
+	set	r4,r4,1<ne>		/* -inf < finite S2: set ne bit */
+	set	r4,r4,1<le>		/* set le bit */
+	bsr.n	_ASM_LABEL(zero)	/* see if any of the operands are zero */
+	 set	r4,r4,1<lt>		/* (delay slot) set lt bit */
+	bb0	s2zero,r12,FCMPS1niS2nz	/* branch if s2 is not zero */
+	set	r4,r4,1<ou>		/* S2 is zero: set ou bit */
+	br.n	move
+	 set	r4,r4,1<ob>		/* (delay slot) set ob bit */
+FCMPS1niS2nz:
+	bb1	sign,r7,move		/* return if s2 is negative */
+	set	r4,r4,1<ou>		/* S2 is positive: set ou bit */
+	br.n	move
+	 set	r4,r4,1<ob>		/* (delay slot) set ob bit */
+FCMPS1f:
+	bb1	sign,r5,FCMPS1nf	/* S1 is finite and negative */
+FCMPS1pf:
+	bb1.n	sign,r7,FCMPS1pfS2ni	/* S1 is finite and positive, S2 is negative */
+	 set	r4,r4,1<ne>		/* (delay slot) set ne bit */
+FCMPS1pfS2pi:
+	set	r4,r4,1<le>		/* S1 < S2: set le bit */
+	set	r4,r4,1<lt>		/* set lt bit */
+	bsr.n	_ASM_LABEL(zero)	/* see which of the operands are zero */
+	 set	r4,r4,1<ib>		/* (delay slot) set ib bit */
+	bb0	s1zero,r12,FCMPS1pfS2pinozero	/* branch if S1 is not zero */
+FCMPS1pfS2pizero:
+	br.n	move
+	 set	r4,r4,1<ob>		/* (delay slot) S1 is zero: set ob bit */
+FCMPS1pfS2pinozero:
+	br.n	move
+	 set	r4,r4,1<in>		/* (delay slot) S1 is not zero: set in bit */
+FCMPS1pfS2ni:
+	set	r4,r4,1<gt>		/* S1 > S2: set gt bit */
+	br.n	move
+	 set	r4,r4,1<ge>		/* (delay slot) set ge bit */
+FCMPS1nf:
+	bb1.n	sign,r7,FCMPS1nfS2ni	/* S1 is finite and negative, S2 is negative */
+	 set	r4,r4,1<ne>		/* (delay slot) set ne bit */
+	set	r4,r4,1<le>		/* S1 < S2: set le bit */
+	set	r4,r4,1<lt>		/* set lt bit */
+	bsr.n	_ASM_LABEL(zero)	/* see which of the operands are zero */
+	 set	r4,r4,1<ob>		/* (delay slot) set ob bit */
+	bb0	s1zero,r12,FCMPS1nfS2pinozero /* branch if S1 is not zero */
+FCMPS1nfS2pizero:
+	br.n	move
+	 set	r4,r4,1<ib>		/* (delay slot) S1 is zero: set ib bit */
+FCMPS1nfS2pinozero:
+	br.n	move
+	 set	r4,r4,1<ou>		/* (delay slot) S1 is not zero: set ou bit */
+FCMPS1nfS2ni:
+	set	r4,r4,1<gt>		/* S1 > S2: set gt bit */
+	set	r4,r4,1<ge>		/* set ge bit, then fall through to move */
+
+move:
+	br.n	inf_return
+	 or	r6,r0,r4		/* (delay slot) transfer answer to r6 */
 
 
 /* Multiplying infinity and zero causes an exception, but all other */
 /* operations produce a correctly signed infinity. */
 
-FMUL:        bsr    _zero              /* see if any of the operands are zero */
-             bb1    s1zero,r12,excpt   /* infinity X 0 causes an exception */
-             bb1    s2zero,r12,excpt   /* infinity X 0 causes an exception */
-             bb1    sign,r5,FMULS1neg  /* handle negative cases of S1 */
-             bb0    sign,r7,poswrinf   /* + X + = + */
-             br     negwrinf           /* + X - = - */
-FMULS1neg:   bb1    sign,r7,poswrinf   /* - X - = + */
-             br     negwrinf           /* - X + = - */
+FMUL:
+	bsr	_ASM_LABEL(zero)	/* see if any operand is zero; bsr overwrites r1 (saved at r31+0 by the dispatcher) */
+	bb1	s1zero,r12,excpt	/* 0 X infinity causes an exception */
+	bb1	s2zero,r12,excpt	/* infinity X 0 causes an exception */
+	bb1	sign,r5,FMULS1neg	/* S1 is negative */
+	bb0	sign,r7,poswrinf	/* + X + = + */
+	br	negwrinf		/* + X - = - */
+FMULS1neg:
+	bb1	sign,r7,poswrinf	/* - X - = + */
+	br	negwrinf		/* - X + = - */
 
 
-/* Dividing infinity by infinity causes an exception, but dividing  */
-/* infinity by a finite yields a correctly signed infinity, and  */
+/* Dividing infinity by infinity causes an exception, but dividing */
+/* infinity by a finite yields a correctly signed infinity, and */
 /* dividing a finite by an infinity produces a correctly signed zero. */
 
-FDIV:        bb1    s1inf,r12,FDIVS1inf /* handle case of S1 being infinity */
-             bb1    sign,r5,FDIVS1nf    /* handle cases of S1 being neg. non-inf. */
-             bb1    sign,r7,FDIVS1pfS2mi /* handle case of negative S2 */
-FDIVS1pfS2pi: br    poswrzero           /* +f / +inf = +0 */
-FDIVS1pfS2mi: br    negwrzero           /* +f / -inf = -0 */
-FDIVS1nf:    bb1    sign,r7,FDIVS1nfS2mi /* handle case of negative S2 */
-FDIVS1nfS2pi: br    negwrzero           /* -f / +inf = -0 */
-FDIVS1nfS2mi: br    poswrzero           /* -f / -inf = +0 */
-FDIVS1inf:   bb1    s2inf,r12,excpt     /* inf / inf = exception */
-             bb1    sign,r5,FDIVS1mi    /* handle cases of S1 being neg. inf. */
-             bb1    sign,r7,FDIVS1piS2nf /* handle case of negative S2 */
-FDIVS1piS2pf: br    poswrinf            /* +inf / +f = +inf */
-FDIVS1piS2nf: br    negwrinf            /* +inf / -f = -inf */
-FDIVS1mi:    bb1    sign,r7,FDIVS1miS2nf /* handle case of negative S2 */
-FDIVS1miS2pf: br    negwrinf            /* -inf / +f = -inf */
-FDIVS1miS2nf: br    poswrinf            /* -inf / -f = +inf */
-              
+FDIV:
+	bb1	s1inf,r12,FDIVS1inf	/* S1 is infinity */
+	bb1	sign,r5,FDIVS1nf	/* S1 is negative and not infinity */
+	bb1	sign,r7,FDIVS1pfS2mi	/* S1 is positive, S2 is negative */
+FDIVS1pfS2pi:
+	br	poswrzero		/* +f / +inf = +0 */
+FDIVS1pfS2mi:
+	br	negwrzero		/* +f / -inf = -0 */
+FDIVS1nf:
+	bb1	sign,r7,FDIVS1nfS2mi	/* S1 is negative, S2 is negative */
+FDIVS1nfS2pi:
+	br	negwrzero		/* -f / +inf = -0 */
+FDIVS1nfS2mi:
+	br	poswrzero		/* -f / -inf = +0 */
+FDIVS1inf:
+	bb1	s2inf,r12,excpt		/* inf / inf = exception */
+	bb1	sign,r5,FDIVS1mi	/* S1 is -inf */
+	bb1	sign,r7,FDIVS1piS2nf	/* S1 is +inf, S2 is negative */
+FDIVS1piS2pf:
+	br	poswrinf		/* +inf / +f = +inf */
+FDIVS1piS2nf:
+	br	negwrinf		/* +inf / -f = -inf */
+FDIVS1mi:
+	bb1	sign,r7,FDIVS1miS2nf	/* S1 is -inf, S2 is negative */
+FDIVS1miS2pf:
+	br	negwrinf		/* -inf / +f = -inf */
+FDIVS1miS2nf:
+	br	poswrinf		/* -inf / -f = +inf */
+
 
 /* The square root of positive infinity is positive infinity, */
 /* but the square root of negative infinity is a NaN */
 
-/* FSQRT:       bb0    sign,r7,poswrinf    ;write sqrt(inf) = inf */
-/*              br     excpt               ;write sqrt(-inf) = NaN */
+#if 0
+FSQRT:
+	bb0	sign,r7,poswrinf	/* write sqrt(inf) = inf */
+	br	excpt			/* write sqrt(-inf) = NaN */
+#endif
 
-excpt:       
-             set    r2,r2,1<oper>      /* set invalid operation bit of FPSR */
+excpt:
+	set	r2,r2,1<oper>		/* set invalid operation bit of FPSR */
 #ifdef HANDLER
-	     bb0    oper,r3,nohandler  /* branch if no user handler */
-             bsr    _handler           /* branch to interface with user handler */
-             br     inf_return             /* return from function */
-nohandler:   
+	bb0	oper,r3,nohandler	/* branch if no user handler */
+	bsr	_handler		/* branch to interface with user handler */
+	br	inf_return
+nohandler:
 #endif
-	     set    r5,r0,0<0>        /* write NaN into r5 */
-             br.n   inf_return             /* return from subroutine */
-             set    r6,r0,0<0>        /* write NaN into r6, writing NaN''s into */
-                                       /* both of these registers is quicker than */
-                                       /* checking for single or double precision */
+	set	r5,r0,0<0>	/* write NaN into r5 */
+	br.n	inf_return
+	 set	r6,r0,0<0>	/* write NaN into r6, writing NaN''s into */
+				/* both of these registers is quicker than */
+				/* checking for single or double precision */
 
 
 /* Write positive infinity of the correct precision */
 
-poswrinf:    bb1    dsize,r9,poswrinfd /* branch to write double precision inf. */
-             br.n   inf_return             /* return from subroutine */
-             or.u   r6,r0,0x7f80       /* load r6 with single precision pos inf.   */
-poswrinfd:   or.u   r5,r0,0x7ff0       /* load double precision pos inf. */
-             br.n   inf_return             /* return from subroutine */
-             or     r6,r0,r0
+poswrinf:
+	bb1	dsize,r9,poswrinfd	/* branch to write double precision inf. */
+	br.n	inf_return
+	 or.u	r6,r0,0x7f80		/* (delay slot) r6 = 0x7f800000, single precision +inf */
+poswrinfd:
+	or.u	r5,r0,0x7ff0		/* r5 = 0x7ff00000, high word of double +inf */
+	br.n	inf_return
+	 or	r6,r0,r0		/* (delay slot) low word = 0 */
 
 
 /* Write negative infinity of the correct precision */
 
-negwrinf:    bb1    dsize,r9,negwrinfd /* branch to write double precision inf. */
-             br.n   inf_return             /* return from subroutine */
-             or.u   r6,r0,0xff80       /* load r6 with single precision pos inf.   */
-negwrinfd:   or.u   r5,r0,0xfff0       /* load double precision pos inf. */
-             br.n   inf_return             /* return from subroutine */
-             or     r6,r0,r0
+negwrinf:
+	bb1	dsize,r9,negwrinfd	/* branch to write double precision inf. */
+	br.n	inf_return
+	 or.u	r6,r0,0xff80		/* (delay slot) r6 = 0xff800000, single precision -inf */
+negwrinfd:
+	or.u	r5,r0,0xfff0		/* r5 = 0xfff00000, high word of double -inf */
+	br.n	inf_return
+	 or	r6,r0,r0		/* (delay slot) low word = 0 */
 
 
 /* Write a positive zero disregarding precision. */
 
-poswrzero:   or     r5,r0,r0           /* write to both high word and low word now */
-             br.n   inf_return             /* it does not matter that both are written */
-             or     r6,r0,r0     
+poswrzero:
+	or	r5,r0,r0	/* clear the high word */
+	br.n	inf_return	/* both words are cleared, whatever the precision */
+	 or	r6,r0,r0	/* (delay slot) clear the low word */
 
 
 /* Write a negative zero of the correct precision. */
 
-negwrzero:   or     r6,r0,r0           /* clear low word */
-             bb1    dsize,r9,negwrzerod /* branch to write double precision zero */
-             br.n   inf_return             /* return from subroutine */
-             set    r6,r6,1<31>        /* set sign bit */
-negwrzerod:  or     r5,r0,r0           /* clear high word */
-             br.n   inf_return             /* return from subroutine */
-             set    r5,r5,1<31>        /* set sign bit */
-             
-FP_inf_overflw:     
-             set    r2,r2,1<oper>       /* set invalid operand bit */
+negwrzero:
+	or	r6,r0,r0	/* clear low word */
+	bb1	dsize,r9,negwrzerod /* branch to write double precision zero */
+	br.n	inf_return
+	 set	r6,r6,1<31>	/* (delay slot) single: sign bit goes in r6 */
+negwrzerod:
+	or	r5,r0,r0	/* clear high word */
+	br.n	inf_return
+	 set	r5,r5,1<31>	/* (delay slot) double: sign bit goes in r5 */
+
+FP_inf_overflw:
+	set	r2,r2,1<oper>	/* set invalid operand bit */
 #ifdef HANDLER
-	     bb0    oper,r3,nohandlero  /* do not go to user handler routine */
-             bsr    _handler            /* go to user handler routine */
-             br     inf_return              /* return from subroutine */
+	bb0	oper,r3,nohandlero /* do not go to user handler routine */
+	bsr	_handler	/* go to user handler routine */
+	br	inf_return
 #endif
 
-nohandlero:  bb0.n  sign,r7,inf_return      /* if positive then return from subroutine */
-             
-             set    r6,r6,31<0>         /* set result to largest positive integer */
-             or.c   r6,r0,r6            /* negate r6,giving largest negative int. */
+nohandlero:
+	bb0.n	sign,r7,inf_return /* S2 is positive: return after the delay slot */
 
-inf_return:      ld     r1,r31,0          /* load return address */
-             jmp    r1                 /* return from subroutine */
+	 set	r6,r6,31<0>	/* (delay slot) set bits 0-30 of r6; NOTE(review): bit 31 keeps its entry value -- confirm it is clear so this is the largest positive integer */
+	or.c	r6,r0,r6	/* one's complement of r6 (not a negate), meant to give the largest negative int. */
 
-             data
+inf_return:
+	ld	r1,r31,0	/* load return address */
+	jmp	r1
+
+	data
 
 #define FADD	denorm_FADD
 #define FSUB	denorm_FSUB
@@ -1890,397 +2000,521 @@ inf_return:      ld     r1,r31,0          /* load return address */
 #define NINT	denorm_NINT
 #define TRNC	denorm_TRNC
 #define return	denorm_return
-/* function _denorm --       */
-/* See the documentation for this release for an overall description of this */
-/* code. */
-       
 
-/* Check to see if either S1 or S2 is a denormalized number.  First  */
+/*
+ * denorm
+ */
+
+/* Check to see if either S1 or S2 is a denormalized number. First */
 /* extract the exponent to see if it is zero, and then check to see if */
-/* the mantissa is not zero.  If the number is denormalized, then set the */
+/* the mantissa is not zero. If the number is denormalized, then set the */
 /* 1 or 0 bit 10 r12. */
 
-             text
-	     align 8
-             global _denorm
-_denorm:     st     r1,r31,0  /* save return address */
-dnmcheckS1:  extu   r10,r5,11<20>  /* extract exponent */
-             bcnd   ne0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */
-             bb1.n  9,r9,dnmcheckS1d /* S1 is double precision */
-             mak    r10,r5,20<3>   /* mak field with only mantissa bits */
-                                   /* into final result */
-dnmcheckS1s: extu   r11,r6,3<29>   /* get three low bits of mantissa */
-             or     r10,r10,r11    /* assemble all of the mantissa bits */
-             bcnd   eq0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */
-             br     dnmsetS1       /* S1 is a denorm */
-
-dnmcheckS1d: or     r10,r6,r10     /* or all of mantissa bits */
-             bcnd   eq0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */
-dnmsetS1:    set    r12,r12,1<1> /* S1 is a denorm */
-
-dnmcheckS2:  extu   r10,r7,11<20>  /* extract exponent */
-             bcnd   ne0,r10,S1form /* S2 is not a denorm */
-             bb1.n  7,r9,dnmcheckS2d /* S2 is double precision */
-             mak    r10,r7,20<3>   /* mak field with only mantissa bits */
-dnmcheckS2s: extu   r11,r8,3<29>   /* get three low bits of mantissa */
-             or     r10,r10,r11    /* assemble all of the mantissa bits */
-             bcnd   eq0,r10,S1form /* S2 is not a denorm */
-             br     dnmsetS2       /* S1 is a denorm */
-dnmcheckS2d: or     r10,r8,r10     /* or all or mantissa bits */
-             bcnd   eq0,r10,S1form /* S2 is not a denorm */
-dnmsetS2:    set    r12,r12,1<0> /* S2 is a denorm */
+ASLOCAL(denorm)
+	st	r1,r31,0	/* save return address at 0(r31) */
+dnmcheckS1:
+	extu	r10,r5,11<20>	/* extract S1 exponent (11 bits at bit 20) */
+	bcnd	ne0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */
+	bb1.n	9,r9,dnmcheckS1d /* S1 is double precision */
+	 mak	r10,r5,20<3>	/* (delay slot) low 20 bits of r5, shifted */
+				/* left by 3: mantissa bits of high word */
+dnmcheckS1s:
+	extu	r11,r6,3<29>	/* get three low bits of mantissa */
+	or	r10,r10,r11	/* assemble all of the mantissa bits */
+	bcnd	eq0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */
+	br	dnmsetS1	/* S1 is a denorm */
+
+dnmcheckS1d:
+	or	r10,r6,r10	/* or all of the mantissa bits */
+	bcnd	eq0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */
+dnmsetS1:
+	set	r12,r12,1<1>	/* flag S1 as a denorm (bit 1 of r12) */
+
+dnmcheckS2:
+	extu	r10,r7,11<20>	/* extract S2 exponent (11 bits at bit 20) */
+	bcnd	ne0,r10,S1form	/* S2 is not a denorm */
+	bb1.n	7,r9,dnmcheckS2d /* S2 is double precision */
+	 mak	r10,r7,20<3>	/* (delay slot) mantissa bits of high word */
+dnmcheckS2s:
+	extu	r11,r8,3<29>	/* get three low bits of mantissa */
+	or	r10,r10,r11	/* assemble all of the mantissa bits */
+	bcnd	eq0,r10,S1form	/* S2 is not a denorm */
+	br	dnmsetS2	/* S2 is a denorm */
+dnmcheckS2d:
+	or	r10,r8,r10	/* or all of the mantissa bits */
+	bcnd	eq0,r10,S1form	/* S2 is not a denorm */
+dnmsetS2:
+	set	r12,r12,1<0>	/* flag S2 as a denorm (bit 0 of r12) */
 
 
 /* Since the operations are going to be reperformed with modified denorms, */
 /* the operands which were initially single precision need to be modified */
-/* back to single precision.   */
-
-S1form:      bb1    9,r9,S2form /* S1 is double precision, so do not */
-                                     /* modify S1 into single format */
-             mak    r11,r5,28<3>   /*  over final exponent and mantissa */
-                                   /* eliminating extra 3 bits of exponent */
-             extu   r6,r6,3<29>    /* get low 3 bits of mantissa */
-             or     r11,r6,r11     /* form complete mantissa and exponent */
-             extu   r10,r5,1<31> /* get the 31 bit */
-             mak    r10,r10,1<31>  /* place 31 bit 10 correct position */
-             or     r6,r10,r11     /* or 31, exponent, and all of mantissa */
-
-S2form:      bb1    7,r9,checkop /* S2 is double precision, so do not */
-                                      /* modify S2 into single format */
-             mak    r11,r7,28<3>   /*  over final exponent and mantissa */
-                                   /* eliminating extra 3 bits of exponent */
-             extu   r8,r8,3<29>    /* get low 3 bits of mantissa */
-             or     r11,r8,r11     /* form complete mantissa and exponent */
-             extu   r10,r7,1<31> /* get the 31 bit */
-             mak    r10,r10,1<31>  /* place 31 bit 10 correct position */
-             or     r8,r10,r11     /* or 31, exponent, and all of mantissa */
+/* back to single precision.	*/
+
+S1form:
+	bb1	9,r9,S2form	/* S1 is double precision, so do not */
+				/* modify S1 into single format */
+	mak	r11,r5,28<3>	/* shift exponent and mantissa left by 3, */
+				/* eliminating extra 3 bits of exponent */
+	extu	r6,r6,3<29>	/* get low 3 bits of mantissa */
+	or	r11,r6,r11	/* form complete mantissa and exponent */
+	extu	r10,r5,1<31>	/* get the sign bit (bit 31) */
+	mak	r10,r10,1<31>	/* place sign bit in correct position */
+	or	r6,r10,r11	/* or sign, exponent, and all of mantissa */
+
+S2form:
+	bb1	7,r9,checkop	/* S2 is double precision, so do not */
+				/* modify S2 into single format */
+	mak	r11,r7,28<3>	/* shift exponent and mantissa left by 3, */
+				/* eliminating extra 3 bits of exponent */
+	extu	r8,r8,3<29>	/* get low 3 bits of mantissa */
+	or	r11,r8,r11	/* form complete mantissa and exponent */
+	extu	r10,r7,1<31>	/* get the sign bit (bit 31) */
+	mak	r10,r10,1<31>	/* place sign bit in correct position */
+	or	r8,r10,r11	/* or sign, exponent, and all of mantissa */
 
 
 /* Extract the opcode, compare to a constant, and branch to the code that */
 /* deals with that opcode. */
 
-checkop:     extu   r10,r9,5<11>   /* extract opcode */
-             cmp    r11,r10,0x05 /* compare to FADD */
-             bb1    2,r11,FADD    /* operation is FADD */
-             cmp    r11,r10,0x06 /* compare to FSUB */
-             bb1    2,r11,FSUB    /* operation is FSUB */
-             cmp    r11,r10,0x07 /* compare to FCMP */
-             bb1    2,r11,FCMP    /* operation is FCMP */
-             cmp    r11,r10,0x00 /* compare to FMUL */
-             bb1    2,r11,FMUL    /* operation is FMUL */
-             cmp    r11,r10,0x0e /* compare to FDIV */
-             bb1    2,r11,FDIV    /* operation is FDIV */
-/*              cmp    r11,r10,0x0f;compare to FSQRT */
-/*              bb1    2,r11,FSQRT   ;operation is FSQRT */
-             cmp    r11,r10,0x09  /* compare to INT */
-             bb1    2,r11,INT     /* operation is INT */
-             cmp    r11,r10,0x0a /* compare to NINT */
-             bb1    2,r11,NINT    /* operation is NINT */
-             cmp    r11,r10,0x0b /* compare to TRNC */
-             bb1    2,r11,TRNC    /* operation is TRNC */
+checkop:
+	extu	r10,r9,5<11>	/* extract 5-bit opcode (bits 15-11 of r9) */
+	cmp	r11,r10,0x05	/* compare to FADD */
+	bb1	2,r11,FADD	/* bit 2 of cmp result is "eq": FADD */
+	cmp	r11,r10,0x06	/* compare to FSUB */
+	bb1	2,r11,FSUB	/* operation is FSUB */
+	cmp	r11,r10,0x07	/* compare to FCMP */
+	bb1	2,r11,FCMP	/* operation is FCMP */
+	cmp	r11,r10,0x00	/* compare to FMUL */
+	bb1	2,r11,FMUL	/* operation is FMUL */
+	cmp	r11,r10,0x0e	/* compare to FDIV */
+	bb1	2,r11,FDIV	/* operation is FDIV */
+#if 0
+	cmp	r11,r10,0x0f	/* compare to FSQRT */
+	bb1	2,r11,FSQRT	/* operation is FSQRT */
+#endif
+	cmp	r11,r10,0x09	/* compare to INT */
+	bb1	2,r11,INT	/* operation is INT */
+	cmp	r11,r10,0x0a	/* compare to NINT */
+	bb1	2,r11,NINT	/* operation is NINT */
+	cmp	r11,r10,0x0b	/* compare to TRNC */
+	bb1	2,r11,TRNC	/* operation is TRNC */
 
 
 /* For all the following operations, the denormalized number is set to */
 /* zero and the operation is reperformed the correct destination and source */
 /* sizes. */
 
-FADD:        bb0    1,r12,FADDS2dnm /* S1 is not denorm, so S2 must be */
-             or     r5,r0,r0     /* set S1 to zero */
-             or     r6,r0,r0
-FADDS2chk:   bb0    0,r12,FADDcalc /* S2 is not a denorm */
-FADDS2dnm:   or     r7,r0,r0     /* set S2 to zero */
-             or     r8,r0,r0
-FADDcalc:    bb1    5,r9,FADDdD   /* branch for double precision destination */
-FADDsD:      bb1    9,r9,FADDsDdS1 /* branch for double precision S1 */
-FADDsDsS1:   bb1    7,r9,FADDsDsS1dS2 /* branch for double precision S2 */
-FADDsDsS1sS2: br.n  return      /* return from subroutine */
-              fadd.sss r6,r6,r8   /* add the two sources and place result 10 S1 */
-FADDsDsS1dS2: br.n  return      /* return from subroutine */
-              fadd.ssd r6,r6,r7   /* add the two sources and place result 10 S1 */
-FADDsDdS1:   bb1    7,r9,FADDsDdS1dS2 /* branch for double precision S2 */
-FADDsDdS1sS2: br.n  return      /* return from subroutine */
-              fadd.sds r6,r5,r8   /* add the two sources and place result 10 S1 */
-FADDsDdS1dS2: br.n  return      /* return from subroutine */
-              fadd.sdd r6,r5,r7   /* add the two sources and place result 10 S1 */
-FADDdD:      bb1    9,r9,FADDdDdS1 /* branch for double precision S1 */
-FADDdDsS1:   bb1    7,r9,FADDdDsS1dS2 /* branch for double precision S2 */
-FADDdDsS1sS2: br.n  return      /* return from subroutine */
-              fadd.dss r5,r6,r8   /* add the two sources and place result 10 S1 */
-FADDdDsS1dS2: br.n  return      /* return from subroutine */
-              fadd.dsd r5,r6,r7   /* add the two sources and place result 10 S1 */
-FADDdDdS1:   bb1    7,r9,FADDdDdS1dS2 /* branch for double precision S2 */
-FADDdDdS1sS2: br.n  return      /* return from subroutine */
-              fadd.dds r5,r5,r8   /* add the two sources and place result 10 S1 */
-FADDdDdS1dS2: br.n  return      /* return from subroutine */
-              fadd.ddd r5,r5,r7   /* add the two sources and place result 10 S1 */
-
-FSUB:        bb0    1,r12,FSUBS2dnm /* S1 is not denorm, so S2 must be */
-             or     r5,r0,r0     /* set S1 to zero */
-             or     r6,r0,r0
-FSUBS2chk:   bb0    0,r12,FSUBcalc /* S2 is not a denorm */
-FSUBS2dnm:   or     r7,r0,r0     /* set S2 to zero */
-             or     r8,r0,r0
-FSUBcalc:    bb1    5,r9,FSUBdD   /* branch for double precision destination */
-FSUBsD:      bb1    9,r9,FSUBsDdS1 /* branch for double precision S1 */
-FSUBsDsS1:   bb1    7,r9,FSUBsDsS1dS2 /* branch for double precision S2 */
-FSUBsDsS1sS2: br.n  return      /* return from subroutine */
-              fsub.sss r6,r6,r8   /* add the two sources and place result 10 S1 */
-FSUBsDsS1dS2: br.n  return      /* return from subroutine */
-              fsub.ssd r6,r6,r7   /* add the two sources and place result 10 S1 */
-FSUBsDdS1:   bb1    7,r9,FSUBsDdS1dS2 /* branch for double precision S2 */
-FSUBsDdS1sS2: br.n  return      /* return from subroutine */
-              fsub.sds r6,r5,r8   /* add the two sources and place result 10 S1 */
-FSUBsDdS1dS2: br.n  return      /* return from subroutine */
-              fsub.sdd r6,r5,r7   /* add the two sources and place result 10 S1 */
-FSUBdD:      bb1    9,r9,FSUBdDdS1 /* branch for double precision S1 */
-FSUBdDsS1:   bb1    7,r9,FSUBdDsS1dS2 /* branch for double precision S2 */
-FSUBdDsS1sS2: br.n  return      /* return from subroutine */
-              fsub.dss r5,r6,r8   /* add the two sources and place result 10 S1 */
-FSUBdDsS1dS2: br.n  return      /* return from subroutine */
-              fsub.dsd r5,r6,r7   /* add the two sources and place result 10 S1 */
-FSUBdDdS1:   bb1    7,r9,FSUBdDdS1dS2 /* branch for double precision S2 */
-FSUBdDdS1sS2: br.n  return      /* return from subroutine */
-              fsub.dds r5,r5,r8   /* add the two sources and place result 10 S1 */
-FSUBdDdS1dS2: br.n  return      /* return from subroutine */
-              fsub.ddd r5,r5,r7   /* add the two sources and place result 10 S1 */
-        
-FCMP:        bb0    1,r12,FCMPS2dnm /* S1 is not denorm, so S2 must be */
-             or     r5,r0,r0     /* set S1 to zero */
-             or     r6,r0,r0
-FCMPS2chk:   bb0    0,r12,FCMPcalc /* S2 is not a denorm */
-FCMPS2dnm:   or     r7,r0,r0     /* set S2 to zero */
-             or     r8,r0,r0
-FCMPcalc:    bb1    9,r9,FCMPdS1 /* branch for double precision S1 */
-FCMPsS1:     bb1    7,r9,FCMPsS1dS2 /* branch for double precision S2 */
-FCMPsS1sS2:  br.n  return      /* return from subroutine */
-             fcmp.sss r6,r6,r8   /* add the two sources and place result 10 S1 */
-FCMPsS1dS2:  br.n  return      /* return from subroutine */
-             fcmp.ssd r6,r6,r7   /* add the two sources and place result 10 S1 */
-FCMPdS1:     bb1    7,r9,FCMPdS1dS2 /* branch for double precision S2 */
-FCMPdS1sS2:  br.n  return      /* return from subroutine */
-             fcmp.sds r6,r5,r8   /* add the two sources and place result 10 S1 */
-FCMPdS1dS2:  br.n  return      /* return from subroutine */
-             fcmp.sdd r6,r5,r7   /* add the two sources and place result 10 S1 */
-
-FMUL:        bb0    1,r12,FMULS2dnm /* S1 is not denorm, so S2 must be */
-             or     r5,r0,r0     /* set S1 to zero */
-             or     r6,r0,r0
-FMULS2chk:   bb0    0,r12,FMULcalc /* S2 is not a denorm */
-FMULS2dnm:   or     r7,r0,r0     /* set S2 to zero */
-             or     r8,r0,r0
-FMULcalc:    bb1    5,r9,FMULdD   /* branch for double precision destination */
-FMULsD:      bb1    9,r9,FMULsDdS1 /* branch for double precision S1 */
-FMULsDsS1:   bb1    7,r9,FMULsDsS1dS2 /* branch for double precision S2 */
-FMULsDsS1sS2: br.n  return      /* return from subroutine */
-              fmul.sss r6,r6,r8   /* add the two sources and place result 10 S1 */
-FMULsDsS1dS2: br.n  return      /* return from subroutine */
-              fmul.ssd r6,r6,r7   /* add the two sources and place result 10 S1 */
-FMULsDdS1:   bb1    7,r9,FMULsDdS1dS2 /* branch for double precision S2 */
-FMULsDdS1sS2: br.n  return      /* return from subroutine */
-              fmul.sds r6,r5,r8   /* add the two sources and place result 10 S1 */
-FMULsDdS1dS2: br.n  return      /* return from subroutine */
-              fmul.sdd r6,r5,r7   /* add the two sources and place result 10 S1 */
-FMULdD:      bb1    9,r9,FMULdDdS1 /* branch for double precision S1 */
-FMULdDsS1:   bb1    7,r9,FMULdDsS1dS2 /* branch for double precision S2 */
-FMULdDsS1sS2: br.n  return      /* return from subroutine */
-              fmul.dss r5,r6,r8   /* add the two sources and place result 10 S1 */
-FMULdDsS1dS2: br.n  return      /* return from subroutine */
-              fmul.dsd r5,r6,r7   /* add the two sources and place result 10 S1 */
-FMULdDdS1:   bb1    7,r9,FMULdDdS1dS2 /* branch for double precision S2 */
-FMULdDdS1sS2: br.n  return      /* return from subroutine */
-              fmul.dds r5,r5,r8   /* add the two sources and place result 10 S1 */
-FMULdDdS1dS2: br.n  return      /* return from subroutine */
-              fmul.ddd r5,r5,r7   /* add the two sources and place result 10 S1 */
-
-FDIV:        bb0    1,r12,FDIVS2dnm /* S1 is not denorm, so S2 must be */
-             or     r5,r0,r0     /* set S1 to zero */
-             or     r6,r0,r0
-FDIVS2chk:   bb0    0,r12,FDIVcalc /* S2 is not a denorm */
-FDIVS2dnm:   or     r7,r0,r0     /* set S2 to zero */
-             or     r8,r0,r0
-FDIVcalc:    bb1    5,r9,FDIVdD   /* branch for double precision destination */
-FDIVsD:      bb1    9,r9,FDIVsDdS1 /* branch for double precision S1 */
-FDIVsDsS1:   bb1    7,r9,FDIVsDsS1dS2 /* branch for double precision S2 */
-FDIVsDsS1sS2: fdiv.sss r6,r6,r8   /* add the two sources and place result 10 S1 */
-	      br  return      /* return from subroutine */
-FDIVsDsS1dS2: fdiv.ssd r6,r6,r7   /* add the two sources and place result 10 S1 */
-              br    return      /* return from subroutine */
-FDIVsDdS1:   bb1    7,r9,FDIVsDdS1dS2 /* branch for double precision S2 */
-FDIVsDdS1sS2: fdiv.sds r6,r5,r8   /* add the two sources and place result 10 S1 */
-	      br    return      /* return from subroutine */
-FDIVsDdS1dS2: fdiv.sdd r6,r5,r7   /* add the two sources and place result 10 S1 */
-	      br    return      /* return from subroutine */
-FDIVdD:      bb1    9,r9,FDIVdDdS1 /* branch for double precision S1 */
-FDIVdDsS1:   bb1    7,r9,FDIVdDsS1dS2 /* branch for double precision S2 */
-FDIVdDsS1sS2: fdiv.dss r5,r6,r8   /* add the two sources and place result 10 S1 */
-	      br    return      /* return from subroutine */
-FDIVdDsS1dS2: fdiv.dsd r5,r6,r7   /* add the two sources and place result 10 S1 */
-	      br    return      /* return from subroutine */
-FDIVdDdS1:   bb1    7,r9,FDIVdDdS1dS2 /* branch for double precision S2 */
-FDIVdDdS1sS2: fdiv.dds r5,r5,r8   /* add the two sources and place result 10 S1 */
-	      br    return      /* return from subroutine */
-FDIVdDdS1dS2: fdiv.ddd r5,r5,r7   /* add the two sources and place result 10 S1 */
-	      br    return      /* return from subroutine */
-
-/* FSQRT:       or     r7,r0,r0     ;set S2 to zero */
-/*              or     r8,r0,r0 */
-/* FSQRTcalc:   bb1    5,r9,FSQRTdD   ;branch for double precision destination */
-/* FSQRTsD:     bb1    7,r9,FSQRTsDdS2 ;branch for double precision S2 */
-/* FSQRTsDsS2:  br.n   return   ;return from subroutine */
-            /* fsqrt.ss r6,r8   ;add the two sources and place result 10 S1 */
-/* FSQRTsDdS2:  br.n   return   ;return from subroutine */
-            /* fsqrt.sd r6,r7   ;add the two sources and place result 10 S1 */
-/* FSQRTdD:     bb1    7,r9,FSQRTdDdS2 ;branch for double precision S2 */
-/* FSQRTdDsS2:  br.n   return   ;return from subroutine */
-            /* fsqrt.ds r5,r8   ;add the two sources and place result 10 S1 */
-/* FSQRTdDdS2:  br.n   return   ;return from subroutine */
-            /* fsqrt.dd r5,r7   ;add the two sources and place result 10 S1 */
-
-INT:         or     r7,r0,r0     /* set S2 to zero */
-             or     r8,r0,r0
-INTcalc:     bb1    7,r9,INTdS2 /* branch for double precision S2 */
-INTsS2:      br.n   return   /* return from subroutine */
-             int.ss r6,r8    /* add the two sources and place result 10 S1 */
-INTdS2:      br.n   return   /* return from subroutine */
-             int.sd r6,r7   /* add the two sources and place result 10 S1 */
-
-NINT:        or     r7,r0,r0     /* set S2 to zero */
-             or     r8,r0,r0
-NINTcalc:    bb1    7,r9,NINTdS2 /* branch for double precision S2 */
-NINTsS2:     br.n   return   /* return from subroutine */
-             nint.ss r6,r8    /* add the two sources and place result 10 S1 */
-NINTdS2:     br.n   return   /* return from subroutine */
-             nint.sd r6,r7   /* add the two sources and place result 10 S1 */
-
-TRNC:        or     r7,r0,r0     /* set S2 to zero */
-             or     r8,r0,r0
-TRNCcalc:    bb1    7,r9,TRNCdS2 /* branch for double precision S2 */
-TRNCsS2:     br.n   return   /* return from subroutine */
-             trnc.ss r6,r8    /* add the two sources and place result 10 S1 */
-TRNCdS2:     trnc.sd r6,r7   /* add the two sources and place result 10 S1 */
+FADD:
+	bb0	1,r12,FADDS2dnm	/* S1 is not denorm, so S2 must be */
+	or	r5,r0,r0	/* set S1 to zero */
+	or	r6,r0,r0
+FADDS2chk:
+	bb0	0,r12,FADDcalc	/* S2 is not a denorm */
+FADDS2dnm:
+	or	r7,r0,r0	/* set S2 to zero */
+	or	r8,r0,r0
+FADDcalc:
+	bb1	5,r9,FADDdD	/* branch for double precision destination */
+FADDsD:
+	bb1	9,r9,FADDsDdS1	/* branch for double precision S1 */
+FADDsDsS1:
+	bb1	7,r9,FADDsDsS1dS2 /* branch for double precision S2 */
+FADDsDsS1sS2:
+	br.n	return
+	 fadd.sss r6,r6,r8	/* add the two sources and place result in S1 */
+FADDsDsS1dS2:
+	br.n	return
+	 fadd.ssd r6,r6,r7	/* add the two sources and place result in S1 */
+FADDsDdS1:
+	bb1	7,r9,FADDsDdS1dS2 /* branch for double precision S2 */
+FADDsDdS1sS2:
+	br.n	return
+	 fadd.sds r6,r5,r8	/* add the two sources and place result in S1 */
+FADDsDdS1dS2:
+	br.n	return
+	 fadd.sdd r6,r5,r7	/* add the two sources and place result in S1 */
+FADDdD:
+	bb1	9,r9,FADDdDdS1	/* branch for double precision S1 */
+FADDdDsS1:
+	bb1	7,r9,FADDdDsS1dS2 /* branch for double precision S2 */
+FADDdDsS1sS2:
+	br.n	return
+	 fadd.dss r5,r6,r8	/* add the two sources and place result in S1 */
+FADDdDsS1dS2:
+	br.n	return
+	 fadd.dsd r5,r6,r7	/* add the two sources and place result in S1 */
+FADDdDdS1:
+	bb1	7,r9,FADDdDdS1dS2 /* branch for double precision S2 */
+FADDdDdS1sS2:
+	br.n	return
+	 fadd.dds r5,r5,r8	/* add the two sources and place result in S1 */
+FADDdDdS1dS2:
+	br.n	return
+	 fadd.ddd r5,r5,r7	/* add the two sources and place result in S1 */
+
+FSUB:
+	bb0	1,r12,FSUBS2dnm	/* S1 is not denorm, so S2 must be */
+	or	r5,r0,r0	/* set S1 to zero */
+	or	r6,r0,r0
+FSUBS2chk:
+	bb0	0,r12,FSUBcalc	/* S2 is not a denorm */
+FSUBS2dnm:
+	or	r7,r0,r0	/* set S2 to zero */
+	or	r8,r0,r0
+FSUBcalc:
+	bb1	5,r9,FSUBdD	/* branch for double precision destination */
+FSUBsD:
+	bb1	9,r9,FSUBsDdS1	/* branch for double precision S1 */
+FSUBsDsS1:
+	bb1	7,r9,FSUBsDsS1dS2 /* branch for double precision S2 */
+FSUBsDsS1sS2:
+	br.n	return
+	 fsub.sss r6,r6,r8	/* subtract S2 from S1 and place result in S1 */
+FSUBsDsS1dS2:
+	br.n	return
+	 fsub.ssd r6,r6,r7	/* subtract S2 from S1 and place result in S1 */
+FSUBsDdS1:
+	bb1	7,r9,FSUBsDdS1dS2 /* branch for double precision S2 */
+FSUBsDdS1sS2:
+	br.n	return
+	 fsub.sds r6,r5,r8	/* subtract S2 from S1 and place result in S1 */
+FSUBsDdS1dS2:
+	br.n	return
+	 fsub.sdd r6,r5,r7	/* subtract S2 from S1 and place result in S1 */
+FSUBdD:
+	bb1	9,r9,FSUBdDdS1	/* branch for double precision S1 */
+FSUBdDsS1:
+	bb1	7,r9,FSUBdDsS1dS2 /* branch for double precision S2 */
+FSUBdDsS1sS2:
+	br.n	return
+	 fsub.dss r5,r6,r8	/* subtract S2 from S1 and place result in S1 */
+FSUBdDsS1dS2:
+	br.n	return
+	 fsub.dsd r5,r6,r7	/* subtract S2 from S1 and place result in S1 */
+FSUBdDdS1:
+	bb1	7,r9,FSUBdDdS1dS2 /* branch for double precision S2 */
+FSUBdDdS1sS2:
+	br.n	return
+	 fsub.dds r5,r5,r8	/* subtract S2 from S1 and place result in S1 */
+FSUBdDdS1dS2:
+	br.n	return
+	 fsub.ddd r5,r5,r7	/* subtract S2 from S1 and place result in S1 */
+
+FCMP:
+	bb0	1,r12,FCMPS2dnm	/* S1 is not denorm, so S2 must be */
+	or	r5,r0,r0	/* set S1 to zero */
+	or	r6,r0,r0
+FCMPS2chk:
+	bb0	0,r12,FCMPcalc	/* S2 is not a denorm */
+FCMPS2dnm:
+	or	r7,r0,r0	/* set S2 to zero */
+	or	r8,r0,r0
+FCMPcalc:
+	bb1	9,r9,FCMPdS1	/* branch for double precision S1 */
+FCMPsS1:
+	bb1	7,r9,FCMPsS1dS2	/* branch for double precision S2 */
+FCMPsS1sS2:
+	br.n	return
+	 fcmp.sss r6,r6,r8	/* compare the two sources, result in r6 */
+FCMPsS1dS2:
+	br.n	return
+	 fcmp.ssd r6,r6,r7	/* compare the two sources, result in r6 */
+FCMPdS1:
+	bb1	7,r9,FCMPdS1dS2	/* branch for double precision S2 */
+FCMPdS1sS2:
+	br.n	return
+	 fcmp.sds r6,r5,r8	/* compare the two sources, result in r6 */
+FCMPdS1dS2:
+	br.n	return
+	 fcmp.sdd r6,r5,r7	/* compare the two sources, result in r6 */
+
+FMUL:
+	bb0	1,r12,FMULS2dnm	/* S1 is not denorm, so S2 must be */
+	or	r5,r0,r0	/* set S1 to zero */
+	or	r6,r0,r0
+FMULS2chk:
+	bb0	0,r12,FMULcalc	/* S2 is not a denorm */
+FMULS2dnm:
+	or	r7,r0,r0	/* set S2 to zero */
+	or	r8,r0,r0
+FMULcalc:
+	bb1	5,r9,FMULdD	/* branch for double precision destination */
+FMULsD:
+	bb1	9,r9,FMULsDdS1	/* branch for double precision S1 */
+FMULsDsS1:
+	bb1	7,r9,FMULsDsS1dS2 /* branch for double precision S2 */
+FMULsDsS1sS2:
+	br.n	return
+	 fmul.sss r6,r6,r8	/* multiply the two sources, result in S1 */
+FMULsDsS1dS2:
+	br.n	return
+	 fmul.ssd r6,r6,r7	/* multiply the two sources, result in S1 */
+FMULsDdS1:
+	bb1	7,r9,FMULsDdS1dS2 /* branch for double precision S2 */
+FMULsDdS1sS2:
+	br.n	return
+	 fmul.sds r6,r5,r8	/* multiply the two sources, result in S1 */
+FMULsDdS1dS2:
+	br.n	return
+	 fmul.sdd r6,r5,r7	/* multiply the two sources, result in S1 */
+FMULdD:
+	bb1	9,r9,FMULdDdS1	/* branch for double precision S1 */
+FMULdDsS1:
+	bb1	7,r9,FMULdDsS1dS2 /* branch for double precision S2 */
+FMULdDsS1sS2:
+	br.n	return
+	 fmul.dss r5,r6,r8	/* multiply the two sources, result in S1 */
+FMULdDsS1dS2:
+	br.n	return
+	 fmul.dsd r5,r6,r7	/* multiply the two sources, result in S1 */
+FMULdDdS1:
+	bb1	7,r9,FMULdDdS1dS2 /* branch for double precision S2 */
+FMULdDdS1sS2:
+	br.n	return
+	 fmul.dds r5,r5,r8	/* multiply the two sources, result in S1 */
+FMULdDdS1dS2:
+	br.n	return
+	 fmul.ddd r5,r5,r7	/* multiply the two sources, result in S1 */
+
+FDIV:
+	bb0	1,r12,FDIVS2dnm	/* S1 is not denorm, so S2 must be */
+	or	r5,r0,r0	/* set S1 to zero */
+	or	r6,r0,r0
+FDIVS2chk:
+	bb0	0,r12,FDIVcalc	/* S2 is not a denorm */
+FDIVS2dnm:
+	or	r7,r0,r0	/* set S2 to zero */
+	or	r8,r0,r0
+FDIVcalc:
+	bb1	5,r9,FDIVdD	/* branch for double precision destination */
+FDIVsD:
+	bb1	9,r9,FDIVsDdS1	/* branch for double precision S1 */
+FDIVsDsS1:
+	bb1	7,r9,FDIVsDsS1dS2 /* branch for double precision S2 */
+FDIVsDsS1sS2:
+	fdiv.sss r6,r6,r8	/* divide S1 by S2 and place result in S1 */
+	br	return
+FDIVsDsS1dS2:
+	fdiv.ssd r6,r6,r7	/* divide S1 by S2 and place result in S1 */
+	br	return
+FDIVsDdS1:
+	bb1	7,r9,FDIVsDdS1dS2 /* branch for double precision S2 */
+FDIVsDdS1sS2:
+	fdiv.sds r6,r5,r8	/* divide S1 by S2 and place result in S1 */
+	br	return
+FDIVsDdS1dS2:
+	fdiv.sdd r6,r5,r7	/* divide S1 by S2 and place result in S1 */
+	br	return
+FDIVdD:
+	bb1	9,r9,FDIVdDdS1	/* branch for double precision S1 */
+FDIVdDsS1:
+	bb1	7,r9,FDIVdDsS1dS2 /* branch for double precision S2 */
+FDIVdDsS1sS2:
+	fdiv.dss r5,r6,r8	/* divide S1 by S2 and place result in S1 */
+	br	return
+FDIVdDsS1dS2:
+	fdiv.dsd r5,r6,r7	/* divide S1 by S2 and place result in S1 */
+	br	return
+FDIVdDdS1:
+	bb1	7,r9,FDIVdDdS1dS2 /* branch for double precision S2 */
+FDIVdDdS1sS2:
+	fdiv.dds r5,r5,r8	/* divide S1 by S2 and place result in S1 */
+	br	return
+FDIVdDdS1dS2:
+	fdiv.ddd r5,r5,r7	/* divide S1 by S2 and place result in S1 */
+	br	return
+
+#if 0
+FSQRT:
+	or	r7,r0,r0	/* set S2 to zero */
+	or	r8,r0,r0
+FSQRTcalc:
+	bb1	5,r9,FSQRTdD	/* branch for double precision destination */
+FSQRTsD:
+	bb1	7,r9,FSQRTsDdS2 /* branch for double precision S2 */
+FSQRTsDsS2:
+	br.n	return
+	 fsqrt.ss r6,r8		/* take square root of S2, result in r6 */
+FSQRTsDdS2:
+	br.n	return
+	 fsqrt.sd r6,r7		/* take square root of S2, result in r6 */
+FSQRTdD:
+	bb1	7,r9,FSQRTdDdS2	/* branch for double precision S2 */
+FSQRTdDsS2:
+	br.n	return
+	 fsqrt.ds r5,r8		/* take square root of S2, result in r5 */
+FSQRTdDdS2:
+	br.n	return
+	 fsqrt.dd r5,r7		/* take square root of S2, result in r5 */
+#endif
+
+INT:
+	or	r7,r0,r0	/* set S2 to zero */
+	or	r8,r0,r0
+INTcalc:
+	bb1	7,r9,INTdS2	/* branch for double precision S2 */
+INTsS2:
+	br.n	return
+	 int.ss r6,r8		/* convert S2 to integer, result in r6 */
+INTdS2:
+	br.n	return
+	 int.sd r6,r7		/* convert S2 to integer, result in r6 */
+
+NINT:
+	or	r7,r0,r0	/* set S2 to zero */
+	or	r8,r0,r0
+NINTcalc:
+	bb1	7,r9,NINTdS2	/* branch for double precision S2 */
+NINTsS2:
+	br.n	return
+	 nint.ss r6,r8		/* round S2 to nearest integer, result in r6 */
+NINTdS2:
+	br.n	return
+	 nint.sd r6,r7		/* round S2 to nearest integer, result in r6 */
+
+TRNC:
+	or	r7,r0,r0	/* set S2 to zero */
+	or	r8,r0,r0
+TRNCcalc:
+	bb1	7,r9,TRNCdS2	/* branch for double precision S2 */
+TRNCsS2:
+	br.n	return
+	 trnc.ss r6,r8		/* truncate S2 to integer, result in r6 */
+TRNCdS2:
+	trnc.sd r6,r7		/* truncate S2; falls through to return */
 
 
 /* Return to the routine that detected the reserved operand. */
 
-return:      ld     r1,r31,0    /* load return address */
-             jmp    r1                 /* return from subroutine */
+return:
+	ld	r1,r31,0	/* load return address */
+	jmp	r1
 
-             data
+	data
 
 
 /* S1 and/or S2 is an infinity, and the other operand may be a zero. */
 /* Knowing which operands are infinity, check the remaining operands for zeros. */
 
-             text
-	     align 8
-             global _zero
-_zero:       bb0    s1inf,r12,S1noinf /* see if S1 is zero */
-             bb0    s2inf,r12,S2noinf /* see if S2 is zero */
-             jmp    r1                /* return from function */
-
-/* See if S1 is zero.  Whether or not S1 is a zero, being in this routine */
-/* implies that S2 is infinity, so return to subroutine infinity after  */
-/* completing this code.  Set the s1zero flag in r12 if S1 is zero. */
-
-S1noinf:     bb1    s1size,r9,S1noinfd /* work with double precision operand */
-S1noinfs:    or     r10,r0,r5          /* load high word into r10 */
-             clr    r10,r10,1<sign>    /* clear the sign bit */
-             extu   r11,r6,3<29>       /* extract lower 3 bits of mantissa */
-             or     r10,r10,r11        /* or these 3 bits with high word */
-             bcnd   ne0,r10,operation  /* do not set zero flag */
-             jmp.n  r1                 /* since this operand was not infinity, */
-                                       /* S2 must have been, so return from */
-                                       /* function */
-             set    r12,r12,1<s1zero>  /* set zeroflag */
-S1noinfd:    clr    r10,r5,1<sign>     /* clear the sign bit */
-             or     r10,r6,r10         /* or high and low word */
-             bcnd   ne0,r10,operation  /* do not set zero flag */
-             jmp.n  r1                 /* since this operand was not infinity, */
-                                       /* S2 must have been, so return from */
-                                       /* function */
-             set    r12,r12,1<s1zero>  /* set zeroflag */
-
-
-/* Check S2 for zero.  If it is zero, then set the s2zero flag in r12. */
-
-S2noinf:     bb1    s2size,r9,S2noinfd /* work with double precision operand */
-S2noinfs:    or     r10,r0,r7          /* load high word into r10 */
-             clr    r10,r10,1<sign>    /* clear the sign bit */
-             extu   r11,r8,3<29>       /* extract lower 3 bits of mantissa */
-             or     r10,r10,r11        /* or these 3 bits with high word */
-             bcnd   ne0,r10,operation  /* do not set zero flag */
-             jmp.n  r1                 /* since this operand was not infinity, */
-                                       /* S1 must have been, so return from */
-                                       /* function */
-             set    r12,r12,1<s2zero>  /* set zeroflag */
-S2noinfd:    clr    r10,r7,1<sign>     /* clear the sign bit */
-             or     r10,r8,r10         /* or high and low word */
-             bcnd   ne0,r10,operation  /* do not set zero flag */
-             set    r12,r12,1<s2zero>  /* set zeroflag */
-                                       /* since this operand was not infinity, */
-                                       /* S1 must have been, so return from */
-                                       /* function */
-operation:   jmp    r1                 /* return from function */
+ASLOCAL(zero)
+	bb0	s1inf,r12,S1noinf	/* S1 is not infinity: check it for zero */
+	bb0	s2inf,r12,S2noinf	/* S2 is not infinity: check it for zero */
+	jmp	r1			/* both are infinity: nothing to check */
+
+/* See if S1 is zero. Whether or not S1 is a zero, being in this routine */
+/* implies that S2 is infinity, so return to subroutine infinity after */
+/* completing this code. Set the s1zero flag in r12 if S1 is zero. */
+
+S1noinf:
+	bb1	s1size,r9,S1noinfd	/* work with double precision operand */
+S1noinfs:
+	or	r10,r0,r5		/* load high word into r10 */
+	clr	r10,r10,1<sign>		/* clear the sign bit */
+	extu	r11,r6,3<29>		/* extract lower 3 bits of mantissa */
+	or	r10,r10,r11		/* or these 3 bits with high word */
+	bcnd	ne0,r10,operation	/* S1 is nonzero: do not set zero flag */
+	jmp.n	r1			/* since this operand was not */
+					/* infinity, S2 must have been, */
+					/* so return */
+	 set	r12,r12,1<s1zero>	/* (delay slot) set s1zero flag */
+S1noinfd:
+	clr	r10,r5,1<sign>		/* clear the sign bit */
+	or	r10,r6,r10		/* or high and low word */
+	bcnd	ne0,r10,operation	/* S1 is nonzero: do not set zero flag */
+	jmp.n	r1			/* since this operand was not */
+					/* infinity, S2 must have been, */
+					/* so return */
+	 set	r12,r12,1<s1zero>	/* (delay slot) set s1zero flag */
+
+
+/* Check S2 for zero. If it is zero, then set the s2zero flag in r12. */
+
+S2noinf:
+	bb1	s2size,r9,S2noinfd	/* work with double precision operand */
+S2noinfs:
+	or	r10,r0,r7		/* load high word into r10 */
+	clr	r10,r10,1<sign>		/* clear the sign bit */
+	extu	r11,r8,3<29>		/* extract lower 3 bits of mantissa */
+	or	r10,r10,r11		/* or these 3 bits with high word */
+	bcnd	ne0,r10,operation	/* S2 is nonzero: do not set zero flag */
+	jmp.n	r1			/* since this operand was not */
+					/* infinity, S1 must have been, */
+					/* so return */
+	 set	r12,r12,1<s2zero>	/* (delay slot) set s2zero flag */
+S2noinfd:
+	clr	r10,r7,1<sign>		/* clear the sign bit */
+	or	r10,r8,r10		/* or high and low word */
+	bcnd	ne0,r10,operation	/* S2 is nonzero: do not set zero flag */
+	set	r12,r12,1<s2zero>	/* set s2zero flag */
+					/* since this operand was not */
+					/* infinity, S1 must have been, */
+					/* so fall through and return */
+operation:
+	jmp	r1			/* return to caller */
 
 ASENTRY(Xfp_imprecise)
-/*  input: r3 is the excepton frame */
-		or	r29, r3, r0	/*  r29 is now the E.F. */
-		subu	r31, r31, 40
-		st	r1,  r31, 32
-		st	r29, r31, 36
-
-		ld      r2 , r29, EF_FPSR  * 4
-		ld      r3 , r29, EF_FPCR  * 4
-		ld      r4 , r29, EF_FPECR * 4
-		ld	r10, r29, EF_FPRH  * 4
-		ld	r11, r29, EF_FPRL  * 4
-		ld	r12, r29, EF_FPIT  * 4
-
-/* Load into r1 the return address for the exception handlers.  Looking */
+/* input: r3 is the exception frame */
+	or	r29, r3, r0		/* r29 is now the E.F. */
+	subu	r31, r31, 40
+	st	r1,  r31, 32
+	st	r29, r31, 36
+
+	ld	r2 , r29, EF_FPSR  * 4
+	ld	r3 , r29, EF_FPCR  * 4
+	ld	r4 , r29, EF_FPECR * 4
+	ld	r10, r29, EF_FPRH  * 4
+	ld	r11, r29, EF_FPRL  * 4
+	ld	r12, r29, EF_FPIT  * 4
+
+/* Load into r1 the return address for the exception handlers. Looking */
 /* at FPECR, branch to the appropriate exception handler. */
 
-		or.u	r1,r0,hi16(fpui_wrapup)/* load return address of functions */
-		or	r1,r1,lo16(fpui_wrapup)
+	or.u	r1,r0,hi16(fpui_wrapup)/* load return address of functions */
+	or	r1,r1,lo16(fpui_wrapup)
 
-		bb0  	2,r4,2f /* branch to FPunderflow if bit set */
-		br	_FPunderflow
-	2:	bb0	1,r4,3f /* branch to FPoverflow if bit set */
-		br	_FPoverflow
-	3:
+	bb0	2,r4,2f			/* branch to FPunderflow if bit set */
+	br	_ASM_LABEL(FPunderflow)
+2:
+	bb0	1,r4,3f			/* branch to FPoverflow if bit set */
+	br	_ASM_LABEL(FPoverflow)
+3:
 #ifdef HANDLER
-		br	_handler	/* branch to handler since bit will be set */
-					/* for inexact */
+	br	_handler		/* branch to handler since bit will */
+					/* be set for inexact */
 #endif
-		/* should never get here!!!! */
+
+/*
+ * XXX should never get here: r4 (FPECR) had neither bit 2 nor bit 1 set.
+ */
 	data
 	align 8
-	1: string "error in inprecise fp exception handler, r4 is 0x%08x"
-	align 8
+1:
+	string "error in inprecise fp exception handler, r4 is 0x%08x\0"
 	text
-		or.u r2, r0, hi16(1b)
-		or   r2, r2, lo16(1b)
-		or   r3, r4, r0
-		bsr _printf
-		or.u r2, r0, hi16(1b)
-		or   r2, r2, lo16(1b)
-		bsr _panic
+	align 8
+	or.u	r2, r0, hi16(1b)
+	or	r2, r2, lo16(1b)
+	or	r3, r4, r0
+	bsr	_printf
+	or.u	r2, r0, hi16(1b)
+	or	r2, r2, lo16(1b)
+	bsr	_panic
 
 fpui_wrapup:
-        tb1     0,r0,0          /* make sure all floating point operations */
-        ldcr    r5, psr        /* load the PSR */
-        /* have finished */
-        or      r5, r5, 0x2   /* disable interrupts */
-        stcr    r5, psr
+	tb1	0,r0,0		/* make sure all floating point operations */
+				/* have finished */
+	ldcr	r5, cr1		/* load the PSR into r5; r10/r11 (EF_FPRH/EF_FPRL) must survive until the write-back below */
 #if 0
-Why is this done? -- it screws up things later.
-        or      r5, r5, 0x8   /* set SFU 1 disable bit, disable SFU 1 */
-        stcr    r5, psr
+	set	r5, r5, 1<PSR_FPU_DISABLE_BIT>
 #endif
+	set	r5, r5, 1<PSR_INTERRUPT_DISABLE_BIT>	/* disable interrupts */
+	stcr	r5, cr1		/* write the updated PSR back */
 	ld	r1, r31, 32
 	ld	r29,r31, 36
 	addu	r31, r31, 40
 
-	/*  write back the results */
+	/* write back the results */
 	extu	r2, r12, 5<0>
 	addu	r3, r29, EF_R0*4
 	bb0	destsize, r12, Iwritesingle
@@ -2289,5 +2523,4 @@ Why is this done? -- it screws up things later.
 	clr	r2, r2, 27<5>
 Iwritesingle:
 	st	r11, r3 [r2]
-/* Return.. */
 	jmp	r1