diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/arch/mvme88k/mvme88k/m88100_fp.S | 4151 |
1 files changed, 2192 insertions, 1959 deletions
diff --git a/sys/arch/mvme88k/mvme88k/m88100_fp.S b/sys/arch/mvme88k/mvme88k/m88100_fp.S index 01c2c98b04a..3223ac7ee3e 100644 --- a/sys/arch/mvme88k/mvme88k/m88100_fp.S +++ b/sys/arch/mvme88k/mvme88k/m88100_fp.S @@ -1,4 +1,4 @@ -/* $OpenBSD: m88100_fp.S,v 1.18 2003/11/03 06:54:26 david Exp $ */ +/* $OpenBSD: m88100_fp.S,v 1.19 2003/12/24 22:41:45 miod Exp $ */ /* * Mach Operating System * Copyright (c) 1991 Carnegie Mellon University @@ -31,81 +31,35 @@ #include <machine/trap.h> #include <machine/asm.h> -#define psr cr1 -#define spsr cr2 -#define ssb cr3 -#define scip cr4 -#define snip cr5 -#define sfip cr6 -#define vbr cr7 -#define dmt0 cr8 -#define scratch1 cr18 -#define scratch2 cr20 -#define fpecr fcr0 -#define s1hi fcr1 -#define s1lo fcr2 -#define s2hi fcr3 -#define s2lo fcr4 -#define pcr fcr5 -#define manthi fcr6 -#define mantlo fcr7 -#define impcr fcr8 -#define fpsr fcr62 -#define fpcr fcr63 -#define valid 1 -#define exception 0 -#define exc_disable 0 -#define FP_disable 3 -#define dexc 27 -#define serial 29 #define destsize 10 #define inexact 0 #define overflow 1 #define underflow 2 #define divzero 3 #define oper 4 + #define sign 31 #define s1size 9 #define s2size 7 #define dsize 5 -#define full 1 -#define fault 0 + #define FADDop 0x05 #define FSUBop 0x06 #define FCMPop 0x07 #define FMULop 0x00 #define FDIVop 0x0e #define FSQRTop 0x0f -#define FLTop 0x04 #define INTop 0x09 #define NINTop 0x0a #define TRNCop 0x0b -#define mode 31 -#define s1sign 9 -#define s2sign 8 + #define s1nan 7 #define s2nan 6 #define s1inf 5 #define s2inf 4 #define s1zero 3 #define s2zero 2 -#define s1denorm 1 -#define s2denorm 0 #define sigbit 19 -#define sigbits 22 -#define sigbitd 19 -#define nc 0 -#define cp 1 -#define eq 2 -#define ne 3 -#define gt 4 -#define le 5 -#define lt 6 -#define ge 7 -#define ou 8 -#define ib 9 -#define in 10 -#define ob 11 #define modehi 30 #define modelo 29 @@ -115,1277 +69,1407 @@ #define efovf 6 #define efinx 5 -#define MARK or r21, r0, __LINE__ - ASENTRY(m88100_Xfp_precise) - or r29, r3, r0 /* r29 is now the E.F. */ + or r29, r3, r0 /* r29 is now the E.F. */ subu r31, r31, 40 st r1, r31, 32 st r29, r31, 36 - - ld r2, r29, EF_FPSR * 4 - ld r3, r29, EF_FPCR * 4 + + ld r2, r29, EF_FPSR * 4 + ld r3, r29, EF_FPCR * 4 ld r4, r29, EF_FPECR * 4 ld r5, r29, EF_FPHS1 * 4 ld r6, r29, EF_FPLS1 * 4 ld r7, r29, EF_FPHS2 * 4 ld r8, r29, EF_FPLS2 * 4 - ld r9, r29, EF_FPPT * 4 - - - /* Load into r1 the return address for the 0 handlers. Looking */ - /* at FPECR, branch to the appropriate 0 handler. However, */ - /* if none of the 0 bits are enabled, then a floating point */ - /* instruction was issued with the floating point unit disabled. This */ - /* will cause an unimplemented opcode 0. */ - - or.u r1,r0,hi16(wrapup) /* load return address of function */ + ld r9, r29, EF_FPPT * 4 + + + /* + * Load into r1 the return address for the 0 handlers. Looking at + * FPECR, branch to the appropriate 0 handler. However, if none of the + * 0 bits are enabled, then a floating point instruction was issued + * with the floating point unit disabled. This will cause an + * unimplemented opcode 0. + */ + + or.u r1,r0,hi16(wrapup) /* load return address of function */ or r1,r1,lo16(wrapup) -2: bb0 6,r4, 3f /* branch to FPunimp if bit set */ - br FPuimp -3: bb0 7,r4, 4f /* branch to FPintover if bit set */ - br _FPintover -4: /* bb0 5,r4, 5f ;branch to FPpriviol if bit set */ - /* br _FPpriviol */ -5: bb0 4,r4, 6f /* branch to FPresoper if bit set */ - br _FPresoper -6: bb0 3,r4, 7f /* branch to FPdivzero if bit set */ - br _FPdivzero -7: +2: + bb0 6,r4, 3f /* branch to FPunimp if bit set */ + br FPuimp +3: + bb0 7,r4, 4f /* branch to FPintover if bit set */ + br FPintover +4: +#if 0 + bb0 5,r4, 5f /* branch to FPpriviol if bit set */ + br FPpriviol +#endif +5: + bb0 4,r4, 6f /* branch to FPresoper if bit set */ + br FPresoper +6: + bb0 3,r4, 7f /* branch to FPdivzero if bit set */ + br FPdivzero +7: or.u r4, r4, 0xffff -FPuimp: global FPuimp -fp_p_trap: - subu r31,r31,40 /* allocate stack */ - st r1,r31,36 /* save return address */ - st r3,r31,32 /* save exception frame */ - or r2,r0,T_FPEPFLT /* load trap type */ +ASLOCAL(FPuimp) + subu r31,r31,40 /* allocate stack */ + st r1,r31,36 /* save return address */ + st r3,r31,32 /* save exception frame */ + or r2,r0,T_FPEPFLT /* load trap type */ or r3, r29, r0 - bsr _C_LABEL(m88100_trap) /* trap */ - ld r1,r31,36 /* recover return address */ - addu r31,r31,40 /* deallocate stack */ - br fp_p_return - - /* To write back the results to the user registers, disable exceptions */ - /* and the floating point unit. Write FPSR and FPCR and load the SNIP */ - /* and SFIP. */ - /* r5 will contain the upper word of the result */ - /* r6 will contain the lower word of the result */ - -wrapup: global wrapup - tb1 0,r0,0 /* make sure all floating point operations */ - /* have finished */ - ldcr r10, cr1 /* load the PSR */ - or r10, r10, 0x2 /* disable interrupts */ - stcr r10, cr1 + bsr _C_LABEL(m88100_trap) + ld r1,r31,36 /* recover return address */ + addu r31,r31,40 /* deallocate stack */ + br fp_p_return + + /* + * To write back the results to the user registers, disable exceptions + * and the floating point unit. Write FPSR and FPCR and load the SNIP + * and SFIP. + * r5 will contain the upper word of the result + * r6 will contain the lower word of the result + */ + +ASLOCAL(wrapup) + tb1 0,r0,0 /* make sure all floating point operations */ + /* have finished */ + ldcr r10, cr1 /* load the PSR */ #if 0 -Why is this done? -jfriedl - or r10, r10, 0x8 /* set SFU 1 disable bit, disable SFU 1 */ - stcr r10, cr1 + set r10, r10, 1<PSR_FPU_DISABLE_BIT> #endif + set r10, r10, 1<PSR_INTERRUPT_DISABLE_BIT> + stcr r10, cr1 + ld r1, r31, 32 ld r29, r31, 36 addu r31, r31, 40 - - fstcr r2, fpsr /* write revised value of FPSR */ - fstcr r3, fpcr /* write revised value of FPCR */ - + + fstcr r2, FPSR /* write revised value of FPSR */ + fstcr r3, FPCR /* write revised value of FPCR */ + /* result writeback routine */ - addu r3, r29, EF_R0 * 4 - extu r2, r9, 5<0> /* get 5 bits of destination register */ - bb0 5, r9, writesingle /* branch if destination is single */ - + addu r3, r29, EF_R0 * 4 + extu r2, r9, 5<0> /* get 5 bits of destination register */ + bb0 5, r9, writesingle /* branch if destination is single */ + /* writedouble here */ - st r5, r3 [r2] /* write high word */ - add r2, r2, 1 /* for double, the low word is the */ + st r5, r3 [r2] /* write high word */ + add r2, r2, 1 /* for double, the low word is the */ /* unspecified register */ - clr r2, r2, 27<5> /* perform equivalent of mod 32 */ -writesingle: - st r6, r3 [r2] /* write low word into memory */ + clr r2, r2, 27<5> /* perform equivalent of mod 32 */ +ASLOCAL(writesingle) + st r6, r3 [r2] /* write low word into memory */ -fp_p_return: +ASLOCAL(fp_p_return) jmp r1 - text - align 8 - global _FPdivzero - - -/* Check if the numerator is zero. If the numerator is zero, then handle */ -/* this instruction as you would a 0/0 invalid operation. */ +/* + * Check if the numerator is zero. If the numerator is zero, then handle + * this instruction as you would a 0/0 invalid operation. + */ -_FPdivzero: - st r1,r31,0 /* save return address */ - bb1 s1size,r9,1f /* branch if numerator double */ +ASLOCAL(FPdivzero) + st r1,r31,0 /* save return address */ + bb1 s1size,r9,1f /* branch if numerator double */ /* single number */ - clr r10,r5,1<sign> /* clear sign bit */ - extu r11,r6,3<29> /* grab upper bits of lower word */ - or r10,r10,r11 /* combine ones of mantissa */ - bcnd eq0,r10,resoper /* numerator is zero, handle reserved */ - /* operand */ - br setbit /* set divzero bit */ + clr r10,r5,1<sign> /* clear sign bit */ + extu r11,r6,3<29> /* grab upper bits of lower word */ + or r10,r10,r11 /* combine ones of mantissa */ + bcnd eq0,r10,resoper /* numerator is zero, handle reserved operand */ + br setbit /* set divzero bit */ 1: /* double number */ - clr r10,r5,1<sign> /* clear sign bit */ - or r10,r10,r6 /* or high and low words */ - bcnd ne0,r10,setbit /* set divzero bit */ + clr r10,r5,1<sign> /* clear sign bit */ + or r10,r10,r6 /* or high and low words */ + bcnd ne0,r10,setbit /* set divzero bit */ -/* The numerator is zero, so handle the invalid operation by setting the */ -/* invalid operation bit and branching to the user handler if there is one */ -/* or writing a quiet NaN to the destination. */ +/* + * The numerator is zero, so handle the invalid operation by setting the + * invalid operation bit and branching to the user handler if there is one + * or writing a quiet NaN to the destination. + */ -resoper: - set r2,r2,1<oper> /* set bit in FPSR */ +ASLOCAL(resoper) + set r2,r2,1<oper> /* set bit in FPSR */ #ifdef HANDLER - bb0 oper,r3,noreshand /* branch to execute default handling for */ - /* reserved operands */ - bsr _handler /* branch to user handler */ - br FP_div_return /* return from function */ + bb0 oper,r3,noreshand /* branch to execute default handling */ + /* for reserved operands */ + bsr _handler /* branch to user handler */ + br FP_div_return #endif - -noreshand: - set r5,r0,0<0> /* put a NaN in high word */ - set r6,r0,0<0> /* put a NaN in low word */ - br FP_div_return /* return from subroutine */ - /* writing to a word which may be ignored */ - /* is just as quick as checking the precision */ - /* of the destination */ - -/* The operation is divide by zero, so set the divide by zero bit in the */ -/* FPSR. If the user handler is set, then go to the user handler, else */ -/* go to the default mode. */ + +noreshand: + set r5,r0,0<0> /* put a NaN in high word */ + set r6,r0,0<0> /* put a NaN in low word */ + br FP_div_return + /* writing to a word which may be ignored */ + /* is just as quick as checking the precision */ + /* of the destination */ + +/* + * The operation is divide by zero, so set the divide by zero bit in the + * FPSR. If the user handler is set, then go to the user handler, else + * go to the default mode. + */ setbit: #ifdef HANDLER - set r2,r2,1<divzero> /* set bit in FPSR */ - bb0 divzero,r3,default /* go to default routine if no handler */ - bsr _handler /* execute handler routine */ - br FP_div_return /* return from subroutine */ + set r2,r2,1<divzero> /* set bit in FPSR */ + bb0 divzero,r3,default /* go to default routine if no hdlr */ + bsr _handler /* execute handler routine */ + br FP_div_return #endif +/* + * Considering the sign of the numerator and zero, write a correctly + * signed infinity of the proper precision into the destination. + */ + +default: + bb1 dsize,r9,FPzero_double /* branch to handle double result */ +FPzero_single: + clr r10,r5,31<0> /* clear all of S1HI except sign bit */ + xor r10,r7,r10 /* xor the sign bits of the operands */ + or.u r6,r0,0x7f80 /* load single precision infinity */ + br.n FP_div_return + or r6,r6,r10 /* load correctly signed infinity */ + +FPzero_double: + clr r10,r5,31<0> /* clear all of S1HI except sign bit */ + xor r10,r7,r10 /* xor the sign bits of the operands */ + or.u r5,r0,0x7ff0 /* load double precision infinity */ + or r5,r5,r10 /* load correctly signed infinity */ + or r6,r0,r0 /* clear lower word of double */ + +FP_div_return: + ld r1,r31,0 /* load return address */ + jmp r1 -/* Considering the sign of the numerator and zero, write a correctly */ -/* signed infinity of the proper precision into the destination. */ - -default: - bb1 dsize,r9,FPzero_double /* branch to handle double result */ -FPzero_single: - clr r10,r5,31<0> /* clear all of S1HI except sign bit */ - xor r10,r7,r10 /* xor the sign bits of the operands */ - or.u r6,r0,0x7f80 /* load single precision infinity */ - br.n FP_div_return /* return from subroutine */ - or r6,r6,r10 /* load correctly signed infinity */ - -FPzero_double: - clr r10,r5,31<0> /* clear all of S1HI except sign bit */ - xor r10,r7,r10 /* xor the sign bits of the operands */ - or.u r5,r0,0x7ff0 /* load double precision infinity */ - or r5,r5,r10 /* load correctly signed infinity */ - or r6,r0,r0 /* clear lower word of double */ - -FP_div_return: - ld r1,r31,0 /* load return address */ - jmp r1 /* return from subroutine */ - - - -/* Both NINT and TRNC require a certain rounding mode, so check which */ -/* instruction caused the integer conversion overflow. Use a substitute */ -/* FPCR in r1, and modify the rounding mode if the instruction is NINT or TRNC. */ - text - align 8 -_FPintover: global _FPintover - extu r10,r9,5<11> /* extract opcode */ - cmp r11,r10,INTop /* see if instruction is INT */ - st r1,r31,0 /* save return address */ - bb1.n eq,r11,checksize /* instruction is INT, do not modify */ - /* rounding mode */ - or r1,r0,r3 /* load FPCR into r1 */ - cmp r11,r10,NINTop /* see if instruction is NINT */ - bb1 eq,r11,NINT /* instruction is NINT */ - -TRNC: clr r1,r1,2<rndlo> /* clear rounding mode bits, */ +/* + * Both NINT and TRNC require a certain rounding mode, so check which + * instruction caused the integer conversion overflow. Use a substitute + * FPCR in r1, and modify the rounding mode if the instruction is NINT + * or TRNC. + */ +ASLOCAL(FPintover) + extu r10,r9,5<11> /* extract opcode */ + cmp r11,r10,INTop /* see if instruction is INT */ + st r1,r31,0 /* save return address */ + bb1.n eq,r11,checksize /* instruction is INT, do not modify */ + /* rounding mode */ + or r1,r0,r3 /* load FPCR into r1 */ + cmp r11,r10,NINTop /* see if instruction is NINT */ + bb1 eq,r11,NINT /* instruction is NINT */ +TRNC: + clr r1,r1,2<rndlo> /* clear rounding mode bits, */ /* instruction is TRNC */ - br.n checksize /* branch to check size */ - set r1,r1,1<rndlo> /* make rounding mode round towards zero */ - -NINT: clr r1,r1,2<rndlo> /* make rounding mode round to nearest */ - - + br.n checksize /* branch to check size */ + set r1,r1,1<rndlo> /* make rounding mode round towards */ + /* zero */ +NINT: + clr r1,r1,2<rndlo> /* make rounding mode round to */ + /* nearest */ + /* See whether the source is single or double precision. */ - -checksize: bb1 s2size,r9,checkdoub /* S2 is double, branch to see if there */ -/* is a false alarm */ - - -/* An integer has more bits than the mantissa of a single precision floating */ -/* point number, so to check for false alarms (i.e. valid conversion), simply */ -/* check the exponents. False alarms are detected for 2**30 to (2**30) - 1 and */ -/* -2**30 to -2**31. Only seven bits need to be looked at since an exception */ -/* will not occur for the other half of the numbering system. */ -/* To speed up the processing, first check to see if the exponent is 32 or */ -/* greater. */ - -/* This code was originally written for the exponent in the control */ -/* register to have the most significant bit (8 - single, 11 - double) */ -/* flipped and sign extended. For precise exceptions, however, the most */ -/* significant bit is only sign extended. Therefore, the code was chopped */ -/* up so that it would work for positive values of real exponent which were */ -/* only sign extended. */ - -checksing: extu r10,r7,7<20> /* internal representation for single */ -/* precision is IEEE 8 bits sign extended */ -/* to 11 bits; for real exp. = 30, the */ -/* above instruction gives a result exp. */ -/* that has the MSB flipped and sign */ -/* extended like in the IMPCR */ - cmp r11,r10,31 /* compare to 32,but exp. off by 1 */ -/* these 2 instructions to speed up valid */ -/* execution of valid cases */ - bb1 ge,r11,overflw /* valid case, perform overflow routine */ - bb1 sign,r7,checksingn /* source operand is negative */ - -/* If the number is positve and the exponent is greater than 30, than it is */ - /* overflow. */ - -checksingp: cmp r10,r10,29 /* compare to 30, but exp. off by 1 */ - bb1 gt,r10,overflw /* no false alarm, its overflow */ - br conversionsp /* finish single precision conversion */ - -/* If the number is negative, and the exponent is 30, or 31 with a mantissa */ -/* of 0, then it is a false alarm. */ - -checksingn: cmp r11,r10,30 /* compare to 31,but exp. off by 1 */ - bb1 lt,r11,conversionsn /* exp. less than 31, so convert */ - extu r10,r8,3<29> /* get upper three bits of lower mantissa */ - mak r12,r7,20<3> /* get upper 20 bits of mantissa */ - or r10,r10,r12 /* form complete mantissa */ - bcnd eq0,r10,conversionsn /* complete conversion if mantissa is 0 */ - br overflw /* no false alarm, its overflow */ - - -/* False alarms are detected for 2**30 to (2**30) - 1 and */ -/* -2**30 to -2**31. Only seven bits need to be looked at since an exception */ -/* will not occur for the other half of the numbering system. */ -/* To speed up the processing, first check to see if the exponent is 32 or */ -/* greater. Since there are more mantissa bits than integer bits, rounding */ -/* could cause overflow. (2**31) - 1 needs to be checked so that it does */ -/* not round to 2**31, and -2**31 needs to be checked in case it rounds to */ -/* -((2**31) + 1). */ - -checkdoub: extu r10,r7,10<20> /* internal representation for double */ -/* precision is the same IEEE 11 bits */ -/* for real exp. = 30, the */ -/* above instruction gives a result exp. */ -/* that has the MSB flipped and sign */ -/* extended like in the IMPCR */ - cmp r11,r10,31 /* compare to 32,but exp. off by 1 */ -/* these 2 instructions to speed up valid */ -/* execution of valid cases */ - bb1 ge,r11,overflw /* valid case, perform overflow routine */ - bb1 sign,r7,checkdoubn /* source operand is negative */ - -/* If the exponent is not 31, then the floating point number will be rounded */ -/* before the conversion is done. A branch table is set up with bits 4 and 3 */ -/* being the rounding mode, and bits 2, 1, and 0 are the guard, round, and */ -/* sticky bits. */ - -checkdoubp: cmp r11,r10,30 /* compare to 31, but exponent off by 1 */ - bb1 eq,r11,overflw /* no false alarm, its overflow */ - extu r12,r8,1<22> /* get LSB for integer with exp. = 30 */ - mak r12,r12,1<2> /* start to set up field for branch table */ - extu r11,r8,1<21> /* get guard bit */ - mak r11,r11,1<1> /* set up field for branch table */ - or r12,r11,r12 /* set up field for branch table */ - extu r11,r8,21<0> /* get bits for sticky bit */ - bcnd eq0,r11,nostickyp /* do not set sticky */ - set r12,r12,1<0> /* set sticky bit */ -nostickyp: rot r11,r1,0<rndlo> /* shift rounding mode to 2 LSB''s */ - mak r11,r11,2<3> /* set up field, clear other bits */ - or r12,r11,r12 /* set up field for branch table */ - lda r12,r0[r12] /* scale r12 */ - or.u r12,r12,hi16(ptable) /* load pointer into table */ - addu r12,r12,lo16(ptable) - jmp r12 /* jump into branch table */ - -ptable: br conversiondp -p00001: br conversiondp -p00010: br conversiondp -p00011: br paddone -p00100: br conversiondp -p00101: br conversiondp -p00110: br paddone -p00111: br paddone -p01000: br conversiondp -p01001: br conversiondp -p01010: br conversiondp -p01011: br conversiondp -p01100: br conversiondp -p01101: br conversiondp -p01110: br conversiondp -p01111: br conversiondp -p10000: br conversiondp -p10001: br conversiondp -p10010: br conversiondp -p10011: br conversiondp -p10100: br conversiondp -p10101: br conversiondp -p10110: br conversiondp -p10111: br conversiondp -p11000: br conversiondp -p11001: br paddone -p11010: br paddone -p11011: br paddone -p11100: br conversiondp -p11101: br paddone -p11110: br paddone -p11111: br paddone - -/* Add one to the bit of the mantissa which corresponds to the LSB of an */ -/* integer. If the mantissa overflows, then there is a valid integer */ -/* overflow conversion; otherwise, the mantissa can be converted to the integer. */ - -paddone: or r10,r0,r0 /* clear r10 */ - set r10,r10,1<22> /* set LSB bit to 1 for adding */ - addu.co r8,r8,r10 /* add the 1 obtained from rounding */ - clr r11,r7,12<20> /* clear exponent and sign */ - addu.ci r11,r0,r11 /* add carry */ - bb1 20,r11,overflw /* overflow to 2**31, abort the rest */ - br.n conversiondp /* since the exp. was 30, and the exp. */ - /* did not round up to 31, the largest */ - /* number that S2 could become is 2**31-1 */ - or r7,r0,r11 /* store r11 into r7 for conversion */ - -/* Now check for negative double precision sources. If the exponent is 30, */ -/* then convert the false alarm. If the exponent is 31, then check the mantissa */ -/* bits which correspond to integer bits. If any of them are a one, then there */ -/* is overflow. If they are zero, then check the guard, round, and sticky bits. */ -/* Round toward zero and positive will not cause a roundup, but round toward */ -/* nearest and negative may, so perform those roundings. If there is no overflow, */ - /* then convert and return from subroutine. */ - -checkdoubn: cmp r11,r10,29 /* compare to 30, but exp. off by 1 */ - bb1 eq,r11,conversiondn /* false alarm if exp. = 30 */ - extu r10,r8,11<21> /* check upper bits of lower mantissa */ - bcnd ne0,r10,overflw /* one of the bits is a 1, so overflow */ - extu r10,r7,20<0> /* check upper bits of upper mantissa */ - bcnd ne0,r10,overflw /* one of the bits is a 1, so overflow */ - bb0 rndlo,r1,possround /* rounding mode is either round near or */ - /* round negative, which may cause a */ - /* round */ - br.n FPintov_return /* round positive, which will not cause a */ - /* round */ - set r6,r0,1<sign> /* rounding mode is either round zero or */ -possround: extu r12,r8,1<20> /* get guard bit */ - extu r11,r8,20<0> /* get bits for sticky bit */ - bcnd.n eq0,r11,nostickyn /* do not set sticky */ - mak r12,r12,1<1> /* set up field for branch table */ - set r12,r12,1<0> /* set sticky bit */ -nostickyn: bb1 rndhi,r1,negative /* rounding mode is negative */ -nearest: cmp r12,r12,3 /* are both guard and sticky set */ - bb1 eq,r12,overflw /* both guard and sticky are set, */ - /* so signal overflow */ - or r6,r0,r0 /* clear destination register r6 */ - br.n FPintov_return /* return from subroutine */ - set r6,r6,1<sign> /* set the sign bit and take care of */ - /* this special case */ -negative: bcnd ne0,r12,overflw /* -2**31 will be rounded to -(2**31+1), */ - /* so signal overflow */ - or r6,r0,r0 /* clear destination register r6 */ - br.n FPintov_return /* return from subroutine */ - set r6,r6,1<sign> /* set the sign bit and take care of */ - /* this special case */ - - /* since the exp. was 30, and there was */ - /* no round-up, the largest number that */ - /* S2 could have been was 2**31 - 1 */ - - + +checksize: + bb1 s2size,r9,checkdoub /* S2 is double, branch to see if */ + /* there is a false alarm */ + +/* + * An integer has more bits than the mantissa of a single precision floating + * point number, so to check for false alarms (i.e. valid conversion), simply + * check the exponents. False alarms are detected for 2**30 to (2**30) - 1 + * and -2**30 to -2**31. Only seven bits need to be looked at since an + * exception will not occur for the other half of the numbering system. + * To speed up the processing, first check to see if the exponent is 32 or + * greater. + * + * This code was originally written for the exponent in the control + * register to have the most significant bit (8 - single, 11 - double) + * flipped and sign extended. For precise exceptions, however, the most + * significant bit is only sign extended. Therefore, the code was chopped + * up so that it would work for positive values of real exponent which were + * only sign extended. + */ + +checksing: + extu r10,r7,7<20> /* internal representation for single */ + /* precision is IEEE 8 bits sign extended */ + /* to 11 bits; for real exp. = 30, the */ + /* above instruction gives a result exp. */ + /* that has the MSB flipped and sign */ + /* extended like in the IMPCR */ + cmp r11,r10,31 /* compare to 32,but exp. off by 1 */ + /* these 2 instructions to speed up valid */ + /* execution of valid cases */ + bb1 ge,r11,overflw /* valid case, perform overflow routine */ + bb1 sign,r7,checksingn /* source operand is negative */ + +/* + * If the number is positve and the exponent is greater than 30, than it is + * overflow. + */ +checksingp: + cmp r10,r10,29 /* compare to 30, but exp. off by 1 */ + bb1 gt,r10,overflw /* no false alarm, its overflow */ + br conversionsp /* finish single precision conversion */ + +/* + * If the number is negative, and the exponent is 30, or 31 with a mantissa + * of 0, then it is a false alarm. + */ +checksingn: + cmp r11,r10,30 /* compare to 31,but exp. off by 1 */ + bb1 lt,r11,conversionsn /* exp. less than 31, so convert */ + extu r10,r8,3<29> /* get upper three bits of lower */ + /* mantissa */ + mak r12,r7,20<3> /* get upper 20 bits of mantissa */ + or r10,r10,r12 /* form complete mantissa */ + bcnd eq0,r10,conversionsn /* complete conversion if mantissa */ + /* is 0 */ + br overflw /* no false alarm, its overflow */ + +/* + * False alarms are detected for 2**30 to (2**30) - 1 and -2**30 to -2**31. + * Only seven bits need to be looked at since an exception will not occur + * for the other half of the numbering system. + * To speed up the processing, first check to see if the exponent is 32 or + * greater. Since there are more mantissa bits than integer bits, rounding + * could cause overflow. (2**31) - 1 needs to be checked so that it does + * not round to 2**31, and -2**31 needs to be checked in case it rounds to + * -((2**31) + 1). + */ +checkdoub: + extu r10,r7,10<20> /* internal representation for double */ + /* precision is the same IEEE 11 bits */ + /* for real exp. = 30, the */ + /* above instruction gives a result exp. */ + /* that has the MSB flipped and sign */ + /* extended like in the IMPCR */ + cmp r11,r10,31 /* compare to 32,but exp. off by 1 */ + /* these 2 instructions to speed up valid */ + /* execution of valid cases */ + bb1 ge,r11,overflw /* valid case, perform overflow routine */ + bb1 sign,r7,checkdoubn /* source operand is negative */ + +/* + * If the exponent is not 31, then the floating point number will be rounded + * before the conversion is done. A branch table is set up with bits 4 and 3 + * being the rounding mode, and bits 2, 1, and 0 are the guard, round, and + * sticky bits. + */ +checkdoubp: + cmp r11,r10,30 /* compare to 31, but exponent off by 1 */ + bb1 eq,r11,overflw /* no false alarm, its overflow */ + extu r12,r8,1<22> /* get LSB for integer with exp. = 30 */ + mak r12,r12,1<2> /* start to set up field for branch table */ + extu r11,r8,1<21> /* get guard bit */ + mak r11,r11,1<1> /* set up field for branch table */ + or r12,r11,r12 /* set up field for branch table */ + extu r11,r8,21<0> /* get bits for sticky bit */ + bcnd eq0,r11,nostickyp /* do not set sticky */ + set r12,r12,1<0> /* set sticky bit */ +nostickyp: + rot r11,r1,0<rndlo> /* shift rounding mode to 2 LSB''s */ + mak r11,r11,2<3> /* set up field, clear other bits */ + or r12,r11,r12 /* set up field for branch table */ + lda r12,r0[r12] /* scale r12 */ + or.u r12,r12,hi16(ptable) /* load pointer into table */ + addu r12,r12,lo16(ptable) + jmp r12 + +ptable: + br conversiondp + br conversiondp + br conversiondp + br paddone + br conversiondp + br conversiondp + br paddone + br paddone + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br conversiondp + br paddone + br paddone + br paddone + br conversiondp + br paddone + br paddone + br paddone + +/* + * Add one to the bit of the mantissa which corresponds to the LSB of an + * integer. If the mantissa overflows, then there is a valid integer + * overflow conversion; otherwise, the mantissa can be converted to the + * integer. + */ +paddone: + or r10,r0,r0 /* clear r10 */ + set r10,r10,1<22> /* set LSB bit to 1 for adding */ + addu.co r8,r8,r10 /* add the 1 obtained from rounding */ + clr r11,r7,12<20> /* clear exponent and sign */ + addu.ci r11,r0,r11 /* add carry */ + bb1 20,r11,overflw /* overflow to 2**31, abort the rest */ + br.n conversiondp /* since the exp. was 30, and the exp. */ + /* did not round up to 31, the largest */ + /* number that S2 could become is 2**31-1 */ + or r7,r0,r11 /* store r11 into r7 for conversion */ + +/* + * Now check for negative double precision sources. If the exponent is 30, + * then convert the false alarm. If the exponent is 31, then check the + * mantissa bits which correspond to integer bits. If any of them are a one, + * then there is overflow. If they are zero, then check the guard, round, + * and sticky bits. + * Round toward zero and positive will not cause a roundup, but round toward + * nearest and negative may, so perform those roundings. If there is no + * overflow, then convert and return. + */ +checkdoubn: + cmp r11,r10,29 /* compare to 30, but exp. off by 1 */ + bb1 eq,r11,conversiondn /* false alarm if exp. = 30 */ + extu r10,r8,11<21> /* check upper bits of lower mantissa */ + bcnd ne0,r10,overflw /* one of the bits is a 1, so oflow */ + extu r10,r7,20<0> /* check upper bits of upper mantissa */ + bcnd ne0,r10,overflw /* one of the bits is a 1, so oflow */ + bb0 rndlo,r1,possround /* rounding mode is either round near */ + /* or round negative, which may cause */ + /* a round */ + br.n FPintov_return /* round positive, which will not */ + /* cause a round */ + set r6,r0,1<sign> +possround: + extu r12,r8,1<20> /* get guard bit */ + extu r11,r8,20<0> /* get bits for sticky bit */ + bcnd.n eq0,r11,nostickyn /* do not set sticky */ + mak r12,r12,1<1> /* set up field for branch table */ + set r12,r12,1<0> /* set sticky bit */ +nostickyn: + bb1 rndhi,r1,negative /* rounding mode is negative */ +nearest: + cmp r12,r12,3 /* are both guard and sticky set */ + bb1 eq,r12,overflw /* both guard and sticky are set, */ + /* so signal overflow */ + or r6,r0,r0 /* clear destination register r6 */ + br.n FPintov_return + set r6,r6,1<sign> /* set the sign bit and take care of */ + /* this special case */ +negative: + bcnd ne0,r12,overflw /* -2**31 will be rounded to */ + /* -(2**31+1), so signal overflow */ + or r6,r0,r0 /* clear destination register r6 */ + br.n FPintov_return + set r6,r6,1<sign> /* set the sign bit and take care of */ + /* this special case */ + + /* + * Since the exp. was 30, and there was no round-up, the largest + * number that S2 could have been was 2**31 - 1 + */ + + /* Convert the single precision positive floating point number. */ - -conversionsp: extu r6,r8,3<29> /* extract lower bits of integer */ - mak r6,r6,3<7> /* shift left to correct place in integer */ - mak r10,r7,20<10> /* shift left upper bits of integer */ - or r6,r6,r10 /* form most of integer */ - br.n FPintov_return /* return from subroutine */ - set r6,r6,1<30> /* set hidden one */ - - + +conversionsp: + extu r6,r8,3<29> /* extract lower bits of integer */ + mak r6,r6,3<7> /* shift left to correct place in integer */ + mak r10,r7,20<10> /* shift left upper bits of integer */ + or r6,r6,r10 /* form most of integer */ + br.n FPintov_return + set r6,r6,1<30> /* set hidden one */ + /* Convert the single precision negative floating point number. */ - -conversionsn: bb1 eq,r11,exp31s /* use old r11 to see if exp. is 31 */ - extu r6,r8,3<29> /* extract lower bits of mantissa */ - mak r6,r6,3<7> /* shift left to correct place in integer */ - mak r10,r7,20<10> /* shift left upper bits of integer */ - or r6,r6,r10 /* form most of integer */ - set r6,r6,1<30> /* set hidden one */ - or.c r6,r0,r6 /* negate result */ - br.n FPintov_return /* return from subroutine */ - addu r6,r6,1 /* add 1 to get 2''s complement */ -exp31s: or r6,r0,r0 /* clear r6 */ - br.n FPintov_return /* return from subroutine */ - set r6,r6,1<sign> /* set sign bit */ - - + +conversionsn: + bb1 eq,r11,exp31s /* use old r11 to see if exp. is 31 */ + extu r6,r8,3<29> /* extract lower bits of mantissa */ + mak r6,r6,3<7> /* shift left to correct place in integer */ + mak r10,r7,20<10> /* shift left upper bits of integer */ + or r6,r6,r10 /* form most of integer */ + set r6,r6,1<30> /* set hidden one */ + or.c r6,r0,r6 /* negate result */ + br.n FPintov_return + addu r6,r6,1 /* add 1 to get 2''s complement */ +exp31s: + or r6,r0,r0 /* clear r6 */ + br.n FPintov_return + set r6,r6,1<sign> /* set sign bit */ + /* Convert the double precision positive floating point number. */ - -conversiondp: extu r6,r8,10<22> /* extract lower bits of integer */ - mak r10,r7,20<10> /* shift left upper bits of integer */ - or r6,r6,r10 /* form most of integer */ - br.n FPintov_return /* return from subroutine */ - set r6,r6,1<30> /* set hidden one */ - - - /* Convert the double precision negative floating point number. The number, */ - /* whose exponent is 30, must be rounded before converting. Bits 4 and 3 are */ - /* the rounding mode, and bits 2, 1, and 0 are the guard, round, and sticky */ - /* bits for the branch table. */ - -conversiondn: extu r12,r8,1<22> /* get LSB for integer with exp. = 30 */ - mak r12,r12,1<2> /* start to set up field for branch table */ - extu r11,r8,1<21> /* get guard bit */ - mak r11,r11,1<1> /* set up field for branch table */ - or r12,r11,r12 /* set up field for branch table */ - extu r11,r8,21<0> /* get bits for sticky bit */ - bcnd eq0,r11,nostkyn /* do not set sticky */ - set r12,r12,1<0> /* set sticky bit */ -nostkyn: rot r11,r1,0<rndlo> /* shift rounding mode to 2 LSB''s */ - mak r11,r11,2<3> /* set up field, clear other bits */ - or r12,r11,r12 /* set up field for branch table */ - lda r12,r0[r12] /* scale r12 */ - or.u r12,r12,hi16(ntable)/* load pointer into table */ - addu r12,r12,lo16(ntable) - jmp r12 /* jump into branch table */ - -ntable: br nnoaddone -n00001: br nnoaddone -n00010: br nnoaddone -n00011: br naddone -n00100: br nnoaddone -n00101: br nnoaddone -n00110: br naddone -n00111: br naddone -n01000: br nnoaddone -n01001: br nnoaddone -n01010: br nnoaddone -n01011: br nnoaddone -n01100: br nnoaddone -n01101: br nnoaddone -n01110: br nnoaddone -n01111: br nnoaddone -n10000: br nnoaddone -n10001: br naddone -n10010: br naddone -n10011: br naddone -n10100: br nnoaddone -n10101: br naddone -n10110: br naddone -n10111: br naddone -n11000: br nnoaddone -n11001: br nnoaddone -n11010: br nnoaddone -n11011: br nnoaddone -n11100: br nnoaddone -n11101: br nnoaddone -n11110: br nnoaddone -n11111: br nnoaddone - - - /* Add one to the mantissa, and check to see if it overflows to -2**31. */ -/* The conversion is done in nnoaddone:. */ - -naddone: or r10,r0,r0 /* clear r10 */ - set r10,r10,1<22> /* set LSB bit to 1 for adding */ - add.co r8,r8,r10 /* add the 1 obtained from rounding */ - clr r7,r7,12<20> /* clear exponent and sign */ - add.ci r7,r0,r7 /* add carry */ - bb1 20,r7,maxneg /* rounded to -2**31,handle separately */ - /* the exponent was originally 30 */ -nnoaddone: extu r6,r8,11<22> /* extract lower bits of integer */ - mak r10,r7,20<10> /* shift left upper bits of integer */ - or r6,r6,r10 /* form most of integer */ - set r6,r6,1<30> /* set hidden one */ - or.c r6,r0,r6 /* negate integer */ - br.n FPintov_return /* return from subroutine */ - addu r6,r6,1 /* add 1 to get 2''s complement */ - -maxneg: or r6,r0,r0 /* clear integer */ - br.n FPintov_return /* return from subroutine */ - set r6,r6,1<sign> /* set sign bit */ - - - /* For valid overflows, check to see if the integer overflow user handler is */ - /* set. If it is set, then go to user handler, else write the correctly */ - /* signed largest integer. */ - -overflw: + +conversiondp: + extu r6,r8,10<22> /* extract lower bits of integer */ + mak r10,r7,20<10> /* shift left upper bits of integer */ + or r6,r6,r10 /* form most of integer */ + br.n FPintov_return + set r6,r6,1<30> /* set hidden one */ + + /* + * Convert the double precision negative floating point number. + * The number, whose exponent is 30, must be rounded before converting. + * Bits 4 and 3 are the rounding mode, and bits 2, 1, and 0 are the + * guard, round, and sticky bits for the branch table. + */ + +conversiondn: + extu r12,r8,1<22> /* get LSB for integer with exp. = 30 */ + mak r12,r12,1<2> /* start to set up field for branch table */ + extu r11,r8,1<21> /* get guard bit */ + mak r11,r11,1<1> /* set up field for branch table */ + or r12,r11,r12 /* set up field for branch table */ + extu r11,r8,21<0> /* get bits for sticky bit */ + bcnd eq0,r11,nostkyn /* do not set sticky */ + set r12,r12,1<0> /* set sticky bit */ +nostkyn: + rot r11,r1,0<rndlo> /* shift rounding mode to 2 LSB''s */ + mak r11,r11,2<3> /* set up field, clear other bits */ + or r12,r11,r12 /* set up field for branch table */ + lda r12,r0[r12] /* scale r12 */ + or.u r12,r12,hi16(ntable) /* load pointer into table */ + addu r12,r12,lo16(ntable) + jmp r12 + +ntable: + br nnoaddone + br nnoaddone + br nnoaddone + br naddone + br nnoaddone + br nnoaddone + br naddone + br naddone + br nnoaddone + br nnoaddone + br nnoaddone + br nnoaddone + br nnoaddone + br nnoaddone + br nnoaddone + br nnoaddone + br nnoaddone + br naddone + br naddone + br naddone + br nnoaddone + br naddone + br naddone + br naddone + br nnoaddone + br nnoaddone + br nnoaddone + br nnoaddone + br nnoaddone + br nnoaddone + br nnoaddone + br nnoaddone + +/* + * Add one to the mantissa, and check to see if it overflows to -2**31. + * The conversion is done in nnoaddone. + */ + +naddone: + or r10,r0,r0 /* clear r10 */ + set r10,r10,1<22> /* set LSB bit to 1 for adding */ + add.co r8,r8,r10 /* add the 1 obtained from rounding */ + clr r7,r7,12<20> /* clear exponent and sign */ + add.ci r7,r0,r7 /* add carry */ + bb1 20,r7,maxneg /* rounded to -2**31,handle separately */ + /* the exponent was originally 30 */ +nnoaddone: + extu r6,r8,11<22> /* extract lower bits of integer */ + mak r10,r7,20<10> /* shift left upper bits of integer */ + or r6,r6,r10 /* form most of integer */ + set r6,r6,1<30> /* set hidden one */ + or.c r6,r0,r6 /* negate integer */ + br.n FPintov_return + addu r6,r6,1 /* add 1 to get 2''s complement */ + +maxneg: + or r6,r0,r0 /* clear integer */ + br.n FPintov_return + set r6,r6,1<sign> /* set sign bit */ + + /* + * For valid overflows, check to see if the integer overflow user + * handler is set. If it is set, then go to user handler, else write + * the correctly signed largest integer. + */ + +overflw: #ifdef HANDLER - bb0.n oper,r3,nohandler /* do not go to user handler routine */ - set r2,r2,1<oper> /* set invalid operand bit */ - bsr _handler /* go to user handler routine */ - br FPintov_return /* return from subroutine */ -nohandler: + bb0.n oper,r3,nohandler /* do not go to user handler routine */ + set r2,r2,1<oper> /* set invalid operand bit */ + bsr _handler /* go to user handler routine */ + br FPintov_return +nohandler: #endif - bb0.n sign,r7,FPintov_return /* if positive then return from subroutine */ - set r6,r6,31<0> /* set result to largest positive integer */ - or.c r6,r0,r6 /* negate r6,giving largest negative int. */ - -FPintov_return: ld r1,r31,0 /* load return address from memory */ - jmp r1 /* return from subroutine */ - - data + bb0.n sign,r7,FPintov_return /* if positive then return */ + set r6,r6,31<0> /* set result to largest positive int */ + or.c r6,r0,r6 /* negate r6, giving largest negative */ + /* integer */ + +FPintov_return: + ld r1,r31,0 /* load return address from memory */ + jmp r1 -/* Some instructions only have the S2 operations, so clear S1HI and S1LO */ -/* for those instructions so that the previous contents of S1HI and S1LO */ -/* do not influence this instruction. */ - - text -GLOBAL(FPresoper) - st r1, r31, 0 - extu r10,r9,5<11> /* extract opcode */ -/* cmp r11,r10,FSQRTop ;compare to FSQRT */ -/* bb1 eq,r11,S1clear ;clear S1 if instruction only had S2 operand */ - cmp r11,r10,INTop /* compare to INT */ - bb1 eq,r11,S1clear /* clear S1 if instruction only had S2 operand */ - cmp r11,r10,NINTop /* compare to NINT */ - bb1 eq,r11,S1clear /* clear S1 if instruction only had S2 operand */ - cmp r11,r10,TRNCop /* compare to TRNC */ - bb0 eq,r11,opercheck /* check for reserved operands */ - -ASGLOBAL(S1clear) - or r5,r0,r0 /* clear any NaN''s, denorms, or infinities */ - or r6,r0,r0 /* that may be left in S1HI,S1LO from a */ - /* previous instruction */ - -/* r12 contains the following flags: */ -/* bit 9 -- s1sign */ -/* bit 8 -- s2sign */ -/* bit 7 -- s1nan */ -/* bit 6 -- s2nan */ -/* bit 5 -- s1inf */ -/* bit 4 -- s2inf */ -/* bit 3 -- s1zero */ -/* bit 2 -- s2zero */ -/* bit 1 -- s1denorm */ -/* bit 0 -- s2denorm */ - -/* Using code for both single and double precision, check if S1 is either */ -/* a NaN or infinity and set the appropriate flags in r12. Then check if */ -/* S2 is a NaN or infinity. If it is a NaN, then branch to the NaN routine. */ - - -ASGLOBAL(opercheck) - extu r10,r5,11<20> /* internal representation for double */ - bb1.n s1size,r9,S1NaNdoub /* S1 is double precision */ - or r12,r0,r0 /* clear operand flag register */ -ASGLOBAL(S1NaNsing) - xor r10,r10,0x0080 /* internal representation for single */ - ext r10,r10,8<0> /* precision is IEEE 8 bits sign extended */ - /* to 11 bits; for real exp. > 0, the */ - /* above instructions gives a result exp. */ - /* that has the MSB flipped and sign */ - /* extended like in the IMPCR */ - cmp r11,r10,127 /* Is exponent equal to IEEE 255 (internal 127) */ - bb1 ne,r11,S2NaN /* source 1 is not a NaN or infinity */ - mak r10,r5,20<0> /* load r10 with upper bits of S1 mantissa */ - extu r11,r6,3<29> /* get 3 upper bits of lower word */ - or r11,r10,r11 /* combine any existing 1''s */ - bcnd eq0,r11,noS1NaNs /* since r11 can only hold 0 or a positive */ - /* number, branch to noS1NaN when eq0 */ - br.n S2NaN /* see if S2 has a NaN */ - set r12,r12,1<s1nan> /* indicate that S1 has a NaN */ -ASGLOBAL(noS1NaNs) - br.n S2NaN /* check contents of S2 */ - set r12,r0,1<s1inf> /* indicate that S1 has an infinity */ - -ASGLOBAL(S1NaNdoub) - xor r10,r10,0x0400 /* precision is the same IEEE 11 bits */ - /* The */ - /* above instructions gives a result exp. */ - /* that has the MSB flipped and sign */ - /* extended like in the IMPCR */ - cmp r11,r10,1023 /* Is exp. equal to IEEE 2047 (internal 1023) */ - bb1 ne,r11,S2NaN /* source 1 is not a NaN or infinity */ - mak r10,r5,20<0> /* load r10 with upper bits of S1 mantissa */ - or r11,r6,r10 /* combine existing 1''s of mantissa */ - bcnd eq0,r11,noS1NaNd /* since r11 can only hold 0 or a positive */ - /* number, branch to noS1NaN when eq0 */ - br.n S2NaN /* see if S2 has a NaN */ - set r12,r12,1<s1nan> /* indicate that S1 has a NaN */ -ASGLOBAL(noS1NaNd) - set r12,r0,1<s1inf> /* indicate that S1 has an infinity */ - -ASGLOBAL(S2NaN) - bb1.n s2size,r9,S2NaNdoub /* S1 is double precision */ - extu r10,r7,11<20> /* internal representation for double */ -ASGLOBAL(S2NaNsing) - xor r10,r10,0x0080 /* internal representation for single */ - ext r10,r10,8<0> /* precision is IEEE 8 bits sign extended */ - /* to 11 bits; for real exp. > 0, the */ - /* above instruction gives a result exp. */ - /* that has the MSB flipped and sign */ - /* extended like in the IMPCR */ - cmp r11,r10,127 /* Is exponent equal to IEEE 255 (internal 127) */ - bb1 ne,r11,inf /* source 2 is not a NaN or infinity */ - mak r10,r7,20<0> /* load r10 with upper bits of S1 mantissa */ - extu r11,r8,3<29> /* get 3 upper bits of lower word */ - or r11,r10,r11 /* combine any existing 1''s */ - bcnd eq0,r11,noS2NaNs /* since r11 can only hold 0 or a positive */ - /* number, branch to noS2NaNs when eq0 */ - br.n _NaN /* branch to NaN routine */ - set r12,r12,1<s2nan> /* indicate that s2 has a NaN */ -ASGLOBAL(noS2NaNs) - bb0 s1nan,r12, 1f /* branch to NaN if S1 is a NaN */ - br _NaN -1: br.n _infinity /* If S1 had a NaN we would have already */ - /* branched, and S2 does not have a NaN, but */ - /* it does have an infinity, so branch to */ - /* handle the finity */ - set r12,r12,1<s2inf> /* indicate that S2 has an infinity */ - -ASGLOBAL(S2NaNdoub) - xor r10,r10,0x0400 /* precision is the same IEEE 11 bits */ - /* The */ - /* above instruction gives a result exp. */ - /* that has the MSB flipped and sign */ - /* extended like in the IMPCR */ - cmp r11,r10,1023 /* Is exp. equal to IEEE 2047 (internal 1023) */ - bb1 ne,r11,inf /* source 2 is not a NaN or infinity */ - mak r10,r7,20<0> /* load r10 with upper bits of S2 mantissa */ - or r11,r8,r10 /* combine existing 1''s of mantissa */ - bcnd eq0,r11,noS2NaNd /* since r11 can only hold 0 or a positive */ - /* number, branch to noS2NaNd when eq0 */ - br.n _NaN /* branch to NaN routine */ - set r12,r12,1<s2nan> /* indicate that s2 has a NaN */ -ASGLOBAL(noS2NaNd) - bb0 s1nan,r12,1f /* branch to NaN if S1 is a NaN */ - br _NaN -1: br.n _infinity /* If S1 had a NaN we would have already */ - /* branched, and S2 does not have a NaN, but */ - /* it does have an infinity, so branch to */ - /* handle the finity */ - set r12,r12,1<s2inf> /* indicate that S2 has an infinity */ - - -/* If S2 was a NaN, the routine would have already branched to NaN. If S1 */ -/* is a NaN, then branch to NaN. If S1 is not a NaN and S2 is infinity, then */ -/* we would have already branched to infinity. If S1 is infinity, then branch. */ -/* If the routine still has not branched, then branch to denorm, the only */ -/* reserved operand left. */ - -ASGLOBAL(inf) - bb0 s1nan,r12,1f /* branch if S1 has a NaN and S2 does not */ - br _NaN -1: bb0 s1inf,r12,2f /* Neither S1 or S2 has a NaN, and we would */ - /* have branched already if S2 had an */ - /* infinity, so branch if S1 is infinity */ /* - * The above "bb0 s1inf, r12,2f" had been a "bb1", but it just didn't make - * sense (and didn't work, either), so I changed it. - * jfriedl Dec 1, 1989. + * Some instructions only have the S2 operations, so clear S1HI and S1LO + * for those instructions so that the previous contents of S1HI and S1LO + * do not influence this instruction. + */ + +ASLOCAL(FPresoper) + st r1, r31, 0 + extu r10,r9,5<11> /* extract opcode */ +#if 0 + cmp r11,r10,FSQRTop /* compare to FSQRT */ + bb1 eq,r11,S1clear /* clear S1 if instruction only had S2 operand */ +#endif + cmp r11,r10,INTop /* compare to INT */ + bb1 eq,r11,S1clear /* clear S1 if instruction only had S2 operand */ + cmp r11,r10,NINTop /* compare to NINT */ + bb1 eq,r11,S1clear /* clear S1 if instruction only had S2 operand */ + cmp r11,r10,TRNCop /* compare to TRNC */ + bb0 eq,r11,opercheck /* check for reserved operands */ + +ASLOCAL(S1clear) + or r5,r0,r0 /* clear any NaN''s, denorms, or infinities */ + or r6,r0,r0 /* that may be left in S1HI,S1LO from a */ + /* previous instruction */ + +/* + * r12 contains the following flags: + * bit 9 -- s1sign + * bit 8 -- s2sign + * bit 7 -- s1nan + * bit 6 -- s2nan + * bit 5 -- s1inf + * bit 4 -- s2inf + * bit 3 -- s1zero + * bit 2 -- s2zero + * bit 1 -- s1denorm + * bit 0 -- s2denorm + */ + +/* + * Using code for both single and double precision, check if S1 is either + * a NaN or infinity and set the appropriate flags in r12. Then check if + * S2 is a NaN or infinity. If it is a NaN, then branch to the NaN routine. */ - br _infinity -2: - br _denorm /* branch to denorm, the only remaining */ - /* alternative */ +ASLOCAL(opercheck) + extu r10,r5,11<20> /* internal representation for double */ + bb1.n s1size,r9,S1NaNdoub /* S1 is double precision */ + or r12,r0,r0 /* clear operand flag register */ +ASLOCAL(S1NaNsing) + xor r10,r10,0x0080 /* internal representation for single */ + ext r10,r10,8<0> /* precision is IEEE 8 bits sign extended */ + /* to 11 bits; for real exp. > 0, the */ + /* above instructions gives a result exp. */ + /* that has the MSB flipped and sign */ + /* extended like in the IMPCR */ + cmp r11,r10,127 /* Is exponent equal to IEEE 255 (here 127) */ + bb1 ne,r11,S2NaN /* source 1 is not a NaN or infinity */ + mak r10,r5,20<0> /* load r10 with upper bits of S1 mantissa */ + extu r11,r6,3<29> /* get 3 upper bits of lower word */ + or r11,r10,r11 /* combine any existing 1 */ + bcnd eq0,r11,noS1NaNs /* since r11 can only hold 0 or a */ + /* > 0 number, branch to noS1NaN when eq0 */ + br.n S2NaN /* see if S2 has a NaN */ + set r12,r12,1<s1nan> /* indicate that S1 has a NaN */ +ASLOCAL(noS1NaNs) + br.n S2NaN /* check contents of S2 */ + set r12,r0,1<s1inf> /* indicate that S1 has an infinity */ + +ASLOCAL(S1NaNdoub) + xor r10,r10,0x0400 /* precision is the same IEEE 11 bits */ + /* The above instructions gives a result exp. */ + /* that has the MSB flipped and sign */ + /* extended like in the IMPCR */ + cmp r11,r10,1023 /* Is exp. equal to IEEE 2047 (internal 1023) */ + bb1 ne,r11,S2NaN /* source 1 is not a NaN or infinity */ + mak r10,r5,20<0> /* load r10 with upper bits of S1 mantissa */ + or r11,r6,r10 /* combine existing 1''s of mantissa */ + bcnd eq0,r11,noS1NaNd /* since r11 can only hold 0 or a > 0 */ + /* number, branch to noS1NaN when eq0 */ + br.n S2NaN /* see if S2 has a NaN */ + set r12,r12,1<s1nan> /* indicate that S1 has a NaN */ +ASLOCAL(noS1NaNd) + set r12,r0,1<s1inf> /* indicate that S1 has an infinity */ + +ASLOCAL(S2NaN) + bb1.n s2size,r9,S2NaNdoub /* S1 is double precision */ + extu r10,r7,11<20> /* internal representation for double */ +ASLOCAL(S2NaNsing) + xor r10,r10,0x0080 /* internal representation for single */ + ext r10,r10,8<0> /* precision is IEEE 8 bits sign extended */ + /* to 11 bits; for real exp. > 0, the */ + /* above instruction gives a result exp. */ + /* that has the MSB flipped and sign */ + /* extended like in the IMPCR */ + cmp r11,r10,127 /* Is exponent equal to IEEE 255 (here 127) */ + bb1 ne,r11,inf /* source 2 is not a NaN or infinity */ + mak r10,r7,20<0> /* load r10 with upper bits of S1 mantissa */ + extu r11,r8,3<29> /* get 3 upper bits of lower word */ + or r11,r10,r11 /* combine any existing 1''s */ + bcnd eq0,r11,noS2NaNs /* since r11 can only hold 0 or a > 0 */ + /* number, branch to noS2NaNs when eq0 */ + br.n _ASM_LABEL(NaN) /* branch to NaN routine */ + set r12,r12,1<s2nan> /* indicate that s2 has a NaN */ +ASLOCAL(noS2NaNs) + bb0 s1nan,r12, 1f /* branch to NaN if S1 is a NaN */ + br _ASM_LABEL(NaN) +1: + br.n _ASM_LABEL(infinity) /* If S1 had a NaN we would have */ + /* already branched, and S2 does not have a */ + /* NaN, but it does have an infinity, so */ + /* branch to handle the finity */ + set r12,r12,1<s2inf> /* indicate that S2 has an infinity */ + +ASLOCAL(S2NaNdoub) + xor r10,r10,0x0400 /* precision is the same IEEE 11 bits */ + /* The above instruction gives a result exp. */ + /* that has the MSB flipped and sign */ + /* extended like in the IMPCR */ + cmp r11,r10,1023 /* Is exp. equal to IEEE 2047 (internal 1023) */ + bb1 ne,r11,inf /* source 2 is not a NaN or infinity */ + mak r10,r7,20<0> /* load r10 with upper bits of S2 mantissa */ + or r11,r8,r10 /* combine existing 1''s of mantissa */ + bcnd eq0,r11,noS2NaNd /* since r11 can only hold 0 or a > 0 */ + /* number, branch to noS2NaNd when eq0 */ + br.n _ASM_LABEL(NaN) /* branch to NaN routine */ + set r12,r12,1<s2nan> /* indicate that s2 has a NaN */ +ASLOCAL(noS2NaNd) + bb0 s1nan,r12,1f /* branch to NaN if S1 is a NaN */ + br _ASM_LABEL(NaN) +1: + br.n _ASM_LABEL(infinity) /* If S1 had a NaN we would have */ + /* already branched, and S2 does not have a */ + /* NaN, but it does have an infinity, so */ + /* branch to handle the finity */ + set r12,r12,1<s2inf> /* indicate that S2 has an infinity */ -/* function _FPunderflow -- */ -/* The documentation for this release give an overall description of this code. */ +/* + * If S2 was a NaN, the routine would have already branched to NaN. If S1 + * is a NaN, then branch to NaN. If S1 is not a NaN and S2 is infinity, then + * we would have already branched to infinity. If S1 is infinity, then branch. + * If the routine still has not branched, then branch to denorm, the only + * reserved operand left. + */ - text - global _FPunderflow +ASLOCAL(inf) + bb0 s1nan,r12,1f /* branch if S1 has a NaN and S2 does not */ + br _ASM_LABEL(NaN) +1: + bb0 s1inf,r12,2f /* Neither S1 or S2 has a NaN, and we would */ + /* have branched already if S2 had an */ + /* infinity, so branch if S1 is infinity */ + br _ASM_LABEL(infinity) +2: + br _ASM_LABEL(denorm) /* branch to denorm, the only */ + /* remaining alternative */ -/* First check for an underflow user handler. If there is not one, then */ -/* branch to the routine to make a denormalized number. Before branching */ -/* to the underflow user handler, add 192 to a single precision exponent */ -/* and 1536 to a double precision exponent. */ +/* + * First check for an underflow user handler. If there is not one, then + * branch to the routine to make a denormalized number. Before branching + * to the underflow user handler, add 192 to a single precision exponent + * and 1536 to a double precision exponent. + */ -_FPunderflow: st r1,r31,0 /* save return address */ +ASLOCAL(FPunderflow) + st r1,r31,0 /* save return address */ #ifdef HANDLER - bb0 efunf,r12,denorm /* jump to default procedure */ - bb1.n destsize,r12,doubleprec /* double precision destination */ - set r2,r2,1<underflow> /* set underflow flag in FPSR */ -singleprec: or.u r6,r0,0x0c00 /* load exponent adjust 192 */ - br.n callundhand /* branch to call handler for user handler */ - add r12,r6,r12 /* adjust single precision exponent */ -doubleprec: or.u r6,r0,0x6000 /* load exponent adjust 1536 */ - add r12,r6,r12 /* adjust double precision exponent */ -callundhand: bsr _handler /* call handler for user handler */ - br Ureturn /* return from subroutine */ + bb0 efunf,r12,FPU_denorm /* jump to default procedure */ + bb1.n destsize,r12,doubleprec /* double precision destination */ + set r2,r2,1<underflow> /* set underflow flag in FPSR */ +singleprec: + or.u r6,r0,0x0c00 /* load exponent adjust 192 */ + br.n callundhand /* branch to call handler for user handler */ + add r12,r6,r12 /* adjust single precision exponent */ +doubleprec: + or.u r6,r0,0x6000 /* load exponent adjust 1536 */ + add r12,r6,r12 /* adjust double precision exponent */ +callundhand: + bsr _handler /* call handler for user handler */ + br Ureturn #endif -/* Now the floating point number, which has an exponent smaller than what */ -/* IEEE allows, must be denormalized. Denormalization is done by calculating */ -/* the difference between a denormalized exponent and an underflow exponent and */ -/* shifting the mantissa by that amount. A one may need to be subtracted from */ -/* the LSB if a one was added during rounding. */ -/* r9 is used to contain the guard, round, sticky, and an inaccuracy bit in */ -/* case some bits were shifted off the mantissa during denormalization. */ -/* r9 will contain: bit 4 -- new addone if one added during rounding */ -/* after denormalization */ -/* bit 3 -- inaccuracy flag caused by denormalization */ -/* or pre-denormalization inexactness */ -/* bit 2 -- guard bit of result */ -/* bit 1 -- round bit of result */ -/* bit 0 -- sticky bit of result */ - -denorm: bb1.n destsize,r12,Udouble /* denorm for double */ - extu r9,r10,3<26> /* load r9 with grs */ -Usingle: mak r5,r10,21<3> /* extract high 21 bits of mantissa */ - extu r6,r11,3<29> /* extract low 3 bits of mantissa */ - or r11,r5,r6 /* form 24 bits of mantissa */ +/* + * Now the floating point number, which has an exponent smaller than what + * IEEE allows, must be denormalized. Denormalization is done by calculating + * the difference between a denormalized exponent and an underflow exponent + * and shifting the mantissa by that amount. A one may need to be subtracted + * from the LSB if a one was added during rounding. + * r9 is used to contain the guard, round, sticky, and an inaccuracy bit in + * case some bits were shifted off the mantissa during denormalization. + * r9 will contain: + * bit 4 -- new addone if one added during rounding after denormalization + * bit 3 -- inaccuracy flag caused by denormalization or pre-denormalization + * inexactness + * bit 2 -- guard bit of result + * bit 1 -- round bit of result + * bit 0 -- sticky bit of result + */ + +FPU_denorm: + bb1.n destsize,r12,Udouble /* denorm for double */ + extu r9,r10,3<26> /* load r9 with grs */ +Usingle: + mak r5,r10,21<3> /* extract high 21 bits of mantissa */ + extu r6,r11,3<29> /* extract low 3 bits of mantissa */ + or r11,r5,r6 /* form 24 bits of mantissa */ /* See if the addone bit is set and unround if it is. */ - bb0.n 25,r10,nounrounds /* do not unround if addone bit clear */ - extu r6,r12,12<20> /* extract signed exponent from IMPCR */ -unrounds: subu r11,r11,1 /* subtract 1 from mantissa */ -/* If the hidden bit is cleared after subtracting the one, then the one added */ -/* during the rounding must have propagated through the mantissa. The exponent */ -/* will need to be decremented. */ - bb1 23,r11,nounrounds /* if hidden bit is set,then exponent does */ - /* not need to be decremented */ -decexps: sub r6,r6,1 /* decrement exponent 1 */ - set r11,r11,1<23> /* set the hidden bit */ - -/* For both single and double precision, there are cases where it is easier */ -/* and quicker to make a special case. Examples of this are if the shift */ -/* amount is only 1 or 2, or all the mantissa is shifted off, or all the */ -/* mantissa is shifted off and it is still shifting, or, in the case of */ -/* doubles, if the shift amount is around the boundary of MANTLO and MANTHI. */ - -nounrounds: or r8,r0,lo16(0x00000f81) /* load r8 with -127 in decimal */ - /* for lowest 12 bits */ - sub r7,r8,r6 /* find difference between two exponents, */ - /* this amount is the shift amount */ - cmp r6,r7,3 /* check to see if r7 contains 3 or more */ - bb1 ge,r6,threesing /* br to code that handles shifts of >=3 */ - cmp r6,r7,2 /* check to see if r7 contains 2 */ - bb1 eq,r6,twosing /* br to code that handles shifts of 2 */ -one: rot r9,r9,0<1> /* rotate roundoff register once, this places */ - /* guard in round and round in sticky */ - bb0 31,r9,nosticky1s/* do not or round and sticky if sticky is */ - /* 0, this lost bit will be cleared later */ - set r9,r9,1<0> /* or round and sticky */ -nosticky1s: bb0 0,r11,guardclr1s /* do not set guard bit if LSB = 0 */ - set r9,r9,1<2> /* set guard bit */ -guardclr1s: extu r11,r11,31<1> /* shift mantissa right 1 */ - br.n round /* round result */ - mak r9,r9,3<0> /* clear bits lost during rotation */ - -twosing: rot r9,r9,0<2> /* rotate roundff register twice, this places */ - /* guard in sticky */ - bb0 30,r9,nosticky2s /* do not or guard and sticky if stick is 0 */ - /* this lost bit will be cleared later */ - br.n noround2s /* skip or old guard and old round if old */ - /* sticky set */ - set r9,r9,1<0> /* or guard and sticky */ -nosticky2s: bb0 31,r9,noround2s /* do not or guard and round if round is 0 */ - /* this lost bit will be cleared later */ - set r9,r9,1<0> /* or guard and round */ -noround2s: bb0 0,r11,roundclr2s /* do not set round bit if LSB = 0 */ - set r9,r9,1<1> /* set round bit */ -roundclr2s: bb0 1,r11,guardclr2s /* do not set guard bit if LSB + 1 = 0 */ - set r9,r9,1<2> /* set guard bit */ -guardclr2s: extu r11,r11,30<2> /* shift mantissa right 2 */ - br.n round /* round result */ - mak r9,r9,3<0> /* clear bits lost during rotation */ - -threesing: bb1 0,r9,noguard3s /* check sticky initially */ - /* sticky is set, forget most of the oring */ -nosticky3s: bb0 1,r9,noround3s /* check round initially, do not set sticky */ - br.n noguard3s /* forget most of the rest of oring */ - set r9,r9,1<0> /* if round is clear,set sticky if round set */ -noround3s: bb0.n 2,r9,noguard3s /* check guard initially, do not set sticky */ - clr r9,r9,2<1> /* clear the original guard and round for when */ - /* you get to round section */ - set r9,r9,1<0> /* if guard is clear,set sticky if guard set */ -noguard3s: cmp r6,r7,23 /* check if # of shifts is <=23 */ - bb1 gt,r6,s24 /* branch to see if shifts = 24 */ - sub r6,r7,2 /* get number of bits to check for sticky */ - mak r6,r6,5<5> /* shift width into width field */ - mak r8,r11,r6 /* mask off shifted bits -2 */ - ff1 r8,r8 /* see if r8 has any ones */ - bb1 5,r8,nostky23 /* do not set sticky if no ones found */ - set r9,r9,1<0> /* set sticky bit */ -nostky23: or r8,r0,34 /* start code to get new mantissa plus two */ - /* extra bits for new round and new guard bits */ - subu r8,r8,r7 - mak r8,r8,5<5> /* shift field width into second five bits */ - extu r6,r6,5<5> /* shift previous shifted -2 into offset field */ - or r6,r6,r8 /* complete field */ - extu r11,r11,r6 /* form new mantissa with two extra bits */ - - bb0 0,r11,nornd3s /* do not set new round bit */ - set r9,r9,1<1> /* set new round bit */ -nornd3s: bb0 1,r11,nogrd3s /* do not set new guard bit */ - set r9,r9,1<2> /* set new guard bit */ -nogrd3s: br.n round /* round mantissa */ - extu r11,r11,30<2> /* shift off remaining two bits */ - -s24: cmp r6,r7,24 /* check to see if # of shifts is 24 */ - bb1 gt,r6,s25 /* branch to see if shifts = 25 */ - bb1 0,r9,nostky24 /* skip checking if old sticky set */ - extu r8,r11,22<0> /* prepare to check bits that will be shifted */ - /* into the sticky */ - ff1 r8,r8 /* see if there are any 1''s */ - bb1 5,r8,nostky24 /* do not set sticky if no ones found */ - set r9,r9,1<0> /* set sticky bit */ -nostky24: bb0 22,r11,nornd24 /* do not set new round bit */ - set r9,r9,1<1> /* set new round bit */ -nornd24: set r9,r9,1<2> /* set new guard bit,this is hidden bit */ - br.n round /* round mantissa */ - or r11,r0,r0 /* clear r11, all of mantissa shifted off */ - -s25: cmp r6,r7,25 /* check to see if # of shifts is 25 */ - bb1 gt,r6,s26 /* branch to execute for shifts => 26 */ - bb1 0,r9,nostky25 /* skip checking if old sticky set */ - extu r8,r11,23<0> /* prepare to check bits that will be shifted */ - /* into the sticky */ - ff1 r8,r8 /* see if there are any 1''s */ - bb1 5,r8,nostky25 /* do not set sticky if no ones found */ - set r9,r9,1<0> /* set sticky bit */ -nostky25: set r9,r9,1<1> /* set new round bit,this is hidden bit */ - clr r9,r9,1<2> /* clear guard bit since nothing shifted in */ - br.n round /* round and assemble result */ - or r11,r0,r0 /* clear r11, all of mantissa shifted off */ - -s26: set r9,r9,1<0> /* set sticky bit,this contains hidden bit */ - clr r9,r9,2<1> /* clear guard and round bits since nothing */ - /* shifted in */ - br.n round /* round and assemble result */ - or r11,r0,r0 /* clear mantissa */ - -Udouble: mak r5,r10,21<0> /* extract upper bits of mantissa */ - bb0.n 25,r10,nounroundd /* do not unround if addone bit clear */ - extu r6,r12,12<20>/* extract signed exponenet from IMPCR */ -unroundd: or r8,r0,1 - subu.co r11,r11,r8 /* subtract 1 from mantissa */ - subu.ci r5,r5,r0 /* subtract borrow from upper word */ - bb1 20,r5,nounroundd /* if hidden bit is set, then exponent does */ - /* not need to be decremented */ -decexpd: sub r6,r6,1 /* decrement exponent 1 */ - set r5,r5,1<20> /* set the hidden bit */ - -nounroundd: or r8,r0,lo16(0x00000c01) /* load r8 with -1023 in decimal */ - /* for lowest 12 bits */ - sub r7,r8,r6 /* find difference between two exponents, */ - /* this amount is the shift amount */ - cmp r6,r7,3 /* check to see if r7 contains 3 or more */ - bb1 ge,r6,threedoub /* br to code that handles shifts of >=3 */ - cmp r6,r7,2 /* check to see if r7 contains 2 */ - bb1 eq,r6,twodoub /* br to code that handles shifts of 2 */ - -onedoub: rot r9,r9,0<1> /* rotate roundoff register once, this places */ - /* guard in round and round in sticky */ - bb0 31,r9,nosticky1d/* do not or round and sticky if sticky is 0 */ - /* this lost bit will be cleared later */ - set r9,r9,1<0> /* or old round and old sticky into new sticky */ -nosticky1d: bb0 0,r11,guardclr1d /* do not set new guard bit if old LSB = 0 */ - set r9,r9,1<2> /* set new guard bit */ -guardclr1d: extu r11,r11,31<1> /* shift lower mantissa over 1 */ - mak r6,r5,1<31> /* shift off low bit of high mantissa */ - or r11,r6,r11 /* load high bit onto lower mantissa */ - extu r5,r5,20<1> /* shift right once upper 20 bits of mantissa */ - br.n round /* round mantissa and assemble result */ - mak r9,r9,3<0> /* clear bits lost during rotation */ - -twodoub: rot r9,r9,0<2> /* rotate roundoff register twice, this places */ - /* old guard into sticky */ - bb0 30,r9,nosticky2d /* do not or old guard and old sticky if */ - /* old sticky is 0 */ - br.n noround2d /* skip or of old guard and old round if old */ - /* sticky set */ - set r9,r9,1<0> /* or old guard and old sticky into new sticky */ -nosticky2d: bb0 31,r9,noround2d /* do not or old guard and old round if */ - /* old round is 0 */ - set r9,r9,1<0> /* or old guard and old round into new sticky */ -noround2d: bb0 0,r11,roundclr2d /* do not set round bit if old LSB = 0 */ - set r9,r9,1<1> /* set new round bit */ -roundclr2d: bb0 1,r11,guardclr2d /* do not set guard bit if old LSB + 1 = 0 */ - set r9,r9,1<2> /* set new guard bit */ -guardclr2d: extu r11,r11,30<2> /* shift lower mantissa over 2 */ - mak r6,r5,2<30> /* shift off low bits of high mantissa */ - or r11,r6,r11 /* load high bit onto lower mantissa */ - extu r5,r5,19<2> /* shift right twice upper 19 bits of mantissa */ - br.n round /* round mantissa and assemble result */ - mak r9,r9,3<0> /* clear bits lost during rotation */ - -threedoub: bb1 0,r9,noguard3d /* checky sticky initially */ - /* sticky is set, forget most of rest of oring */ -nosticky3d: bb0 1,r9,noround3d /* check old round, do not set sticky if */ - /* old round is clear, set otherwise */ - br.n noguard3d /* sticky is set, forget most of rest of oring */ - set r9,r9,1<0> /* set sticky if old round is set */ -noround3d: bb0 2,r9,noguard3d /* check old guard, do not set sticky if 0 */ - clr r9,r9,2<1> /* clear the original guard and round for when */ - /* you get to round section */ - set r9,r9,1<0> /* set sticky if old guard is set */ -noguard3d: cmp r6,r7,32 /* do I need to work with a 1 or 2 word mant. */ - /* when forming sticky, round and guard */ - bb1 gt,r6,d33 /* jump to code that handles 2 word mantissas */ - sub r6,r7,2 /* get number of bits to check for sticky */ - mak r6,r6,5<5> /* shift width into width field */ - mak r8,r11,r6 /* mask off shifted bits -2 */ - ff1 r8,r8 /* see if r8 has any ones */ - bb1 5,r8,nostky32 /* do not set sticky if no ones found */ - set r9,r9,1<0> /* set sticky bit */ -nostky32: or r8,r0,34 /* start code to get new mantissa plus two */ - /* extra bits for new round and new guard bits, */ - /* the upper word bits will be shifted after */ - /* the round and guard bits are handled */ - subu r8,r8,r7 - mak r8,r8,5<5> /* shift field width into second five bits */ - extu r6,r6,5<5> /* shift previous shifted -2 into offset field */ - or r6,r6,r8 /* complete bit field */ - extu r11,r11,r6 /* partially form new low mantissa with 2 more */ - /* bits */ - bb0 0,r11,nornd32d /* do not set new round bit */ - set r9,r9,1<1> /* set new round bit */ -nornd32d: bb0 1,r11,nogrd32d /* do not set new guard bit */ - set r9,r9,1<2> /* set new guard bit */ -nogrd32d: extu r11,r11,30<2> /* shift off remaining two bits */ - mak r6,r7,5<5> /* shift field width into second 5 bits, if the */ - /* width is 32, then these bits will be 0 */ - or r8,r0,32 /* load word length into r8 */ - sub r8,r8,r7 /* form offset for high bits moved to low word */ - or r6,r6,r8 /* form complete bit field */ - mak r6,r5,r6 /* get shifted bits of high word */ - or r11,r6,r11 /* form new low word of mantissa */ - bcnd ne0,r8,regular33 /* do not adjust for special case of r8 */ - br.n round /* containing zeros, which would cause */ - or r5,r0,r0 /* all of the bits to be extracted under */ - /* the regular method */ -regular33: mak r6,r7,5<0> /* place lower 5 bits of shift into r6 */ - mak r8,r8,5<5> /* shift r8 into width field */ - or r6,r6,r8 /* form field for shifting of upper bits */ - br.n round /* round and assemble result */ - extu r5,r5,r6 /* form new high word mantissa */ - -d33: cmp r6,r7,33 /* is the number of bits to be shifted is 33? */ - bb1 gt,r6,d34 /* check to see if # of bits is 34 */ - bb1 0,r9,nostky33 /* skip checking if old sticky set */ - mak r6,r11,31<0> /* check bits that will be shifted into sticky */ - ff1 r8,r8 /* check for ones */ - bb1 5,r8,nostky33 /* do not set sticky if there are no ones */ - set r9,r9,1<0> /* set new sticky bit */ -nostky33: bb0 31,r11,nornd33 /* do not set round if bit is not a 1 */ - set r9,r9,1<1> /* set new round bit */ -nornd33: bb0 0,r5,nogrd33 /* do not set guard bit if bit is not a 1 */ - set r9,r9,1<2> /* set new guard bit */ -nogrd33: extu r11,r5,31<1> /* shift high bits into low word */ - br.n round /* round and assemble result */ - or r5,r0,r0 /* clear high word */ - -d34: cmp r6,r7,34 /* is the number of bits to be shifted 34? */ - bb1 gt,r6,d35 /* check to see if # of bits is >= 35 */ - bb1 0,r9,nostky34 /* skip checking if old sticky set */ - ff1 r8,r11 /* check bits that will be shifted into sticky */ - bb1 5,r8,nostky34 /* do not set sticky if there are no ones */ - set r9,r9,1<0> /* set new sticky bit */ -nostky34: bb0 0,r5,nornd34 /* do not set round if bit is not a 1 */ - set r9,r9,1<1> /* set new round bit */ -nornd34: bb0 1,r5,nogrd34 /* do not set guard bit if bit is not a 1 */ - set r9,r9,1<2> /* set new guard bit */ -nogrd34: extu r11,r5,30<2> /* shift high bits into low word */ - br.n round /* round and assemble result */ - or r5,r0,r0 /* clear high word */ - -d35: cmp r6,r7,52 /* see if # of shifts is 35 <= X <= 52 */ - bb1 gt,r6,d53 /* check to see if # of shifts is 52 */ - bb1.n 0,r9,nostky35 /* skip checking if old sticky set */ - sub r7,r7,34 /* subtract 32 from # of shifts so that opera- */ - /* tions can be done on the upper word, and */ - /* then subtract two more checking guard and */ - /* sticky bits */ - ff1 r8,r11 /* see if lower word has a bit for sticky */ - bb1 5,r8,stkycheck35 /* see if upper word has any sticky bits */ - br.n nostky35 /* quit checking for sticky */ - set r9,r9,1<0> /* set sticky bit */ -stkycheck35: mak r6,r7,5<5> /* place width into width field */ - mak r8,r5,r6 /* mask off shifted bits - 2 */ - ff1 r8,r8 /* see if r8 has any ones */ - bb1 5,r8,nostky35 /* do not set sticky if no ones found */ - set r9,r9,1<0> /* set sticky bit */ -nostky35: or r8,r0,32 /* look at what does not get shifted off plus */ - /* round and sticky, remember that the r7 value */ - /* was adjusted so that it did not include */ - /* new round or new sticky in shifted off bits */ - subu r8,r8,r7 /* complement width */ - mak r8,r8,5<5> /* shift width into width field */ - or r8,r7,r8 /* add offset field */ - extu r11,r5,r8 /* extract upper bits into low word */ - bb0 0,r11,nornd35 /* do not set new round bit */ - set r9,r9,1<1> /* set new round bit */ -nornd35: bb0 1,r11,nogrd35 /* do not set new guard bit */ - set r9,r9,1<2> /* set new guard bit */ -nogrd35: extu r11,r11,30<2> /* shift off remaining guard and round bits */ - br.n round /* round and assemble result */ - or r5,r0,r0 /* clear high word */ - -d53: cmp r6,r7,53 /* check to see if # of shifts is 53 */ - bb1 gt,r6,d54 /* branch to see if shifts = 54 */ - bb1 0,r9,nostky53 /* skip checking if old sticky set */ - ff1 r8,r11 /* see if lower word has a bit for sticky */ - bb1 5,r8,stkycheck53 /* see if upper word has any sticky bits */ - br.n nostky53 /* quit checking for sticky */ - set r9,r9,1<0> /* set sticky bit */ -stkycheck53: mak r6,r5,19<0> /* check bits that are shifted into sticky */ - ff1 r8,r6 /* see if r6 has any ones */ - bb1 5,r8,nostky53 /* do not set sticky if no ones found */ - set r9,r9,1<0> /* set sticky bit */ -nostky53: bb0 19,r5,nornd53 /* do not set new round bit */ - set r9,r9,1<1> /* set new round bit */ -nornd53: set r9,r9,1<2> /* set new guard bit,this is hidden bit */ - or r5,r0,r0 /* clear high word */ - br.n round /* round and assemble result */ - or r11,r0,r0 /* clear low word */ - -d54: cmp r6,r7,54 /* check to see if # of shifts is 54 */ - bb1 gt,r6,d55 /* branch to execute for shifts =>55 */ - bb1 0,r9,nostky54 /* skip checking if old sticky set */ - ff1 r8,r11 /* see if lower word has a bit for sticky */ - bb1 5,r8,stkycheck54 /* see if upper word has any sticky bits */ - br.n nostky54 /* quit checking for sticky */ - set r9,r9,1<0> /* set sticky bit */ -stkycheck54: mak r6,r5,20<0> /* check bits that are shifted into sticky */ - ff1 r8,r6 /* see if r6 has any ones */ - bb1 5,r8,nostky54 /* do not set sticky if no ones found */ - set r9,r9,1<0> /* set sticky bit */ -nostky54: set r9,r9,1<1> /* set new round bit,this is hidden bit */ - clr r9,r9,1<2> /* clear guard bit since nothing shifted in */ - or r5,r0,r0 /* clear high word */ - br.n round /* round and assemble result */ - or r11,r0,r0 /* clear low word */ - -d55: set r9,r9,1<0> /* set new sticky bit,this contains hidden bit */ - clr r9,r9,2<1> /* clear guard and round bits since nothing */ - /* shifted in */ - or r5,r0,r0 /* clear high word */ - or r11,r0,r0 /* clear low word */ + bb0.n 25,r10,nounrounds /* do not unround if addone bit clear */ + extu r6,r12,12<20> /* extract signed exponent from IMPCR */ +unrounds: + subu r11,r11,1 /* subtract 1 from mantissa */ + +/* + * If the hidden bit is cleared after subtracting the one, then the one added + * during the rounding must have propagated through the mantissa. The exponent + * will need to be decremented. + */ + bb1 23,r11,nounrounds /* if hidden bit is set,then exponent */ + /* does not need to be decremented */ +decexps: + sub r6,r6,1 /* decrement exponent 1 */ + set r11,r11,1<23> /* set the hidden bit */ + +/* + * For both single and double precision, there are cases where it is easier + * and quicker to make a special case. Examples of this are if the shift + * amount is only 1 or 2, or all the mantissa is shifted off, or all the + * mantissa is shifted off and it is still shifting, or, in the case of + * doubles, if the shift amount is around the boundary of MANTLO and MANTHI. + */ + +nounrounds: + or r8,r0,lo16(0x00000f81) /* load r8 with -127 in decimal */ + /* for lowest 12 bits */ + sub r7,r8,r6 /* find difference between two exponents, */ + /* this amount is the shift amount */ + cmp r6,r7,3 /* check to see if r7 contains 3 or more */ + bb1 ge,r6,threesing /* br to code that handles shifts of >=3 */ + cmp r6,r7,2 /* check to see if r7 contains 2 */ + bb1 eq,r6,twosing /* br to code that handles shifts of 2 */ +one: + rot r9,r9,0<1> /* rotate roundoff register once, this places */ + /* guard in round and round in sticky */ + bb0 31,r9,nosticky1s /* do not or round and sticky if sticky is */ + /* 0, this lost bit will be cleared later */ + set r9,r9,1<0> /* or round and sticky */ +nosticky1s: + bb0 0,r11,guardclr1s /* do not set guard bit if LSB = 0 */ + set r9,r9,1<2> /* set guard bit */ +guardclr1s: + extu r11,r11,31<1> /* shift mantissa right 1 */ + br.n round /* round result */ + mak r9,r9,3<0> /* clear bits lost during rotation */ + +twosing: + rot r9,r9,0<2> /* rotate roundff register twice, this places */ + /* guard in sticky */ + bb0 30,r9,nosticky2s /* do not or guard and sticky if stick is 0 */ + /* this lost bit will be cleared later */ + br.n noround2s /* skip or old guard and old round if old */ + /* sticky set */ + set r9,r9,1<0> /* or guard and sticky */ +nosticky2s: + bb0 31,r9,noround2s /* do not or guard and round if round is 0 */ + /* this lost bit will be cleared later */ + set r9,r9,1<0> /* or guard and round */ +noround2s: + bb0 0,r11,roundclr2s /* do not set round bit if LSB = 0 */ + set r9,r9,1<1> /* set round bit */ +roundclr2s: + bb0 1,r11,guardclr2s /* do not set guard bit if LSB + 1 = 0 */ + set r9,r9,1<2> /* set guard bit */ +guardclr2s: + extu r11,r11,30<2> /* shift mantissa right 2 */ + br.n round /* round result */ + mak r9,r9,3<0> /* clear bits lost during rotation */ + +threesing: + bb1 0,r9,noguard3s /* check sticky initially */ + /* sticky is set, forget most of the oring */ +nosticky3s: + bb0 1,r9,noround3s /* check round initially, do not set sticky */ + br.n noguard3s /* forget most of the rest of oring */ + set r9,r9,1<0> /* if round is clear,set sticky if round set */ +noround3s: + bb0.n 2,r9,noguard3s /* check guard initially, do not set sticky */ + clr r9,r9,2<1> /* clear the original guard and round for when */ + /* you get to round section */ + set r9,r9,1<0> /* if guard is clear,set sticky if guard set */ +noguard3s: + cmp r6,r7,23 /* check if # of shifts is <=23 */ + bb1 gt,r6,s24 /* branch to see if shifts = 24 */ + sub r6,r7,2 /* get number of bits to check for sticky */ + mak r6,r6,5<5> /* shift width into width field */ + mak r8,r11,r6 /* mask off shifted bits -2 */ + ff1 r8,r8 /* see if r8 has any ones */ + bb1 5,r8,nostky23 /* do not set sticky if no ones found */ + set r9,r9,1<0> /* set sticky bit */ +nostky23: + or r8,r0,34 /* start code to get new mantissa plus two */ + /* extra bits for new round and new guard */ + /* bits */ + subu r8,r8,r7 + mak r8,r8,5<5> /* shift field width into second five bits */ + extu r6,r6,5<5> /* shift previous shifted -2 into offset field */ + or r6,r6,r8 /* complete field */ + extu r11,r11,r6 /* form new mantissa with two extra bits */ + + bb0 0,r11,nornd3s /* do not set new round bit */ + set r9,r9,1<1> /* set new round bit */ +nornd3s: + bb0 1,r11,nogrd3s /* do not set new guard bit */ + set r9,r9,1<2> /* set new guard bit */ +nogrd3s: + br.n round /* round mantissa */ + extu r11,r11,30<2> /* shift off remaining two bits */ + +s24: + cmp r6,r7,24 /* check to see if # of shifts is 24 */ + bb1 gt,r6,s25 /* branch to see if shifts = 25 */ + bb1 0,r9,nostky24 /* skip checking if old sticky set */ + extu r8,r11,22<0> /* prepare to check bits that will be shifted */ + /* into the sticky */ + ff1 r8,r8 /* see if there are any 1''s */ + bb1 5,r8,nostky24 /* do not set sticky if no ones found */ + set r9,r9,1<0> /* set sticky bit */ +nostky24: + bb0 22,r11,nornd24 /* do not set new round bit */ + set r9,r9,1<1> /* set new round bit */ +nornd24: + set r9,r9,1<2> /* set new guard bit,this is hidden bit */ + br.n round /* round mantissa */ + or r11,r0,r0 /* clear r11, all of mantissa shifted off */ + +s25: + cmp r6,r7,25 /* check to see if # of shifts is 25 */ + bb1 gt,r6,s26 /* branch to execute for shifts => 26 */ + bb1 0,r9,nostky25 /* skip checking if old sticky set */ + extu r8,r11,23<0> /* prepare to check bits that will be shifted */ + /* into the sticky */ + ff1 r8,r8 /* see if there are any 1''s */ + bb1 5,r8,nostky25 /* do not set sticky if no ones found */ + set r9,r9,1<0> /* set sticky bit */ +nostky25: + set r9,r9,1<1> /* set new round bit,this is hidden bit */ + clr r9,r9,1<2> /* clear guard bit since nothing shifted in */ + br.n round /* round and assemble result */ + or r11,r0,r0 /* clear r11, all of mantissa shifted off */ + +s26: + set r9,r9,1<0> /* set sticky bit,this contains hidden bit */ + clr r9,r9,2<1> /* clear guard and round bits since nothing */ + /* shifted in */ + br.n round /* round and assemble result */ + or r11,r0,r0 /* clear mantissa */ + +Udouble: + mak r5,r10,21<0> /* extract upper bits of mantissa */ + bb0.n 25,r10,nounroundd /* do not unround if addone bit clear */ + extu r6,r12,12<20> /* extract signed exponenet from IMPCR */ +unroundd: + or r8,r0,1 + subu.co r11,r11,r8 /* subtract 1 from mantissa */ + subu.ci r5,r5,r0 /* subtract borrow from upper word */ + bb1 20,r5,nounroundd /* if hidden bit is set, then exponent does */ + /* not need to be decremented */ +decexpd: + sub r6,r6,1 /* decrement exponent 1 */ + set r5,r5,1<20> /* set the hidden bit */ + +nounroundd: + or r8,r0,lo16(0x00000c01) /* load r8 with -1023 in decimal */ + /* for lowest 12 bits */ + sub r7,r8,r6 /* find difference between two exponents, */ + /* this amount is the shift amount */ + cmp r6,r7,3 /* check to see if r7 contains 3 or more */ + bb1 ge,r6,threedoub /* br to code that handles shifts of >=3 */ + cmp r6,r7,2 /* check to see if r7 contains 2 */ + bb1 eq,r6,twodoub /* br to code that handles shifts of 2 */ + +onedoub: + rot r9,r9,0<1> /* rotate roundoff register once, this places */ + /* guard in round and round in sticky */ + bb0 31,r9,nosticky1d/* do not or round and sticky if sticky is 0 */ + /* this lost bit will be cleared later */ + set r9,r9,1<0> /* or old round and old sticky into new sticky */ +nosticky1d: + bb0 0,r11,guardclr1d /* do not set new guard bit if old LSB = 0 */ + set r9,r9,1<2> /* set new guard bit */ +guardclr1d: + extu r11,r11,31<1> /* shift lower mantissa over 1 */ + mak r6,r5,1<31> /* shift off low bit of high mantissa */ + or r11,r6,r11 /* load high bit onto lower mantissa */ + extu r5,r5,20<1> /* shift right once upper 20 bits of mantissa */ + br.n round /* round mantissa and assemble result */ + mak r9,r9,3<0> /* clear bits lost during rotation */ + +twodoub: + rot r9,r9,0<2> /* rotate roundoff register twice, this places */ + /* old guard into sticky */ + bb0 30,r9,nosticky2d /* do not or old guard and old sticky if */ + /* old sticky is 0 */ + br.n noround2d /* skip or of old guard and old round if old */ + /* sticky set */ + set r9,r9,1<0> /* or old guard and old sticky into new sticky */ +nosticky2d: + bb0 31,r9,noround2d /* do not or old guard and old round if */ + /* old round is 0 */ + set r9,r9,1<0> /* or old guard and old round into new sticky */ +noround2d: + bb0 0,r11,roundclr2d /* do not set round bit if old LSB = 0 */ + set r9,r9,1<1> /* set new round bit */ +roundclr2d: + bb0 1,r11,guardclr2d /* do not set guard bit if old LSB + 1 = 0 */ + set r9,r9,1<2> /* set new guard bit */ +guardclr2d: + extu r11,r11,30<2> /* shift lower mantissa over 2 */ + mak r6,r5,2<30> /* shift off low bits of high mantissa */ + or r11,r6,r11 /* load high bit onto lower mantissa */ + extu r5,r5,19<2> /* shift right twice upper 19 bits of mantissa */ + br.n round /* round mantissa and assemble result */ + mak r9,r9,3<0> /* clear bits lost during rotation */ + +threedoub: + bb1 0,r9,noguard3d /* checky sticky initially */ + /* sticky is set, forget most of rest of oring */ +nosticky3d: + bb0 1,r9,noround3d /* check old round, do not set sticky if */ + /* old round is clear, set otherwise */ + br.n noguard3d /* sticky is set, forget most of rest of oring */ + set r9,r9,1<0> /* set sticky if old round is set */ +noround3d: + bb0 2,r9,noguard3d /* check old guard, do not set sticky if 0 */ + clr r9,r9,2<1> /* clear the original guard and round for when */ + /* you get to round section */ + set r9,r9,1<0> /* set sticky if old guard is set */ +noguard3d: + cmp r6,r7,32 /* do I need to work with a 1 or 2 word mant. */ + /* when forming sticky, round and guard */ + bb1 gt,r6,d33 /* jump to code that handles 2 word mantissas */ + sub r6,r7,2 /* get number of bits to check for sticky */ + mak r6,r6,5<5> /* shift width into width field */ + mak r8,r11,r6 /* mask off shifted bits -2 */ + ff1 r8,r8 /* see if r8 has any ones */ + bb1 5,r8,nostky32 /* do not set sticky if no ones found */ + set r9,r9,1<0> /* set sticky bit */ +nostky32: + or r8,r0,34 /* start code to get new mantissa plus two */ + /* extra bits for new round and new guard bits, */ + /* the upper word bits will be shifted after */ + /* the round and guard bits are handled */ + subu r8,r8,r7 + mak r8,r8,5<5> /* shift field width into second five bits */ + extu r6,r6,5<5> /* shift previous shifted -2 into offset field */ + or r6,r6,r8 /* complete bit field */ + extu r11,r11,r6 /* partially form new low mantissa with 2 more */ + /* bits */ + bb0 0,r11,nornd32d /* do not set new round bit */ + set r9,r9,1<1> /* set new round bit */ +nornd32d: + bb0 1,r11,nogrd32d /* do not set new guard bit */ + set r9,r9,1<2> /* set new guard bit */ +nogrd32d: + extu r11,r11,30<2> /* shift off remaining two bits */ + mak r6,r7,5<5> /* shift field width into second 5 bits, if the */ + /* width is 32, then these bits will be 0 */ + or r8,r0,32 /* load word length into r8 */ + sub r8,r8,r7 /* form offset for high bits moved to low word */ + or r6,r6,r8 /* form complete bit field */ + mak r6,r5,r6 /* get shifted bits of high word */ + or r11,r6,r11 /* form new low word of mantissa */ + bcnd ne0,r8,regular33 /* do not adjust for special case of r8 */ + br.n round /* containing zeros, which would cause */ + or r5,r0,r0 /* all of the bits to be extracted under */ + /* the regular method */ +regular33: + mak r6,r7,5<0> /* place lower 5 bits of shift into r6 */ + mak r8,r8,5<5> /* shift r8 into width field */ + or r6,r6,r8 /* form field for shifting of upper bits */ + br.n round /* round and assemble result */ + extu r5,r5,r6 /* form new high word mantissa */ + +d33: + cmp r6,r7,33 /* is the number of bits to be shifted is 33? */ + bb1 gt,r6,d34 /* check to see if # of bits is 34 */ + bb1 0,r9,nostky33 /* skip checking if old sticky set */ + mak r6,r11,31<0> /* check bits that will be shifted into sticky */ + ff1 r8,r8 /* check for ones */ + bb1 5,r8,nostky33 /* do not set sticky if there are no ones */ + set r9,r9,1<0> /* set new sticky bit */ +nostky33: + bb0 31,r11,nornd33 /* do not set round if bit is not a 1 */ + set r9,r9,1<1> /* set new round bit */ +nornd33: + bb0 0,r5,nogrd33 /* do not set guard bit if bit is not a 1 */ + set r9,r9,1<2> /* set new guard bit */ +nogrd33: + extu r11,r5,31<1> /* shift high bits into low word */ + br.n round /* round and assemble result */ + or r5,r0,r0 /* clear high word */ + +d34: + cmp r6,r7,34 /* is the number of bits to be shifted 34? */ + bb1 gt,r6,d35 /* check to see if # of bits is >= 35 */ + bb1 0,r9,nostky34 /* skip checking if old sticky set */ + ff1 r8,r11 /* check bits that will be shifted into sticky */ + bb1 5,r8,nostky34 /* do not set sticky if there are no ones */ + set r9,r9,1<0> /* set new sticky bit */ +nostky34: + bb0 0,r5,nornd34 /* do not set round if bit is not a 1 */ + set r9,r9,1<1> /* set new round bit */ +nornd34: + bb0 1,r5,nogrd34 /* do not set guard bit if bit is not a 1 */ + set r9,r9,1<2> /* set new guard bit */ +nogrd34: + extu r11,r5,30<2> /* shift high bits into low word */ + br.n round /* round and assemble result */ + or r5,r0,r0 /* clear high word */ + +d35: + cmp r6,r7,52 /* see if # of shifts is 35 <= X <= 52 */ + bb1 gt,r6,d53 /* check to see if # of shifts is 52 */ + bb1.n 0,r9,nostky35 /* skip checking if old sticky set */ + sub r7,r7,34 /* subtract 32 from # of shifts so that opera- */ + /* tions can be done on the upper word, and */ + /* then subtract two more checking guard and */ + /* sticky bits */ + ff1 r8,r11 /* see if lower word has a bit for sticky */ + bb1 5,r8,stkycheck35 /* see if upper word has any sticky bits */ + br.n nostky35 /* quit checking for sticky */ + set r9,r9,1<0> /* set sticky bit */ +stkycheck35: + mak r6,r7,5<5> /* place width into width field */ + mak r8,r5,r6 /* mask off shifted bits - 2 */ + ff1 r8,r8 /* see if r8 has any ones */ + bb1 5,r8,nostky35 /* do not set sticky if no ones found */ + set r9,r9,1<0> /* set sticky bit */ +nostky35: + or r8,r0,32 /* look at what does not get shifted off plus */ + /* round and sticky, remember that the r7 value */ + /* was adjusted so that it did not include */ + /* new round or new sticky in shifted off bits */ + subu r8,r8,r7 /* complement width */ + mak r8,r8,5<5> /* shift width into width field */ + or r8,r7,r8 /* add offset field */ + extu r11,r5,r8 /* extract upper bits into low word */ + bb0 0,r11,nornd35 /* do not set new round bit */ + set r9,r9,1<1> /* set new round bit */ +nornd35: + bb0 1,r11,nogrd35 /* do not set new guard bit */ + set r9,r9,1<2> /* set new guard bit */ +nogrd35: + extu r11,r11,30<2> /* shift off remaining guard and round bits */ + br.n round /* round and assemble result */ + or r5,r0,r0 /* clear high word */ + +d53: + cmp r6,r7,53 /* check to see if # of shifts is 53 */ + bb1 gt,r6,d54 /* branch to see if shifts = 54 */ + bb1 0,r9,nostky53 /* skip checking if old sticky set */ + ff1 r8,r11 /* see if lower word has a bit for sticky */ + bb1 5,r8,stkycheck53 /* see if upper word has any sticky bits */ + br.n nostky53 /* quit checking for sticky */ + set r9,r9,1<0> /* set sticky bit */ +stkycheck53: + mak r6,r5,19<0> /* check bits that are shifted into sticky */ + ff1 r8,r6 /* see if r6 has any ones */ + bb1 5,r8,nostky53 /* do not set sticky if no ones found */ + set r9,r9,1<0> /* set sticky bit */ +nostky53: + bb0 19,r5,nornd53 /* do not set new round bit */ + set r9,r9,1<1> /* set new round bit */ +nornd53: + set r9,r9,1<2> /* set new guard bit,this is hidden bit */ + or r5,r0,r0 /* clear high word */ + br.n round /* round and assemble result */ + or r11,r0,r0 /* clear low word */ + +d54: + cmp r6,r7,54 /* check to see if # of shifts is 54 */ + bb1 gt,r6,d55 /* branch to execute for shifts =>55 */ + bb1 0,r9,nostky54 /* skip checking if old sticky set */ + ff1 r8,r11 /* see if lower word has a bit for sticky */ + bb1 5,r8,stkycheck54 /* see if upper word has any sticky bits */ + br.n nostky54 /* quit checking for sticky */ + set r9,r9,1<0> /* set sticky bit */ +stkycheck54: + mak r6,r5,20<0> /* check bits that are shifted into sticky */ + ff1 r8,r6 /* see if r6 has any ones */ + bb1 5,r8,nostky54 /* do not set sticky if no ones found */ + set r9,r9,1<0> /* set sticky bit */ +nostky54: + set r9,r9,1<1> /* set new round bit,this is hidden bit */ + clr r9,r9,1<2> /* clear guard bit since nothing shifted in */ + or r5,r0,r0 /* clear high word */ + br.n round /* round and assemble result */ + or r11,r0,r0 /* clear low word */ + +d55: + set r9,r9,1<0> /* set new sticky bit,this contains hidden bit */ + clr r9,r9,2<1> /* clear guard and round bits since nothing */ + /* shifted in */ + or r5,r0,r0 /* clear high word */ + or r11,r0,r0 /* clear low word */ /* The first item that the rounding code does is see if either guard, round, */ -/* or sticky is set. If all are clear, then there is no denormalization loss */ +/* or sticky is set. If all are clear, then there is no denormalization loss */ /* and no need to round, then branch to assemble answer. */ -/* For rounding, a branch table is set up. The left two most bits are the */ -/* rounding mode. The third bit is either the LSB of the mantissa or the */ -/* sign bit, depending on the rounding mode. The three LSB''s are the guard, */ +/* For rounding, a branch table is set up. The left two most bits are the */ +/* rounding mode. The third bit is either the LSB of the mantissa or the */ +/* sign bit, depending on the rounding mode. The three LSB''s are the guard, */ /* round and sticky bits. */ -round: ff1 r8,r9 /* see if there is denormalization loss */ - bb1 5,r8,assemble /* no denormalization loss or inexactness */ - extu r6,r10,2<modelo> /* extract rounding mode */ - bb1.n modehi,r10,signext /* use sign bit instead of LSB */ - mak r6,r6,2<4> /* shift over rounding mode */ - extu r7,r11,1<0> /* extract LSB */ - br.n grs /* skip sign extraction */ - mak r7,r7,1<3> /* shift over LSB */ -signext: extu r7,r10,1<31> /* extract sign bit */ - mak r7,r7,1<3> /* shift sign bit over */ -grs: or r6,r6,r7 - or r6,r6,r9 /* or in guard, round, and sticky */ - or.u r1,r0,hi16(roundtable) /* form address of branch table */ - or r1,r1,lo16(roundtable) - lda r6,r1[r6] /* scale offset into branch table */ - jmp.n r6 /* jump to branch table */ - set r9,r9,1<3> /* set inexact flag in r9 */ - -roundtable: br noaddone -r000001: br noaddone -r000010: br noaddone -r000011: br noaddone -r000100: br noaddone -r000101: br addone -r000110: br addone -r000111: br addone -r001000: br noaddone -r001001: br noaddone -r001010: br noaddone -r001011: br noaddone -r001100: br addone -r001101: br addone -r001110: br addone -r001111: br addone -r010000: br noaddone -r010001: br noaddone -r010010: br noaddone -r010011: br noaddone -r010100: br noaddone -r010101: br noaddone -r010110: br noaddone -r010111: br noaddone -r011000: br noaddone -r011001: br noaddone -r011010: br noaddone -r011011: br noaddone -r011100: br noaddone -r011101: br noaddone -r011110: br noaddone -r011111: br noaddone -r100000: br noaddone -r100001: br noaddone -r100010: br noaddone -r100011: br noaddone -r100100: br noaddone -r100101: br noaddone -r100110: br noaddone -r100111: br noaddone -r101000: br noaddone -r101001: br addone -r101010: br addone -r101011: br addone -r101100: br addone -r101101: br addone -r101110: br addone -r101111: br addone -r110000: br noaddone -r110001: br addone -r110010: br addone -r110011: br addone -r110100: br addone -r110101: br addone -r110110: br addone -r110111: br addone -r111000: br noaddone -r111001: br noaddone -r111010: br noaddone -r111011: br noaddone -r111100: br noaddone -r111101: br noaddone -r111110: br noaddone -r111111: br noaddone +round: + ff1 r8,r9 /* see if there is denormalization loss */ + bb1 5,r8,assemble /* no denormalization loss or inexactness */ + extu r6,r10,2<modelo> /* extract rounding mode */ + bb1.n modehi,r10,signext /* use sign bit instead of LSB */ + mak r6,r6,2<4> /* shift over rounding mode */ + extu r7,r11,1<0> /* extract LSB */ + br.n grs /* skip sign extraction */ + mak r7,r7,1<3> /* shift over LSB */ +signext: + extu r7,r10,1<31> /* extract sign bit */ + mak r7,r7,1<3> /* shift sign bit over */ +grs: + or r6,r6,r7 + or r6,r6,r9 /* or in guard, round, and sticky */ + or.u r1,r0,hi16(roundtable) /* form address of branch table */ + or r1,r1,lo16(roundtable) + lda r6,r1[r6] /* scale offset into branch table */ + jmp.n r6 /* jump to branch table */ + set r9,r9,1<3> /* set inexact flag in r9 */ + +roundtable: + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br addone + br addone + br addone + br noaddone + br noaddone + br noaddone + br noaddone + br addone + br addone + br addone + br addone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br addone + br addone + br addone + br addone + br addone + br addone + br addone + br noaddone + br addone + br addone + br addone + br addone + br addone + br addone + br addone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone + br noaddone /* Round by adding a one to the LSB of the mantissa. */ -addone: or r6,r0,1 /* load a 1 into r6 so that add.co can be used */ - add.co r11,r11,r6 /* add a one to the lower word of result */ - bb0.n destsize,r12,noaddone /* single result,forget carry */ - set r9,r9,1<4> /* indicate that a 1 has been added */ - add.ci r5,r5,r0 /* propagate carry into high word */ +addone: + or r6,r0,1 /* load a 1 into r6 so that add.co can be used */ + add.co r11,r11,r6 /* add a one to the lower word of result */ + bb0.n destsize,r12,noaddone /* single result,forget carry */ + set r9,r9,1<4> /* indicate that a 1 has been added */ + add.ci r5,r5,r0 /* propagate carry into high word */ /* Branch to inexact user handler if there is one. */ -noaddone: +noaddone: #ifdef HANDLER - bb1.n efinx,r12,modformdef /* branch to modify form for user */ - /* handler */ - or r2,r2,5 /* set inexact and underflow flags */ + bb1.n efinx,r12,modformdef /* branch to modify form for user */ + /* handler */ + or r2,r2,5 /* set inexact and underflow flags */ #endif -/* Assemble the result of the denormalization routine for writeback to the */ -/* destination register. The exponent of a denormalized number is zero, */ +/* Assemble the result of the denormalization routine for writeback to the */ +/* destination register. The exponent of a denormalized number is zero, */ /* so simply assemble the sign and the new mantissa. */ -assemble: bb1 destsize,r12,doubassem /* assemble double result */ - bb0 sign,r10,exassems /* exit assemble if sign is zero */ - set r11,r11,1<sign> /* make result negative */ -exassems: br Ureturn /* return from subroutine */ +assemble: + bb1 destsize,r12,doubassem /* assemble double result */ + bb0 sign,r10,exassems /* exit assemble if sign is zero */ + set r11,r11,1<sign> /* make result negative */ +exassems: + br Ureturn + +doubassem: + bb0.n sign,r10,signclr /* do not set sign in r10 */ + or r10,r5,r0 /* load high word from r5 into r10 */ + set r10,r10,1<sign> /* high word with sign loaded */ +signclr: + br Ureturn -doubassem: bb0.n sign,r10,signclr /* do not set sign in r10 */ - or r10,r5,r0 /* load high word from r5 into r10 */ - set r10,r10,1<sign> /* high word with sign loaded */ -signclr: br Ureturn /* return from subroutine */ - /* modfordef modifies the result of denormalization to the input format of */ -/* the inexact user handler. This input format is the same format that */ +/* the inexact user handler. This input format is the same format that */ /* MANTHI, MANTLO, and IMPCR were initially loaded with. */ #ifdef HANDLER -modformdef: clr r12,r12,12<20> /* clear result exponent,IMPCR complete */ - clr r10,r10,4<25> /* clear old guard,round,sticky,and addone */ - mak r5,r9,3<26> /* make grs field */ - bb0.n 4,r9,newaddone /* do not set new addone in MANTHI */ - or r10,r5,r10 /* or in new grs field */ - set r10,r10,1<25> /* set new addone */ -newaddone: bb1.n destsize,r12,moddefd /* branch to handle double precision */ - clr r10,r10,21<0> /* clear upper bits of old mantissa */ -moddefs: extu r5,r11,20<3> /* extract upper bits */ - or r10,r5,r10 /* MANTHI complete */ - bsr.n _handler /* execute user handler for inexact */ - rot r11,r11,0<3> /* MANTLO complete */ - br Ureturn /* return from subroutine */ -moddefd: bsr.n _handler /* execute user handler for inexact */ - or r10,r5,r10 /* MANTHI complete,r5 should be set to OR */ +modformdef: + clr r12,r12,12<20> /* clear result exponent,IMPCR complete */ + clr r10,r10,4<25> /* clear old guard,round,sticky,and addone */ + mak r5,r9,3<26> /* make grs field */ + bb0.n 4,r9,newaddone /* do not set new addone in MANTHI */ + or r10,r5,r10 /* or in new grs field */ + set r10,r10,1<25> /* set new addone */ +newaddone: + bb1.n destsize,r12,moddefd /* branch to handle double precision */ + clr r10,r10,21<0> /* clear upper bits of old mantissa */ +moddefs: + extu r5,r11,20<3> /* extract upper bits */ + or r10,r5,r10 /* MANTHI complete */ + bsr.n _handler /* execute user handler for inexact */ + rot r11,r11,0<3> /* MANTLO complete */ + br Ureturn +moddefd: + bsr.n _handler /* execute user handler for inexact */ + or r10,r5,r10 /* MANTHI complete,r5 should be set to OR */ #endif - /* Return to fpui. */ -Ureturn: ld r1,r31,0 /* load return address */ - jmp r1 /* return from subroutine */ - - data - -/* function _FPoverflow -- */ -/* The documentation for this release gives an overall description of this code. */ -data -align 4 -msg2: string "here at line %d, r1 is %x\n\0" -text - -#line 23 +Ureturn: + ld r1,r31,0 /* load return address */ + jmp r1 +/* + * FPoverflow + */ /* If the overflow user handler bit is not set, then the inexact bit in the */ -/* FPSR is set, and the inexact user handler bit is checked. If it is set, */ +/* FPSR is set, and the inexact user handler bit is checked. If it is set, */ /* then the inexact user handler is executed, else the default routine for */ /* overflow is executed. */ - text - align 8 - global _FPoverflow -_FPoverflow: - st r1,r31,0 /* save return address */ + +ASLOCAL(FPoverflow) + st r1,r31,0 /* save return address */ #ifdef HANDLER - set r2,r2,1<overflow> /* set overflow bit in r2 which holds FPSR */ - bb1 efovf,r12,hand /* go to user handler if bit set for overflow */ - set r2,r2,1<inexact> /* set inexact bit in r2 since overflow bit */ - /* in FPCR is not set */ - bb0 efinx,r12,nohandler/* if userhandler for inexact not set,then */ - /* round result */ - br callhandler /* branch to user handler for inexact */ + set r2,r2,1<overflow> /* set overflow bit in r2 which holds FPSR */ + bb1 efovf,r12,hand /* go to user handler if bit set for overflow */ + set r2,r2,1<inexact> /* set inexact bit in r2 since overflow bit */ + /* in FPCR is not set */ + bb0 efinx,r12,nohandler/* if userhandler for inexact not set,then */ + /* round result */ + br callhandler /* branch to user handler for inexact */ /* Before the overflow user handler is executed, the exponent is modified */ /* by subtracting 192 for single precision and 1536 for double precision. */ - -hand: bb1 10,r12,doubleprec /* double precision result */ -singleprec: or.u r5,r0,0x0c00 /* load exponent adjust */ - br.n callhandler /* prepare to call user handler */ - subu r12,r12,r5 /* adjust single precision exponent */ -doubleprec: or.u r5,r0,0x6000 /* load exponent adjust */ - subu r12,r12,r5 /* adjust double precision exponent */ -callhandler: bsr _handler /* branch to common handler routine */ - br return /* return from overflow subroutine */ + +hand: + bb1 10,r12,doubleprec /* double precision result */ +singleprec: + or.u r5,r0,0x0c00 /* load exponent adjust */ + br.n callhandler /* prepare to call user handler */ + subu r12,r12,r5 /* adjust single precision exponent */ +doubleprec: + or.u r5,r0,0x6000 /* load exponent adjust */ + subu r12,r12,r5 /* adjust double precision exponent */ +callhandler: + bsr _handler /* branch to common handler routine */ + br return #endif /* Determine which rounding mode to use for the default procedure. */ -nohandler: bb1 modehi,r10,signed /* mode is either round toward pos. or neg. */ - bb0 modelo,r10,OFnearest /* rounding mode is round nearest */ - br OFzero /* rounding mode is round zero */ -signed: bb0 modelo,r10,OFnegative /* rounding mode is round negative */ - br positive /* rounding mode is round positive */ +nohandler: + bb1 modehi,r10,signed /* mode is either round toward pos. or neg. */ + bb0 modelo,r10,OFnearest /* rounding mode is round nearest */ + br OFzero /* rounding mode is round zero */ +signed: + bb0 modelo,r10,OFnegative /* rounding mode is round negative */ + br positive /* rounding mode is round positive */ /* In the round toward nearest mode, positive values are rounded to */ /* positive infinity and negative values are loaded toward negative infinity. */ /* The value for single or double precision is loaded from a data table. */ -OFnearest: - bb1.n destsize,r12,neardouble /* branch to neardouble of */ - /* double result */ - mask.u r5,r10,0x8000 /* mask off sign bit from MANTHI */ - or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */ - or r11,r11,lo16(0x7f800000) - br.n return /* return with result */ - or r11,r5,r11 /* adjust sign */ +OFnearest: + bb1.n destsize,r12,neardouble /* branch to neardouble of */ + /* double result */ + mask.u r5,r10,0x8000 /* mask off sign bit from MANTHI */ + or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */ + or r11,r11,lo16(0x7f800000) + br.n return /* return with result */ + or r11,r5,r11 /* adjust sign */ neardouble: - or r11,r0,r0 /* load lower word of infinity */ - or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */ - or r10,r10,lo16(0x7ff00000) - br.n return /* return with result */ - or r10,r5,r10 /* adjust sign */ + or r11,r0,r0 /* load lower word of infinity */ + or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */ + or r10,r10,lo16(0x7ff00000) + br.n return /* return with result */ + or r10,r5,r10 /* adjust sign */ /* In the round toward zero mode, positive values are rounded to the largest */ @@ -1393,494 +1477,520 @@ neardouble: /* negative finite number. */ /* The value for single or double precision is loaded from a data table. */ -OFzero: - bb1.n destsize,r12,zerodouble /* branch to zerodouble of */ - /* double result */ - mask.u r5,r10,0x8000 /* mask off sign bit from MANTHI */ - or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */ - or r11,r11,lo16(0x7f7fffff) - br.n return /* return with result */ - or r11,r5,r11 /* adjust sign */ -zerodouble: - set r11,r0,0<0> /* load lower word of finite number */ - or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */ - or r10,r10,lo16(0x7fefffff) - br.n return /* return with result */ - or r10,r5,r10 /* adjust sign */ - - -/* In the round toward positve mode, positive values are rounded to */ +OFzero: + bb1.n destsize,r12,zerodouble /* branch to zerodouble of */ + /* double result */ + mask.u r5,r10,0x8000 /* mask off sign bit from MANTHI */ + or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */ + or r11,r11,lo16(0x7f7fffff) + br.n return /* return with result */ + or r11,r5,r11 /* adjust sign */ +zerodouble: + set r11,r0,0<0> /* load lower word of finite number */ + or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */ + or r10,r10,lo16(0x7fefffff) + br.n return /* return with result */ + or r10,r5,r10 /* adjust sign */ + + +/* In the round toward positve mode, positive values are rounded to */ /* postive infinity and negative values are loaded toward the largest */ /* negative finite number. */ /* The value for single or double precision is loaded from a data table. */ -positive: - bb1 destsize,r12,posdouble /* branch to section for double result */ -possingle: - bb1 sign,r10,possingleneg /* branch to section for negatives */ -possinglepos: - or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */ - br.n return /* return with result */ - or r11,r11,lo16(0x7f800000) +positive: + bb1 destsize,r12,posdouble /* branch to section for double result */ +possingle: + bb1 sign,r10,possingleneg /* branch to section for negatives */ +possinglepos: + or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */ + br.n return /* return with result */ + or r11,r11,lo16(0x7f800000) possingleneg: - or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */ - or r11,r11,lo16(0x7f7fffff) - br.n return /* return with result */ - set r11,r11,1<sign> /* set sign for negative */ -posdouble: - bb1 sign,r10,posdoubleneg /* branch to negative double results */ -posdoublepos: - or r11,r0,r0 /* load lower word of double infinity */ - or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */ - br.n return /* return with result */ - or r10,r10,lo16(0x7ff00000) -posdoubleneg: - set r11,r0,0<0> /* load lower word of finite number */ - or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */ - or r10,r10,lo16(0x7fefffff) - br.n return /* return with result */ - set r10,r10,1<sign> /* set sign for negative */ - - -/* In the round toward negative mode, positive values are rounded to the largest */ + or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */ + or r11,r11,lo16(0x7f7fffff) + br.n return /* return with result */ + set r11,r11,1<sign> /* set sign for negative */ +posdouble: + bb1 sign,r10,posdoubleneg /* branch to negative double results */ +posdoublepos: + or r11,r0,r0 /* load lower word of double infinity */ + or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */ + br.n return /* return with result */ + or r10,r10,lo16(0x7ff00000) +posdoubleneg: + set r11,r0,0<0> /* load lower word of finite number */ + or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */ + or r10,r10,lo16(0x7fefffff) + br.n return /* return with result */ + set r10,r10,1<sign> /* set sign for negative */ + + +/* In the round toward negative mode, positive values are rounded to the largest */ /* postive finite number and negative values are rounded to negative infinity. */ /* The value for single or double precision is loaded from a data table. */ -OFnegative: - bb1 destsize,r12,negdouble /* branch to section for double result */ -negsingle: - bb1 sign,r10,negsingleneg /* branch to section for negatives */ -negsinglepos: - or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */ - br.n return /* return with result */ - or r11,r11,lo16(0x7f7fffff) -negsingleneg: - or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */ - or r11,r11,lo16(0x7f800000) - br.n return /* return with result */ - set r11,r11,1<sign> /* set sign for negative */ -negdouble: - bb1 sign,r10,negdoubleneg /* branch to negative double results */ -negdoublepos: - set r11,r0,0<0> /* load lower word of finite number */ - or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */ - br.n return /* return with result */ - or r10,r10,lo16(0x7fefffff) -negdoubleneg: - or r11,r0,r0 /* load lower word of double infinity */ - or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */ - or r10,r10,lo16(0x7ff00000) - set r10,r10,1<sign> /* set sign for negative */ - -return: - ld r1,r31,0 /* ld return address */ - jmp r1 /* return from subroutine */ - - data +OFnegative: + bb1 destsize,r12,negdouble /* branch to section for double result */ +negsingle: + bb1 sign,r10,negsingleneg /* branch to section for negatives */ +negsinglepos: + or.u r11,r0,hi16(0x7f7fffff) /* load single finite number constant */ + br.n return /* return with result */ + or r11,r11,lo16(0x7f7fffff) +negsingleneg: + or.u r11,r0,hi16(0x7f800000) /* load single infinity constant */ + or r11,r11,lo16(0x7f800000) + br.n return /* return with result */ + set r11,r11,1<sign> /* set sign for negative */ +negdouble: + bb1 sign,r10,negdoubleneg /* branch to negative double results */ +negdoublepos: + set r11,r0,0<0> /* load lower word of finite number */ + or.u r10,r0,hi16(0x7fefffff) /* load upper word of finite number */ + br.n return /* return with result */ + or r10,r10,lo16(0x7fefffff) +negdoubleneg: + or r11,r0,r0 /* load lower word of double infinity */ + or.u r10,r0,hi16(0x7ff00000) /* load upper word of infinity */ + or r10,r10,lo16(0x7ff00000) + set r10,r10,1<sign> /* set sign for negative */ + +return: + ld r1,r31,0 /* ld return address */ + jmp r1 + + data /* If either S1 or S2 is a signalling NaN, then set the invalid operation */ -/* bit of the FPSR. If the invalid operation user handler flag is set and */ +/* bit of the FPSR. If the invalid operation user handler flag is set and */ /* then NaN is signalling, then branch to the handler routine to go to the */ /* user handler. */ /* If S1 is the only NaN or one of two NaN''s, then write */ -/* a quiet S1 to the result. A signalling NaN must be made quiet before */ +/* a quiet S1 to the result. A signalling NaN must be made quiet before */ /* it can be written, but a signalling S2 is not modified in this routine */ /* if S1 is a NaN. */ - text -GLOBAL(NaN) - bb0.n s1nan,r12,S2sigcheck /* S1 is not a NaN */ - st r1,r31,0 /* save return address */ - bb1 sigbit,r5,S2sigcheck /* S1 is not a signaling NaN */ - set r2,r2,1<oper> /* set invalid operation bit in FPSR */ -#ifdef JEFF_DEBUGxxxxxxx - /* - * Generate a signal to the offending process. - * This uses hardcoded constants from mach/exception.h - * and mach/machine/exception.h. - */ - ldcr r2, cr17 /* first arg: current_thread() */ - or r3, r0, 3 /* second arg: EXC_ARITHMETIC */ - or r4, r0, 3 /* third arg: EXC_M88K_FLOAT_P */ - or r5, r0, r0 - subu r31, r31, 48 - bsr.n _thread_doexception - st r1, r31, 44 - ld r1, r31, 44 - br.n FPnan_return - addu r31, r31, 48 -#endif + text +ASLOCAL(NaN) + bb0.n s1nan,r12,S2sigcheck /* S1 is not a NaN */ + st r1,r31,0 /* save return address */ + bb1 sigbit,r5,S2sigcheck /* S1 is not a signaling NaN */ + set r2,r2,1<oper> /* set invalid operation bit in FPSR */ #ifdef HANDLER - bb0 oper,r3,S1nohandler /* branch if no user handler */ - bsr _handler /* branch to handler */ - br FPnan_return -ASGLOBAL(S1nohandler) + bb0 oper,r3,S1nohandler /* branch if no user handler */ + bsr _handler /* branch to handler */ + br FPnan_return +ASLOCAL(S1nohandler) #endif - br.n S1write /* FPSR bit already set, S1 is made quiet, */ - /* and since we always write S1 if it is a */ - /* NaN, write S1 and skip rest of routine */ - set r5,r5,1<sigbit> /* make S1 a quiet NaN */ - -ASGLOBAL(S2sigcheck) - bb0 s2nan,r12,S1write /* S2 is not a NaN */ - bb1 sigbit,r7,S1write /* S2 is not a signaling NaN */ - set r2,r2,1<oper> /* set invalid operation bit in FPSR */ + br.n S1write /* FPSR bit already set, S1 is made quiet, */ + /* and since we always write S1 if it is a */ + /* NaN, write S1 and skip rest of routine */ + set r5,r5,1<sigbit> /* make S1 a quiet NaN */ + +ASLOCAL(S2sigcheck) + bb0 s2nan,r12,S1write /* S2 is not a NaN */ + bb1 sigbit,r7,S1write /* S2 is not a signaling NaN */ + set r2,r2,1<oper> /* set invalid operation bit in FPSR */ #ifdef HANDLER - bb0 oper,r3,S2nohandler /* branch if no user handler */ - bsr _handler /* branch to handler */ - br FPnan_return + bb0 oper,r3,S2nohandler /* branch if no user handler */ + bsr _handler /* branch to handler */ + br FPnan_return #endif -ASGLOBAL(S2nohandler) - set r7,r7,1<sigbit> /* make S2 a quiet NaN */ +ASLOCAL(S2nohandler) + set r7,r7,1<sigbit> /* make S2 a quiet NaN */ /* Write a single or double precision quiet NaN unless the opeation is FCMP. */ /* If the operation is FCMP, then set the not comparable bit in the result. */ -ASGLOBAL(S1write) - bb0 s1nan,r12,S2write /* do not write S1 if it is not a NaN */ - extu r10,r9,5<11> /* extract opcode */ - cmp r11,r10,FCMPop /* compare to FCMP */ - bb1 ne,r11,S1noFCMP /* operation is not FCMP */ - set r6,r0,1<nc> /* set the not comparable bit */ - br.n FPnan_return /* return from subroutine */ - set r6,r6,1<ne> /* set the not equal bit */ -ASGLOBAL(S1noFCMP) - bb1.n dsize,r9,wrdoubS1 /* double destination */ - set r5,r5,11<20> /* set all exponent bits to 1 */ +ASLOCAL(S1write) + bb0 s1nan,r12,S2write /* do not write S1 if it is not a NaN */ + extu r10,r9,5<11> /* extract opcode */ + cmp r11,r10,FCMPop /* compare to FCMP */ + bb1 ne,r11,S1noFCMP /* operation is not FCMP */ + set r6,r0,1<nc> /* set the not comparable bit */ + br.n FPnan_return + set r6,r6,1<ne> /* set the not equal bit */ +ASLOCAL(S1noFCMP) + bb1.n dsize,r9,wrdoubS1 /* double destination */ + set r5,r5,11<20> /* set all exponent bits to 1 */ /* The single result will be formed the same way whether S1 is a single or double */ -ASGLOBAL(wrsingS1) - mak r10,r5,28<3> /* wipe out extra exponent bits */ - extu r11,r6,3<29> /* get lower three bits of mantissa */ - or r10,r10,r11 /* combine all of result except sign */ - clr r6,r5,31<0> /* clear all but sign */ - br.n FPnan_return /* return from function */ - or r6,r6,r10 /* form result */ - -ASGLOBAL(wrdoubS1) -/* ;;;;; bb1 s1size,r9,wrdoubS1d ;write double source to double dest. */ -/* took out the above instruction -- don't see why it's there.... jfriedl */ -ASGLOBAL(wrdoubS1s) - set r6,r6,29<0> /* set extra bits of lower word */ -ASGLOBAL(wrdoubS1d) - br FPnan_return /* no modification necessary for writing */ - /* double to double, so return from function */ - -ASGLOBAL(S2write) - extu r10,r9,5<11> /* extract opcode */ - cmp r11,r10,FCMPop /* compare to FCMP */ - bb1.n ne,r11,S2noFCMP /* operation is not FCMP */ - set r7,r7,11<20> /* set all exponent bits to 1 */ - set r6,r0,1<nc> /* set the not comparable bit */ - br.n FPnan_return /* return from subroutine */ - set r6,r6,1<ne> /* set the not equal bit */ -ASGLOBAL(S2noFCMP) - bb1.n dsize,r9,wrdoubS2 /* double destination */ - /* - * In the original, the ".n" above and the "set r5..." below - * were omitted here. Since they're in the S1 stuff above, - * and since this isn't working right now (r5 isn't being set - * to it's part of the nan), I'll try this... - * jfriedl Dec 1, 1989 - */ - set r5,r5,11<20> /* set all exponent bits to 1 */ +ASLOCAL(wrsingS1) + mak r10,r5,28<3> /* wipe out extra exponent bits */ + extu r11,r6,3<29> /* get lower three bits of mantissa */ + or r10,r10,r11 /* combine all of result except sign */ + clr r6,r5,31<0> /* clear all but sign */ + br.n FPnan_return + or r6,r6,r10 /* form result */ + +ASLOCAL(wrdoubS1) + set r6,r6,29<0> /* set extra bits of lower word */ + br FPnan_return /* no modification necessary for writing */ + /* double to double, so return */ + +ASLOCAL(S2write) + extu r10,r9,5<11> /* extract opcode */ + cmp r11,r10,FCMPop /* compare to FCMP */ + bb1.n ne,r11,S2noFCMP /* operation is not FCMP */ + set r7,r7,11<20> /* set all exponent bits to 1 */ + set r6,r0,1<nc> /* set the not comparable bit */ + br.n FPnan_return + set r6,r6,1<ne> /* set the not equal bit */ +ASLOCAL(S2noFCMP) + bb1.n dsize,r9,wrdoubS2 /* double destination */ + set r5,r5,11<20> /* set all exponent bits to 1 */ /* The single result will be formed the same way whether S1 is a single or double */ -ASGLOBAL(wrsingS2) - mak r10,r7,28<3> /* wipe out extra exponent bits */ - extu r11,r8,3<29> /* get lower three bits of mantissa */ - or r10,r10,r11 /* combine all of result except sign */ - clr r6,r7,31<0> /* clear all but sign */ - br.n FPnan_return /* return from function */ - or r6,r6,r10 /* form result */ - -ASGLOBAL(wrdoubS2) - -/* ;;; bb1 s2size,r9,FPnan_return ;write double source to double dest. */ - /* - * I took out the above branch because I just don't see how it - * makes sense. jfriedl Dec 1, '89 - */ -ASGLOBAL(wrdoubS2s) - set r6,r8,29<0> /* set extra bits of lower word */ +ASLOCAL(wrsingS2) + mak r10,r7,28<3> /* wipe out extra exponent bits */ + extu r11,r8,3<29> /* get lower three bits of mantissa */ + or r10,r10,r11 /* combine all of result except sign */ + clr r6,r7,31<0> /* clear all but sign */ + br.n FPnan_return + or r6,r6,r10 /* form result */ +ASLOCAL(wrdoubS2) + set r6,r8,29<0> /* set extra bits of lower word */ /* Return from this subroutine with the result. */ -ASGLOBAL(FPnan_return) - /* no modification necessary for writing */ - /* double to double, so return from function */ - ld r1,r31, 0 /* retrieve return address */ - jmp r1 /* return from function */ - - data +ASLOCAL(FPnan_return) + /* no modification necessary for writing */ + /* double to double, so return */ + ld r1,r31, 0 /* retrieve return address */ + jmp r1 -/* function _infinity -- */ -/* See the documentation of this release for an overall description of this */ -/* code. */ + data +/* + * infinity + */ /* Extract the opcode, compare to a constant, and branch to the code */ /* for the instruction. */ - text - align 8 - global _infinity -_infinity: extu r10,r9,5<11> /* extract opcode */ - cmp r11,r10,FADDop /* compare to FADD */ - bb1.n eq,r11,FADD /* operation is FADD */ - st r1,r31,0 /* save return address */ - cmp r11,r10,FSUBop /* compare to FSUB */ - bb1 eq,r11,FSUB /* operation is FSUB */ - cmp r11,r10,FCMPop /* compare to FCMP */ - bb1 eq,r11,FCMP /* operation is FCMP */ - cmp r11,r10,FMULop /* compare to FMUL */ - bb1 eq,r11,FMUL /* operation is FMUL */ - cmp r11,r10,FDIVop /* compare to FDIV */ - bb1 eq,r11,FDIV /* operation is FDIV */ -/* cmp r11,r10,FSQRTop;compare to FSQRT */ -/* bb1 eq,r11,FSQRT ;operation is FSQRT */ - cmp r11,r10,INTop /* compare to INT */ - bb1 eq,r11,FP_inf_overflw /* operation is INT */ - cmp r11,r10,NINTop /* compare to NINT */ - bb1 eq,r11,FP_inf_overflw /* operation is NINT */ - cmp r11,r10,TRNCop /* compare to TRNC */ - bb1 eq,r11,FP_inf_overflw /* operation is TRNC */ +ASLOCAL(infinity) + extu r10,r9,5<11> /* extract opcode */ + cmp r11,r10,FADDop /* compare to FADD */ + bb1.n eq,r11,FADD /* operation is FADD */ + st r1,r31,0 /* save return address */ + cmp r11,r10,FSUBop /* compare to FSUB */ + bb1 eq,r11,FSUB /* operation is FSUB */ + cmp r11,r10,FCMPop /* compare to FCMP */ + bb1 eq,r11,FCMP /* operation is FCMP */ + cmp r11,r10,FMULop /* compare to FMUL */ + bb1 eq,r11,FMUL /* operation is FMUL */ + cmp r11,r10,FDIVop /* compare to FDIV */ + bb1 eq,r11,FDIV /* operation is FDIV */ +#if 0 + cmp r11,r10,FSQRTop /* compare to FSQRT */ + bb1 eq,r11,FSQRT /* operation is FSQRT */ +#endif + cmp r11,r10,INTop /* compare to INT */ + bb1 eq,r11,FP_inf_overflw /* operation is INT */ + cmp r11,r10,NINTop /* compare to NINT */ + bb1 eq,r11,FP_inf_overflw /* operation is NINT */ + cmp r11,r10,TRNCop /* compare to TRNC */ + bb1 eq,r11,FP_inf_overflw /* operation is TRNC */ /* Adding infinities of opposite signs will cause an exception, */ /* but all other operands will result in a correctly signed infinity. */ -FADD: bb0 s1inf,r12,addS2write /* branch if S1 not infinity */ - bb0 s2inf,r12,addS1write /* S2 is not inf., so branch to write S1 */ - bb1 sign,r5,addS1neg /* handle case of S1 negative */ -addS1pos: bb1 sign,r7,excpt /* adding infinities of different signs */ - /* causes an exception */ - br poswrinf /* branch to write positive infinity */ -addS1neg: bb0 sign,r7,excpt /* adding infinities of different signs */ - /* causes an exception */ - br negwrinf /* branch to write negative infinity */ -addS1write: bb0 sign,r5,poswrinf /* branch to write positive infinity */ - br negwrinf /* branch to write negative infinity */ -addS2write: bb0 sign,r7,poswrinf /* branch to write positive infinity */ - br negwrinf /* branch to write negative infinity */ +FADD: + bb0 s1inf,r12,addS2write /* branch if S1 not infinity */ + bb0 s2inf,r12,addS1write /* S2 is not inf., so branch to write S1 */ + bb1 sign,r5,addS1neg /* handle case of S1 negative */ +addS1pos: + bb1 sign,r7,excpt /* adding infinities of different */ + /* signs causes an exception */ + br poswrinf /* branch to write positive infinity */ +addS1neg: + bb0 sign,r7,excpt /* adding infinities of different */ + /* signs causes an exception */ + br negwrinf /* branch to write negative infinity */ +addS1write: + bb0 sign,r5,poswrinf /* branch to write positive infinity */ + br negwrinf /* branch to write negative infinity */ +addS2write: + bb0 sign,r7,poswrinf /* branch to write positive infinity */ + br negwrinf /* branch to write negative infinity */ /* Subtracting infinities of the same sign will cause an exception, */ /* but all other operands will result in a correctly signed infinity. */ -FSUB: bb0 s1inf,r12,subS2write /* branch if S1 not infinity */ - bb0 s2inf,r12,subS1write /* S2 is not inf., so branch to write S1 */ - bb1 sign,r5,subS1neg /* handle case of S1 negative */ -subS1pos: bb0 sign,r7,excpt /* subtracting infinities of the same sign */ - /* causes an exception */ - br poswrinf /* branch to write positive infinity */ -subS1neg: bb1 sign,r7,excpt /* subtracting infinities of the same sign */ - /* causes an exception */ - br negwrinf /* branch to write negative infinity */ -subS1write: bb0 sign,r5,poswrinf /* branch to write positive infinity */ - br negwrinf /* branch to write negative infinity */ -subS2write: bb1 sign,r7,poswrinf /* branch to write positive infinity */ - br negwrinf /* branch to write negative infinity */ +FSUB: + bb0 s1inf,r12,subS2write /* branch if S1 not infinity */ + bb0 s2inf,r12,subS1write /* S2 is not inf., so branch to write S1 */ + bb1 sign,r5,subS1neg /* handle case of S1 negative */ +subS1pos: + bb0 sign,r7,excpt /* subtracting infinities of the same */ + /* sign causes an exception */ + br poswrinf /* branch to write positive infinity */ +subS1neg: + bb1 sign,r7,excpt /* subtracting infinities of the same */ + /* sign causes an exception */ + br negwrinf /* branch to write negative infinity */ +subS1write: + bb0 sign,r5,poswrinf /* branch to write positive infinity */ + br negwrinf /* branch to write negative infinity */ +subS2write: + bb1 sign,r7,poswrinf /* branch to write positive infinity */ + br negwrinf /* branch to write negative infinity */ /* Compare the operands, at least one of which is infinity, and set the */ /* correct bits in the destination register. */ -FCMP: bb0.n s1inf,r12,FCMPS1f /* branch for finite S1 */ - set r4,r0,1<cp> /* since neither S1 or S2 is a NaN, set cp */ -FCMPS1i: bb1 sign,r5,FCMPS1ni /* branch to negative S1i */ -FCMPS1pi: bb0 s2inf,r12,FCMPS1piS2f /* branch to finite S2 with S1pi */ -FCMPS1piS2i: bb1 sign,r7,FCMPS1piS2ni /* branch to negative S2i with S1pi */ -FCMPS1piS2pi: set r4,r4,1<eq> /* set eq bit */ - set r4,r4,1<le> /* set le bit */ - set r4,r4,1<ge> /* set ge bit */ - set r4,r4,1<ib> /* set ib bit */ - br.n move /* return from subroutine */ - set r4,r4,1<ob> /* set ob bit */ -FCMPS1piS2ni: set r4,r4,1<ne> /* set ne bit */ - set r4,r4,1<gt> /* set gt bit */ - br.n move /* return from subroutine */ - set r4,r4,1<ge> /* set ge bit */ -FCMPS1piS2f: set r4,r4,1<ne> /* set ne bit */ - set r4,r4,1<gt> /* set gt bit */ - bsr.n _zero /* see if any of the operands are zero */ - set r4,r4,1<ge> /* set ge bit */ - bb0 s2zero,r12,FCMPS1piS2nz /* check for negative if s2 not zero */ - set r4,r4,1<ou> /* set ou bit */ - br.n move - set r4,r4,1<ob> /* set ob bit */ -FCMPS1piS2nz: bb1 sign,r7,move /* return from subroutine if s2 is neg. */ -FCMPS1piS2pf: set r4,r4,1<ou> /* set ou bit */ - br.n move /* return from subroutine */ - set r4,r4,1<ob> /* set ob bit */ -FCMPS1ni: bb0 s2inf,r12,FCMPS1niS2f /* branch to finite S2 with S1ni */ -FCMPS1niS2i: bb1 sign,r7,FCMPS1niS2ni /* branch to negative S2i with S1ni */ -FCMPS1niS2pi: set r4,r4,1<ne> /* set eq bit */ - set r4,r4,1<le> /* set le bit */ - set r4,r4,1<lt> /* set lt bit */ - set r4,r4,1<ou> /* set ou bit */ - br.n move /* return from subroutine */ - set r4,r4,1<ob> /* set ob bit */ -FCMPS1niS2ni: set r4,r4,1<eq> /* set eq bit */ - set r4,r4,1<le> /* set le bit */ - br.n move /* return from subroutine */ - set r4,r4,1<ge> /* set ge bit */ -FCMPS1niS2f: set r4,r4,1<ne> /* set eq bit */ - set r4,r4,1<le> /* set le bit */ - bsr.n _zero /* see if any of the operands are zero */ - set r4,r4,1<lt> /* set lt bit */ - bb0 s2zero,r12,FCMPS1niS2nz /* branch if s2 is not zero */ - set r4,r4,1<ou> /* set ou bit */ - br.n move - set r4,r4,1<ob> /* set ob bit */ -FCMPS1niS2nz: bb1 sign,r7,move /* return from subroutine if s2 is neg. */ - set r4,r4,1<ou> /* set ou bit */ - br.n move /* return from subroutine */ - set r4,r4,1<ob> /* set ob bit */ -FCMPS1f: bb1 sign,r5,FCMPS1nf /* branch to negative S1f */ -FCMPS1pf: bb1.n sign,r7,FCMPS1pfS2ni /* branch to negative S2i with S1pf */ - set r4,r4,1<ne> /* set ne bit */ -FCMPS1pfS2pi: set r4,r4,1<le> /* set le bit */ - set r4,r4,1<lt> /* set lt bit */ - bsr.n _zero - set r4,r4,1<ib> /* set ib bit */ - bb0 s1zero,r12,FCMPS1pfS2pinozero -FCMPS1pfS2pizero: br.n move - set r4,r4,1<ob> /* set ob bit */ -FCMPS1pfS2pinozero: br.n move - set r4,r4,1<in> /* set in bit */ -FCMPS1pfS2ni: set r4,r4,1<gt> /* set gt bit */ - br.n move /* return from subroutine */ - set r4,r4,1<ge> /* set ge bit */ -FCMPS1nf: bb1.n sign,r7,FCMPS1nfS2ni /* branch to negative S2i with S1nf */ - set r4,r4,1<ne> /* set ne bit */ - set r4,r4,1<le> /* set gt bit */ - set r4,r4,1<lt> /* set ge bit */ - bsr.n _zero /* see which of the operands are zero */ - set r4,r4,1<ob> /* set ob bit */ - bb0 s1zero,r12,FCMPS1nfS2pinozero /* no ls and lo */ -FCMPS1nfS2pizero: br.n move - set r4,r4,1<ib> /* set ib bit */ -FCMPS1nfS2pinozero: br.n move - set r4,r4,1<ou> /* set ou bit */ -FCMPS1nfS2ni: set r4,r4,1<gt> /* set gt bit */ - set r4,r4,1<ge> /* set ge bit */ - -move: br.n inf_return /* return from subroutine */ - or r6,r0,r4 /* transfer answer to r6 */ +FCMP: + bb0.n s1inf,r12,FCMPS1f /* branch for finite S1 */ + set r4,r0,1<cp> /* since neither S1 or S2 is a NaN, */ + /* set cp */ +FCMPS1i: + bb1 sign,r5,FCMPS1ni /* branch to negative S1i */ +FCMPS1pi: + bb0 s2inf,r12,FCMPS1piS2f /* branch to finite S2 with S1pi */ +FCMPS1piS2i: + bb1 sign,r7,FCMPS1piS2ni /* branch to negative S2i with S1pi */ +FCMPS1piS2pi: + set r4,r4,1<eq> /* set eq bit */ + set r4,r4,1<le> /* set le bit */ + set r4,r4,1<ge> /* set ge bit */ + set r4,r4,1<ib> /* set ib bit */ + br.n move + set r4,r4,1<ob> /* set ob bit */ +FCMPS1piS2ni: + set r4,r4,1<ne> /* set ne bit */ + set r4,r4,1<gt> /* set gt bit */ + br.n move + set r4,r4,1<ge> /* set ge bit */ +FCMPS1piS2f: + set r4,r4,1<ne> /* set ne bit */ + set r4,r4,1<gt> /* set gt bit */ + bsr.n _ASM_LABEL(zero) /* see if any of the operands are zero */ + set r4,r4,1<ge> /* set ge bit */ + bb0 s2zero,r12,FCMPS1piS2nz /* check for negative if s2 not zero */ + set r4,r4,1<ou> /* set ou bit */ + br.n move + set r4,r4,1<ob> /* set ob bit */ +FCMPS1piS2nz: + bb1 sign,r7,move /* return if s2 is negative */ +FCMPS1piS2pf: + set r4,r4,1<ou> /* set ou bit */ + br.n move + set r4,r4,1<ob> /* set ob bit */ +FCMPS1ni: + bb0 s2inf,r12,FCMPS1niS2f /* branch to finite S2 with S1ni */ +FCMPS1niS2i: + bb1 sign,r7,FCMPS1niS2ni /* branch to negative S2i with S1ni */ +FCMPS1niS2pi: + set r4,r4,1<ne> /* set eq bit */ + set r4,r4,1<le> /* set le bit */ + set r4,r4,1<lt> /* set lt bit */ + set r4,r4,1<ou> /* set ou bit */ + br.n move + set r4,r4,1<ob> /* set ob bit */ +FCMPS1niS2ni: + set r4,r4,1<eq> /* set eq bit */ + set r4,r4,1<le> /* set le bit */ + br.n move + set r4,r4,1<ge> /* set ge bit */ +FCMPS1niS2f: + set r4,r4,1<ne> /* set eq bit */ + set r4,r4,1<le> /* set le bit */ + bsr.n _ASM_LABEL(zero) /* see if any of the operands are zero */ + set r4,r4,1<lt> /* set lt bit */ + bb0 s2zero,r12,FCMPS1niS2nz /* branch if s2 is not zero */ + set r4,r4,1<ou> /* set ou bit */ + br.n move + set r4,r4,1<ob> /* set ob bit */ +FCMPS1niS2nz: + bb1 sign,r7,move /* return if s2 is negative */ + set r4,r4,1<ou> /* set ou bit */ + br.n move + set r4,r4,1<ob> /* set ob bit */ +FCMPS1f: + bb1 sign,r5,FCMPS1nf /* branch to negative S1f */ +FCMPS1pf: + bb1.n sign,r7,FCMPS1pfS2ni /* branch to negative S2i with S1pf */ + set r4,r4,1<ne> /* set ne bit */ +FCMPS1pfS2pi: + set r4,r4,1<le> /* set le bit */ + set r4,r4,1<lt> /* set lt bit */ + bsr.n _ASM_LABEL(zero) + set r4,r4,1<ib> /* set ib bit */ + bb0 s1zero,r12,FCMPS1pfS2pinozero +FCMPS1pfS2pizero: + br.n move + set r4,r4,1<ob> /* set ob bit */ +FCMPS1pfS2pinozero: + br.n move + set r4,r4,1<in> /* set in bit */ +FCMPS1pfS2ni: + set r4,r4,1<gt> /* set gt bit */ + br.n move + set r4,r4,1<ge> /* set ge bit */ +FCMPS1nf: + bb1.n sign,r7,FCMPS1nfS2ni /* branch to negative S2i with S1nf */ + set r4,r4,1<ne> /* set ne bit */ + set r4,r4,1<le> /* set gt bit */ + set r4,r4,1<lt> /* set ge bit */ + bsr.n _ASM_LABEL(zero) /* see which of the operands are zero */ + set r4,r4,1<ob> /* set ob bit */ + bb0 s1zero,r12,FCMPS1nfS2pinozero /* no ls and lo */ +FCMPS1nfS2pizero: + br.n move + set r4,r4,1<ib> /* set ib bit */ +FCMPS1nfS2pinozero: + br.n move + set r4,r4,1<ou> /* set ou bit */ +FCMPS1nfS2ni: + set r4,r4,1<gt> /* set gt bit */ + set r4,r4,1<ge> /* set ge bit */ + +move: + br.n inf_return + or r6,r0,r4 /* transfer answer to r6 */ /* Multiplying infinity and zero causes an exception, but all other */ /* operations produce a correctly signed infinity. */ -FMUL: bsr _zero /* see if any of the operands are zero */ - bb1 s1zero,r12,excpt /* infinity X 0 causes an exception */ - bb1 s2zero,r12,excpt /* infinity X 0 causes an exception */ - bb1 sign,r5,FMULS1neg /* handle negative cases of S1 */ - bb0 sign,r7,poswrinf /* + X + = + */ - br negwrinf /* + X - = - */ -FMULS1neg: bb1 sign,r7,poswrinf /* - X - = + */ - br negwrinf /* - X + = - */ +FMUL: + bsr _ASM_LABEL(zero) /* see if any of the operands are zero */ + bb1 s1zero,r12,excpt /* infinity X 0 causes an exception */ + bb1 s2zero,r12,excpt /* infinity X 0 causes an exception */ + bb1 sign,r5,FMULS1neg /* handle negative cases of S1 */ + bb0 sign,r7,poswrinf /* + X + = + */ + br negwrinf /* + X - = - */ +FMULS1neg: + bb1 sign,r7,poswrinf /* - X - = + */ + br negwrinf /* - X + = - */ -/* Dividing infinity by infinity causes an exception, but dividing */ -/* infinity by a finite yields a correctly signed infinity, and */ +/* Dividing infinity by infinity causes an exception, but dividing */ +/* infinity by a finite yields a correctly signed infinity, and */ /* dividing a finite by an infinity produces a correctly signed zero. */ -FDIV: bb1 s1inf,r12,FDIVS1inf /* handle case of S1 being infinity */ - bb1 sign,r5,FDIVS1nf /* handle cases of S1 being neg. non-inf. */ - bb1 sign,r7,FDIVS1pfS2mi /* handle case of negative S2 */ -FDIVS1pfS2pi: br poswrzero /* +f / +inf = +0 */ -FDIVS1pfS2mi: br negwrzero /* +f / -inf = -0 */ -FDIVS1nf: bb1 sign,r7,FDIVS1nfS2mi /* handle case of negative S2 */ -FDIVS1nfS2pi: br negwrzero /* -f / +inf = -0 */ -FDIVS1nfS2mi: br poswrzero /* -f / -inf = +0 */ -FDIVS1inf: bb1 s2inf,r12,excpt /* inf / inf = exception */ - bb1 sign,r5,FDIVS1mi /* handle cases of S1 being neg. inf. */ - bb1 sign,r7,FDIVS1piS2nf /* handle case of negative S2 */ -FDIVS1piS2pf: br poswrinf /* +inf / +f = +inf */ -FDIVS1piS2nf: br negwrinf /* +inf / -f = -inf */ -FDIVS1mi: bb1 sign,r7,FDIVS1miS2nf /* handle case of negative S2 */ -FDIVS1miS2pf: br negwrinf /* -inf / +f = -inf */ -FDIVS1miS2nf: br poswrinf /* -inf / -f = +inf */ - +FDIV: + bb1 s1inf,r12,FDIVS1inf /* handle case of S1 being infinity */ + bb1 sign,r5,FDIVS1nf /* handle cases of S1 being neg. non-inf. */ + bb1 sign,r7,FDIVS1pfS2mi /* handle case of negative S2 */ +FDIVS1pfS2pi: + br poswrzero /* +f / +inf = +0 */ +FDIVS1pfS2mi: + br negwrzero /* +f / -inf = -0 */ +FDIVS1nf: + bb1 sign,r7,FDIVS1nfS2mi /* handle case of negative S2 */ +FDIVS1nfS2pi: + br negwrzero /* -f / +inf = -0 */ +FDIVS1nfS2mi: + br poswrzero /* -f / -inf = +0 */ +FDIVS1inf: + bb1 s2inf,r12,excpt /* inf / inf = exception */ + bb1 sign,r5,FDIVS1mi /* handle cases of S1 being neg. inf. */ + bb1 sign,r7,FDIVS1piS2nf /* handle case of negative S2 */ +FDIVS1piS2pf: + br poswrinf /* +inf / +f = +inf */ +FDIVS1piS2nf: + br negwrinf /* +inf / -f = -inf */ +FDIVS1mi: + bb1 sign,r7,FDIVS1miS2nf /* handle case of negative S2 */ +FDIVS1miS2pf: + br negwrinf /* -inf / +f = -inf */ +FDIVS1miS2nf: + br poswrinf /* -inf / -f = +inf */ + /* The square root of positive infinity is positive infinity, */ /* but the square root of negative infinity is a NaN */ -/* FSQRT: bb0 sign,r7,poswrinf ;write sqrt(inf) = inf */ -/* br excpt ;write sqrt(-inf) = NaN */ +#if 0 +FSQRT: + bb0 sign,r7,poswrinf /* write sqrt(inf) = inf */ + br excpt /* write sqrt(-inf) = NaN */ +#endif -excpt: - set r2,r2,1<oper> /* set invalid operation bit of FPSR */ +excpt: + set r2,r2,1<oper> /* set invalid operation bit of FPSR */ #ifdef HANDLER - bb0 oper,r3,nohandler /* branch if no user handler */ - bsr _handler /* branch to interface with user handler */ - br inf_return /* return from function */ -nohandler: + bb0 oper,r3,nohandler /* branch if no user handler */ + bsr _handler /* branch to interface with user handler */ + br inf_return +nohandler: #endif - set r5,r0,0<0> /* write NaN into r5 */ - br.n inf_return /* return from subroutine */ - set r6,r0,0<0> /* write NaN into r6, writing NaN''s into */ - /* both of these registers is quicker than */ - /* checking for single or double precision */ + set r5,r0,0<0> /* write NaN into r5 */ + br.n inf_return + set r6,r0,0<0> /* write NaN into r6, writing NaN''s into */ + /* both of these registers is quicker than */ + /* checking for single or double precision */ /* Write positive infinity of the correct precision */ -poswrinf: bb1 dsize,r9,poswrinfd /* branch to write double precision inf. */ - br.n inf_return /* return from subroutine */ - or.u r6,r0,0x7f80 /* load r6 with single precision pos inf. */ -poswrinfd: or.u r5,r0,0x7ff0 /* load double precision pos inf. */ - br.n inf_return /* return from subroutine */ - or r6,r0,r0 +poswrinf: + bb1 dsize,r9,poswrinfd /* branch to write double precision inf. */ + br.n inf_return + or.u r6,r0,0x7f80 /* load r6 with single precision pos inf. */ +poswrinfd: + or.u r5,r0,0x7ff0 /* load double precision pos inf. */ + br.n inf_return + or r6,r0,r0 /* Write negative infinity of the correct precision */ -negwrinf: bb1 dsize,r9,negwrinfd /* branch to write double precision inf. */ - br.n inf_return /* return from subroutine */ - or.u r6,r0,0xff80 /* load r6 with single precision pos inf. */ -negwrinfd: or.u r5,r0,0xfff0 /* load double precision pos inf. */ - br.n inf_return /* return from subroutine */ - or r6,r0,r0 +negwrinf: + bb1 dsize,r9,negwrinfd /* branch to write double precision inf. */ + br.n inf_return + or.u r6,r0,0xff80 /* load r6 with single precision pos inf. */ +negwrinfd: + or.u r5,r0,0xfff0 /* load double precision pos inf. */ + br.n inf_return + or r6,r0,r0 /* Write a positive zero disregarding precision. */ -poswrzero: or r5,r0,r0 /* write to both high word and low word now */ - br.n inf_return /* it does not matter that both are written */ - or r6,r0,r0 +poswrzero: + or r5,r0,r0 /* write to both high word and low word now */ + br.n inf_return /* it does not matter that both are written */ + or r6,r0,r0 /* Write a negative zero of the correct precision. */ -negwrzero: or r6,r0,r0 /* clear low word */ - bb1 dsize,r9,negwrzerod /* branch to write double precision zero */ - br.n inf_return /* return from subroutine */ - set r6,r6,1<31> /* set sign bit */ -negwrzerod: or r5,r0,r0 /* clear high word */ - br.n inf_return /* return from subroutine */ - set r5,r5,1<31> /* set sign bit */ - -FP_inf_overflw: - set r2,r2,1<oper> /* set invalid operand bit */ +negwrzero: + or r6,r0,r0 /* clear low word */ + bb1 dsize,r9,negwrzerod /* branch to write double precision zero */ + br.n inf_return + set r6,r6,1<31> /* set sign bit */ +negwrzerod: + or r5,r0,r0 /* clear high word */ + br.n inf_return + set r5,r5,1<31> /* set sign bit */ + +FP_inf_overflw: + set r2,r2,1<oper> /* set invalid operand bit */ #ifdef HANDLER - bb0 oper,r3,nohandlero /* do not go to user handler routine */ - bsr _handler /* go to user handler routine */ - br inf_return /* return from subroutine */ + bb0 oper,r3,nohandlero /* do not go to user handler routine */ + bsr _handler /* go to user handler routine */ + br inf_return #endif -nohandlero: bb0.n sign,r7,inf_return /* if positive then return from subroutine */ - - set r6,r6,31<0> /* set result to largest positive integer */ - or.c r6,r0,r6 /* negate r6,giving largest negative int. */ +nohandlero: + bb0.n sign,r7,inf_return /* if positive then return */ -inf_return: ld r1,r31,0 /* load return address */ - jmp r1 /* return from subroutine */ + set r6,r6,31<0> /* set result to largest positive integer */ + or.c r6,r0,r6 /* negate r6,giving largest negative int. */ - data +inf_return: + ld r1,r31,0 /* load return address */ + jmp r1 + + data #define FADD denorm_FADD #define FSUB denorm_FSUB @@ -1890,397 +2000,521 @@ inf_return: ld r1,r31,0 /* load return address */ #define NINT denorm_NINT #define TRNC denorm_TRNC #define return denorm_return -/* function _denorm -- */ -/* See the documentation for this release for an overall description of this */ -/* code. */ - -/* Check to see if either S1 or S2 is a denormalized number. First */ +/* + * denorm + */ + +/* Check to see if either S1 or S2 is a denormalized number. First */ /* extract the exponent to see if it is zero, and then check to see if */ -/* the mantissa is not zero. If the number is denormalized, then set the */ +/* the mantissa is not zero. If the number is denormalized, then set the */ /* 1 or 0 bit 10 r12. */ - text - align 8 - global _denorm -_denorm: st r1,r31,0 /* save return address */ -dnmcheckS1: extu r10,r5,11<20> /* extract exponent */ - bcnd ne0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */ - bb1.n 9,r9,dnmcheckS1d /* S1 is double precision */ - mak r10,r5,20<3> /* mak field with only mantissa bits */ - /* into final result */ -dnmcheckS1s: extu r11,r6,3<29> /* get three low bits of mantissa */ - or r10,r10,r11 /* assemble all of the mantissa bits */ - bcnd eq0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */ - br dnmsetS1 /* S1 is a denorm */ - -dnmcheckS1d: or r10,r6,r10 /* or all of mantissa bits */ - bcnd eq0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */ -dnmsetS1: set r12,r12,1<1> /* S1 is a denorm */ - -dnmcheckS2: extu r10,r7,11<20> /* extract exponent */ - bcnd ne0,r10,S1form /* S2 is not a denorm */ - bb1.n 7,r9,dnmcheckS2d /* S2 is double precision */ - mak r10,r7,20<3> /* mak field with only mantissa bits */ -dnmcheckS2s: extu r11,r8,3<29> /* get three low bits of mantissa */ - or r10,r10,r11 /* assemble all of the mantissa bits */ - bcnd eq0,r10,S1form /* S2 is not a denorm */ - br dnmsetS2 /* S1 is a denorm */ -dnmcheckS2d: or r10,r8,r10 /* or all or mantissa bits */ - bcnd eq0,r10,S1form /* S2 is not a denorm */ -dnmsetS2: set r12,r12,1<0> /* S2 is a denorm */ +ASLOCAL(denorm) + st r1,r31,0 /* save return address */ +dnmcheckS1: + extu r10,r5,11<20> /* extract exponent */ + bcnd ne0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */ + bb1.n 9,r9,dnmcheckS1d /* S1 is double precision */ + mak r10,r5,20<3> /* mak field with only mantissa bits */ + /* into final result */ +dnmcheckS1s: + extu r11,r6,3<29> /* get three low bits of mantissa */ + or r10,r10,r11 /* assemble all of the mantissa bits */ + bcnd eq0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */ + br dnmsetS1 /* S1 is a denorm */ + +dnmcheckS1d: + or r10,r6,r10 /* or all of mantissa bits */ + bcnd eq0,r10,dnmsetS2 /* S1 is not a denorm, so S2 must be */ +dnmsetS1: + set r12,r12,1<1> /* S1 is a denorm */ + +dnmcheckS2: + extu r10,r7,11<20> /* extract exponent */ + bcnd ne0,r10,S1form /* S2 is not a denorm */ + bb1.n 7,r9,dnmcheckS2d /* S2 is double precision */ + mak r10,r7,20<3> /* mak field with only mantissa bits */ +dnmcheckS2s: + extu r11,r8,3<29> /* get three low bits of mantissa */ + or r10,r10,r11 /* assemble all of the mantissa bits */ + bcnd eq0,r10,S1form /* S2 is not a denorm */ + br dnmsetS2 /* S1 is a denorm */ +dnmcheckS2d: + or r10,r8,r10 /* or all or mantissa bits */ + bcnd eq0,r10,S1form /* S2 is not a denorm */ +dnmsetS2: + set r12,r12,1<0> /* S2 is a denorm */ /* Since the operations are going to be reperformed with modified denorms, */ /* the operands which were initially single precision need to be modified */ -/* back to single precision. */ - -S1form: bb1 9,r9,S2form /* S1 is double precision, so do not */ - /* modify S1 into single format */ - mak r11,r5,28<3> /* over final exponent and mantissa */ - /* eliminating extra 3 bits of exponent */ - extu r6,r6,3<29> /* get low 3 bits of mantissa */ - or r11,r6,r11 /* form complete mantissa and exponent */ - extu r10,r5,1<31> /* get the 31 bit */ - mak r10,r10,1<31> /* place 31 bit 10 correct position */ - or r6,r10,r11 /* or 31, exponent, and all of mantissa */ - -S2form: bb1 7,r9,checkop /* S2 is double precision, so do not */ - /* modify S2 into single format */ - mak r11,r7,28<3> /* over final exponent and mantissa */ - /* eliminating extra 3 bits of exponent */ - extu r8,r8,3<29> /* get low 3 bits of mantissa */ - or r11,r8,r11 /* form complete mantissa and exponent */ - extu r10,r7,1<31> /* get the 31 bit */ - mak r10,r10,1<31> /* place 31 bit 10 correct position */ - or r8,r10,r11 /* or 31, exponent, and all of mantissa */ +/* back to single precision. */ + +S1form: + bb1 9,r9,S2form /* S1 is double precision, so do not */ + /* modify S1 into single format */ + mak r11,r5,28<3> /* over final exponent and mantissa */ + /* eliminating extra 3 bits of exponent */ + extu r6,r6,3<29> /* get low 3 bits of mantissa */ + or r11,r6,r11 /* form complete mantissa and exponent */ + extu r10,r5,1<31> /* get the 31 bit */ + mak r10,r10,1<31> /* place 31 bit 10 correct position */ + or r6,r10,r11 /* or 31, exponent, and all of mantissa */ + +S2form: + bb1 7,r9,checkop /* S2 is double precision, so do not */ + /* modify S2 into single format */ + mak r11,r7,28<3> /* over final exponent and mantissa */ + /* eliminating extra 3 bits of exponent */ + extu r8,r8,3<29> /* get low 3 bits of mantissa */ + or r11,r8,r11 /* form complete mantissa and exponent */ + extu r10,r7,1<31> /* get the 31 bit */ + mak r10,r10,1<31> /* place 31 bit 10 correct position */ + or r8,r10,r11 /* or 31, exponent, and all of mantissa */ /* Extract the opcode, compare to a constant, and branch to the code that */ /* deals with that opcode. */ -checkop: extu r10,r9,5<11> /* extract opcode */ - cmp r11,r10,0x05 /* compare to FADD */ - bb1 2,r11,FADD /* operation is FADD */ - cmp r11,r10,0x06 /* compare to FSUB */ - bb1 2,r11,FSUB /* operation is FSUB */ - cmp r11,r10,0x07 /* compare to FCMP */ - bb1 2,r11,FCMP /* operation is FCMP */ - cmp r11,r10,0x00 /* compare to FMUL */ - bb1 2,r11,FMUL /* operation is FMUL */ - cmp r11,r10,0x0e /* compare to FDIV */ - bb1 2,r11,FDIV /* operation is FDIV */ -/* cmp r11,r10,0x0f;compare to FSQRT */ -/* bb1 2,r11,FSQRT ;operation is FSQRT */ - cmp r11,r10,0x09 /* compare to INT */ - bb1 2,r11,INT /* operation is INT */ - cmp r11,r10,0x0a /* compare to NINT */ - bb1 2,r11,NINT /* operation is NINT */ - cmp r11,r10,0x0b /* compare to TRNC */ - bb1 2,r11,TRNC /* operation is TRNC */ +checkop: + extu r10,r9,5<11> /* extract opcode */ + cmp r11,r10,0x05 /* compare to FADD */ + bb1 2,r11,FADD /* operation is FADD */ + cmp r11,r10,0x06 /* compare to FSUB */ + bb1 2,r11,FSUB /* operation is FSUB */ + cmp r11,r10,0x07 /* compare to FCMP */ + bb1 2,r11,FCMP /* operation is FCMP */ + cmp r11,r10,0x00 /* compare to FMUL */ + bb1 2,r11,FMUL /* operation is FMUL */ + cmp r11,r10,0x0e /* compare to FDIV */ + bb1 2,r11,FDIV /* operation is FDIV */ +#if 0 + cmp r11,r10,0x0f /* compare to FSQRT */ + bb1 2,r11,FSQRT /* operation is FSQRT */ +#endif + cmp r11,r10,0x09 /* compare to INT */ + bb1 2,r11,INT /* operation is INT */ + cmp r11,r10,0x0a /* compare to NINT */ + bb1 2,r11,NINT /* operation is NINT */ + cmp r11,r10,0x0b /* compare to TRNC */ + bb1 2,r11,TRNC /* operation is TRNC */ /* For all the following operations, the denormalized number is set to */ /* zero and the operation is reperformed the correct destination and source */ /* sizes. */ -FADD: bb0 1,r12,FADDS2dnm /* S1 is not denorm, so S2 must be */ - or r5,r0,r0 /* set S1 to zero */ - or r6,r0,r0 -FADDS2chk: bb0 0,r12,FADDcalc /* S2 is not a denorm */ -FADDS2dnm: or r7,r0,r0 /* set S2 to zero */ - or r8,r0,r0 -FADDcalc: bb1 5,r9,FADDdD /* branch for double precision destination */ -FADDsD: bb1 9,r9,FADDsDdS1 /* branch for double precision S1 */ -FADDsDsS1: bb1 7,r9,FADDsDsS1dS2 /* branch for double precision S2 */ -FADDsDsS1sS2: br.n return /* return from subroutine */ - fadd.sss r6,r6,r8 /* add the two sources and place result 10 S1 */ -FADDsDsS1dS2: br.n return /* return from subroutine */ - fadd.ssd r6,r6,r7 /* add the two sources and place result 10 S1 */ -FADDsDdS1: bb1 7,r9,FADDsDdS1dS2 /* branch for double precision S2 */ -FADDsDdS1sS2: br.n return /* return from subroutine */ - fadd.sds r6,r5,r8 /* add the two sources and place result 10 S1 */ -FADDsDdS1dS2: br.n return /* return from subroutine */ - fadd.sdd r6,r5,r7 /* add the two sources and place result 10 S1 */ -FADDdD: bb1 9,r9,FADDdDdS1 /* branch for double precision S1 */ -FADDdDsS1: bb1 7,r9,FADDdDsS1dS2 /* branch for double precision S2 */ -FADDdDsS1sS2: br.n return /* return from subroutine */ - fadd.dss r5,r6,r8 /* add the two sources and place result 10 S1 */ -FADDdDsS1dS2: br.n return /* return from subroutine */ - fadd.dsd r5,r6,r7 /* add the two sources and place result 10 S1 */ -FADDdDdS1: bb1 7,r9,FADDdDdS1dS2 /* branch for double precision S2 */ -FADDdDdS1sS2: br.n return /* return from subroutine */ - fadd.dds r5,r5,r8 /* add the two sources and place result 10 S1 */ -FADDdDdS1dS2: br.n return /* return from subroutine */ - fadd.ddd r5,r5,r7 /* add the two sources and place result 10 S1 */ - -FSUB: bb0 1,r12,FSUBS2dnm /* S1 is not denorm, so S2 must be */ - or r5,r0,r0 /* set S1 to zero */ - or r6,r0,r0 -FSUBS2chk: bb0 0,r12,FSUBcalc /* S2 is not a denorm */ -FSUBS2dnm: or r7,r0,r0 /* set S2 to zero */ - or r8,r0,r0 -FSUBcalc: bb1 5,r9,FSUBdD /* branch for double precision destination */ -FSUBsD: bb1 9,r9,FSUBsDdS1 /* branch for double precision S1 */ -FSUBsDsS1: bb1 7,r9,FSUBsDsS1dS2 /* branch for double precision S2 */ -FSUBsDsS1sS2: br.n return /* return from subroutine */ - fsub.sss r6,r6,r8 /* add the two sources and place result 10 S1 */ -FSUBsDsS1dS2: br.n return /* return from subroutine */ - fsub.ssd r6,r6,r7 /* add the two sources and place result 10 S1 */ -FSUBsDdS1: bb1 7,r9,FSUBsDdS1dS2 /* branch for double precision S2 */ -FSUBsDdS1sS2: br.n return /* return from subroutine */ - fsub.sds r6,r5,r8 /* add the two sources and place result 10 S1 */ -FSUBsDdS1dS2: br.n return /* return from subroutine */ - fsub.sdd r6,r5,r7 /* add the two sources and place result 10 S1 */ -FSUBdD: bb1 9,r9,FSUBdDdS1 /* branch for double precision S1 */ -FSUBdDsS1: bb1 7,r9,FSUBdDsS1dS2 /* branch for double precision S2 */ -FSUBdDsS1sS2: br.n return /* return from subroutine */ - fsub.dss r5,r6,r8 /* add the two sources and place result 10 S1 */ -FSUBdDsS1dS2: br.n return /* return from subroutine */ - fsub.dsd r5,r6,r7 /* add the two sources and place result 10 S1 */ -FSUBdDdS1: bb1 7,r9,FSUBdDdS1dS2 /* branch for double precision S2 */ -FSUBdDdS1sS2: br.n return /* return from subroutine */ - fsub.dds r5,r5,r8 /* add the two sources and place result 10 S1 */ -FSUBdDdS1dS2: br.n return /* return from subroutine */ - fsub.ddd r5,r5,r7 /* add the two sources and place result 10 S1 */ - -FCMP: bb0 1,r12,FCMPS2dnm /* S1 is not denorm, so S2 must be */ - or r5,r0,r0 /* set S1 to zero */ - or r6,r0,r0 -FCMPS2chk: bb0 0,r12,FCMPcalc /* S2 is not a denorm */ -FCMPS2dnm: or r7,r0,r0 /* set S2 to zero */ - or r8,r0,r0 -FCMPcalc: bb1 9,r9,FCMPdS1 /* branch for double precision S1 */ -FCMPsS1: bb1 7,r9,FCMPsS1dS2 /* branch for double precision S2 */ -FCMPsS1sS2: br.n return /* return from subroutine */ - fcmp.sss r6,r6,r8 /* add the two sources and place result 10 S1 */ -FCMPsS1dS2: br.n return /* return from subroutine */ - fcmp.ssd r6,r6,r7 /* add the two sources and place result 10 S1 */ -FCMPdS1: bb1 7,r9,FCMPdS1dS2 /* branch for double precision S2 */ -FCMPdS1sS2: br.n return /* return from subroutine */ - fcmp.sds r6,r5,r8 /* add the two sources and place result 10 S1 */ -FCMPdS1dS2: br.n return /* return from subroutine */ - fcmp.sdd r6,r5,r7 /* add the two sources and place result 10 S1 */ - -FMUL: bb0 1,r12,FMULS2dnm /* S1 is not denorm, so S2 must be */ - or r5,r0,r0 /* set S1 to zero */ - or r6,r0,r0 -FMULS2chk: bb0 0,r12,FMULcalc /* S2 is not a denorm */ -FMULS2dnm: or r7,r0,r0 /* set S2 to zero */ - or r8,r0,r0 -FMULcalc: bb1 5,r9,FMULdD /* branch for double precision destination */ -FMULsD: bb1 9,r9,FMULsDdS1 /* branch for double precision S1 */ -FMULsDsS1: bb1 7,r9,FMULsDsS1dS2 /* branch for double precision S2 */ -FMULsDsS1sS2: br.n return /* return from subroutine */ - fmul.sss r6,r6,r8 /* add the two sources and place result 10 S1 */ -FMULsDsS1dS2: br.n return /* return from subroutine */ - fmul.ssd r6,r6,r7 /* add the two sources and place result 10 S1 */ -FMULsDdS1: bb1 7,r9,FMULsDdS1dS2 /* branch for double precision S2 */ -FMULsDdS1sS2: br.n return /* return from subroutine */ - fmul.sds r6,r5,r8 /* add the two sources and place result 10 S1 */ -FMULsDdS1dS2: br.n return /* return from subroutine */ - fmul.sdd r6,r5,r7 /* add the two sources and place result 10 S1 */ -FMULdD: bb1 9,r9,FMULdDdS1 /* branch for double precision S1 */ -FMULdDsS1: bb1 7,r9,FMULdDsS1dS2 /* branch for double precision S2 */ -FMULdDsS1sS2: br.n return /* return from subroutine */ - fmul.dss r5,r6,r8 /* add the two sources and place result 10 S1 */ -FMULdDsS1dS2: br.n return /* return from subroutine */ - fmul.dsd r5,r6,r7 /* add the two sources and place result 10 S1 */ -FMULdDdS1: bb1 7,r9,FMULdDdS1dS2 /* branch for double precision S2 */ -FMULdDdS1sS2: br.n return /* return from subroutine */ - fmul.dds r5,r5,r8 /* add the two sources and place result 10 S1 */ -FMULdDdS1dS2: br.n return /* return from subroutine */ - fmul.ddd r5,r5,r7 /* add the two sources and place result 10 S1 */ - -FDIV: bb0 1,r12,FDIVS2dnm /* S1 is not denorm, so S2 must be */ - or r5,r0,r0 /* set S1 to zero */ - or r6,r0,r0 -FDIVS2chk: bb0 0,r12,FDIVcalc /* S2 is not a denorm */ -FDIVS2dnm: or r7,r0,r0 /* set S2 to zero */ - or r8,r0,r0 -FDIVcalc: bb1 5,r9,FDIVdD /* branch for double precision destination */ -FDIVsD: bb1 9,r9,FDIVsDdS1 /* branch for double precision S1 */ -FDIVsDsS1: bb1 7,r9,FDIVsDsS1dS2 /* branch for double precision S2 */ -FDIVsDsS1sS2: fdiv.sss r6,r6,r8 /* add the two sources and place result 10 S1 */ - br return /* return from subroutine */ -FDIVsDsS1dS2: fdiv.ssd r6,r6,r7 /* add the two sources and place result 10 S1 */ - br return /* return from subroutine */ -FDIVsDdS1: bb1 7,r9,FDIVsDdS1dS2 /* branch for double precision S2 */ -FDIVsDdS1sS2: fdiv.sds r6,r5,r8 /* add the two sources and place result 10 S1 */ - br return /* return from subroutine */ -FDIVsDdS1dS2: fdiv.sdd r6,r5,r7 /* add the two sources and place result 10 S1 */ - br return /* return from subroutine */ -FDIVdD: bb1 9,r9,FDIVdDdS1 /* branch for double precision S1 */ -FDIVdDsS1: bb1 7,r9,FDIVdDsS1dS2 /* branch for double precision S2 */ -FDIVdDsS1sS2: fdiv.dss r5,r6,r8 /* add the two sources and place result 10 S1 */ - br return /* return from subroutine */ -FDIVdDsS1dS2: fdiv.dsd r5,r6,r7 /* add the two sources and place result 10 S1 */ - br return /* return from subroutine */ -FDIVdDdS1: bb1 7,r9,FDIVdDdS1dS2 /* branch for double precision S2 */ -FDIVdDdS1sS2: fdiv.dds r5,r5,r8 /* add the two sources and place result 10 S1 */ - br return /* return from subroutine */ -FDIVdDdS1dS2: fdiv.ddd r5,r5,r7 /* add the two sources and place result 10 S1 */ - br return /* return from subroutine */ - -/* FSQRT: or r7,r0,r0 ;set S2 to zero */ -/* or r8,r0,r0 */ -/* FSQRTcalc: bb1 5,r9,FSQRTdD ;branch for double precision destination */ -/* FSQRTsD: bb1 7,r9,FSQRTsDdS2 ;branch for double precision S2 */ -/* FSQRTsDsS2: br.n return ;return from subroutine */ - /* fsqrt.ss r6,r8 ;add the two sources and place result 10 S1 */ -/* FSQRTsDdS2: br.n return ;return from subroutine */ - /* fsqrt.sd r6,r7 ;add the two sources and place result 10 S1 */ -/* FSQRTdD: bb1 7,r9,FSQRTdDdS2 ;branch for double precision S2 */ -/* FSQRTdDsS2: br.n return ;return from subroutine */ - /* fsqrt.ds r5,r8 ;add the two sources and place result 10 S1 */ -/* FSQRTdDdS2: br.n return ;return from subroutine */ - /* fsqrt.dd r5,r7 ;add the two sources and place result 10 S1 */ - -INT: or r7,r0,r0 /* set S2 to zero */ - or r8,r0,r0 -INTcalc: bb1 7,r9,INTdS2 /* branch for double precision S2 */ -INTsS2: br.n return /* return from subroutine */ - int.ss r6,r8 /* add the two sources and place result 10 S1 */ -INTdS2: br.n return /* return from subroutine */ - int.sd r6,r7 /* add the two sources and place result 10 S1 */ - -NINT: or r7,r0,r0 /* set S2 to zero */ - or r8,r0,r0 -NINTcalc: bb1 7,r9,NINTdS2 /* branch for double precision S2 */ -NINTsS2: br.n return /* return from subroutine */ - nint.ss r6,r8 /* add the two sources and place result 10 S1 */ -NINTdS2: br.n return /* return from subroutine */ - nint.sd r6,r7 /* add the two sources and place result 10 S1 */ - -TRNC: or r7,r0,r0 /* set S2 to zero */ - or r8,r0,r0 -TRNCcalc: bb1 7,r9,TRNCdS2 /* branch for double precision S2 */ -TRNCsS2: br.n return /* return from subroutine */ - trnc.ss r6,r8 /* add the two sources and place result 10 S1 */ -TRNCdS2: trnc.sd r6,r7 /* add the two sources and place result 10 S1 */ +FADD: + bb0 1,r12,FADDS2dnm /* S1 is not denorm, so S2 must be */ + or r5,r0,r0 /* set S1 to zero */ + or r6,r0,r0 +FADDS2chk: + bb0 0,r12,FADDcalc /* S2 is not a denorm */ +FADDS2dnm: + or r7,r0,r0 /* set S2 to zero */ + or r8,r0,r0 +FADDcalc: + bb1 5,r9,FADDdD /* branch for double precision destination */ +FADDsD: + bb1 9,r9,FADDsDdS1 /* branch for double precision S1 */ +FADDsDsS1: + bb1 7,r9,FADDsDsS1dS2 /* branch for double precision S2 */ +FADDsDsS1sS2: + br.n return + fadd.sss r6,r6,r8 /* add the two sources and place result 10 S1 */ +FADDsDsS1dS2: + br.n return + fadd.ssd r6,r6,r7 /* add the two sources and place result 10 S1 */ +FADDsDdS1: + bb1 7,r9,FADDsDdS1dS2 /* branch for double precision S2 */ +FADDsDdS1sS2: + br.n return + fadd.sds r6,r5,r8 /* add the two sources and place result 10 S1 */ +FADDsDdS1dS2: + br.n return + fadd.sdd r6,r5,r7 /* add the two sources and place result 10 S1 */ +FADDdD: + bb1 9,r9,FADDdDdS1 /* branch for double precision S1 */ +FADDdDsS1: + bb1 7,r9,FADDdDsS1dS2 /* branch for double precision S2 */ +FADDdDsS1sS2: + br.n return + fadd.dss r5,r6,r8 /* add the two sources and place result 10 S1 */ +FADDdDsS1dS2: + br.n return + fadd.dsd r5,r6,r7 /* add the two sources and place result 10 S1 */ +FADDdDdS1: + bb1 7,r9,FADDdDdS1dS2 /* branch for double precision S2 */ +FADDdDdS1sS2: + br.n return + fadd.dds r5,r5,r8 /* add the two sources and place result 10 S1 */ +FADDdDdS1dS2: + br.n return + fadd.ddd r5,r5,r7 /* add the two sources and place result 10 S1 */ + +FSUB: + bb0 1,r12,FSUBS2dnm /* S1 is not denorm, so S2 must be */ + or r5,r0,r0 /* set S1 to zero */ + or r6,r0,r0 +FSUBS2chk: + bb0 0,r12,FSUBcalc /* S2 is not a denorm */ +FSUBS2dnm: + or r7,r0,r0 /* set S2 to zero */ + or r8,r0,r0 +FSUBcalc: + bb1 5,r9,FSUBdD /* branch for double precision destination */ +FSUBsD: + bb1 9,r9,FSUBsDdS1 /* branch for double precision S1 */ +FSUBsDsS1: + bb1 7,r9,FSUBsDsS1dS2 /* branch for double precision S2 */ +FSUBsDsS1sS2: + br.n return + fsub.sss r6,r6,r8 /* add the two sources and place result 10 S1 */ +FSUBsDsS1dS2: + br.n return + fsub.ssd r6,r6,r7 /* add the two sources and place result 10 S1 */ +FSUBsDdS1: + bb1 7,r9,FSUBsDdS1dS2 /* branch for double precision S2 */ +FSUBsDdS1sS2: + br.n return + fsub.sds r6,r5,r8 /* add the two sources and place result 10 S1 */ +FSUBsDdS1dS2: + br.n return + fsub.sdd r6,r5,r7 /* add the two sources and place result 10 S1 */ +FSUBdD: + bb1 9,r9,FSUBdDdS1 /* branch for double precision S1 */ +FSUBdDsS1: + bb1 7,r9,FSUBdDsS1dS2 /* branch for double precision S2 */ +FSUBdDsS1sS2: + br.n return + fsub.dss r5,r6,r8 /* add the two sources and place result 10 S1 */ +FSUBdDsS1dS2: + br.n return + fsub.dsd r5,r6,r7 /* add the two sources and place result 10 S1 */ +FSUBdDdS1: + bb1 7,r9,FSUBdDdS1dS2 /* branch for double precision S2 */ +FSUBdDdS1sS2: + br.n return + fsub.dds r5,r5,r8 /* add the two sources and place result 10 S1 */ +FSUBdDdS1dS2: + br.n return + fsub.ddd r5,r5,r7 /* add the two sources and place result 10 S1 */ + +FCMP: + bb0 1,r12,FCMPS2dnm /* S1 is not denorm, so S2 must be */ + or r5,r0,r0 /* set S1 to zero */ + or r6,r0,r0 +FCMPS2chk: + bb0 0,r12,FCMPcalc /* S2 is not a denorm */ +FCMPS2dnm: + or r7,r0,r0 /* set S2 to zero */ + or r8,r0,r0 +FCMPcalc: + bb1 9,r9,FCMPdS1 /* branch for double precision S1 */ +FCMPsS1: + bb1 7,r9,FCMPsS1dS2 /* branch for double precision S2 */ +FCMPsS1sS2: + br.n return + fcmp.sss r6,r6,r8 /* add the two sources and place result 10 S1 */ +FCMPsS1dS2: + br.n return + fcmp.ssd r6,r6,r7 /* add the two sources and place result 10 S1 */ +FCMPdS1: + bb1 7,r9,FCMPdS1dS2 /* branch for double precision S2 */ +FCMPdS1sS2: + br.n return + fcmp.sds r6,r5,r8 /* add the two sources and place result 10 S1 */ +FCMPdS1dS2: + br.n return + fcmp.sdd r6,r5,r7 /* add the two sources and place result 10 S1 */ + +FMUL: + bb0 1,r12,FMULS2dnm /* S1 is not denorm, so S2 must be */ + or r5,r0,r0 /* set S1 to zero */ + or r6,r0,r0 +FMULS2chk: + bb0 0,r12,FMULcalc /* S2 is not a denorm */ +FMULS2dnm: + or r7,r0,r0 /* set S2 to zero */ + or r8,r0,r0 +FMULcalc: + bb1 5,r9,FMULdD /* branch for double precision destination */ +FMULsD: + bb1 9,r9,FMULsDdS1 /* branch for double precision S1 */ +FMULsDsS1: + bb1 7,r9,FMULsDsS1dS2 /* branch for double precision S2 */ +FMULsDsS1sS2: + br.n return + fmul.sss r6,r6,r8 /* add the two sources and place result 10 S1 */ +FMULsDsS1dS2: + br.n return + fmul.ssd r6,r6,r7 /* add the two sources and place result 10 S1 */ +FMULsDdS1: + bb1 7,r9,FMULsDdS1dS2 /* branch for double precision S2 */ +FMULsDdS1sS2: + br.n return + fmul.sds r6,r5,r8 /* add the two sources and place result 10 S1 */ +FMULsDdS1dS2: + br.n return + fmul.sdd r6,r5,r7 /* add the two sources and place result 10 S1 */ +FMULdD: + bb1 9,r9,FMULdDdS1 /* branch for double precision S1 */ +FMULdDsS1: + bb1 7,r9,FMULdDsS1dS2 /* branch for double precision S2 */ +FMULdDsS1sS2: + br.n return + fmul.dss r5,r6,r8 /* add the two sources and place result 10 S1 */ +FMULdDsS1dS2: + br.n return + fmul.dsd r5,r6,r7 /* add the two sources and place result 10 S1 */ +FMULdDdS1: + bb1 7,r9,FMULdDdS1dS2 /* branch for double precision S2 */ +FMULdDdS1sS2: + br.n return + fmul.dds r5,r5,r8 /* add the two sources and place result 10 S1 */ +FMULdDdS1dS2: + br.n return + fmul.ddd r5,r5,r7 /* add the two sources and place result 10 S1 */ + +FDIV: + bb0 1,r12,FDIVS2dnm /* S1 is not denorm, so S2 must be */ + or r5,r0,r0 /* set S1 to zero */ + or r6,r0,r0 +FDIVS2chk: + bb0 0,r12,FDIVcalc /* S2 is not a denorm */ +FDIVS2dnm: + or r7,r0,r0 /* set S2 to zero */ + or r8,r0,r0 +FDIVcalc: + bb1 5,r9,FDIVdD /* branch for double precision destination */ +FDIVsD: + bb1 9,r9,FDIVsDdS1 /* branch for double precision S1 */ +FDIVsDsS1: + bb1 7,r9,FDIVsDsS1dS2 /* branch for double precision S2 */ +FDIVsDsS1sS2: + fdiv.sss r6,r6,r8 /* add the two sources and place result 10 S1 */ + br return +FDIVsDsS1dS2: + fdiv.ssd r6,r6,r7 /* add the two sources and place result 10 S1 */ + br return +FDIVsDdS1: + bb1 7,r9,FDIVsDdS1dS2 /* branch for double precision S2 */ +FDIVsDdS1sS2: + fdiv.sds r6,r5,r8 /* add the two sources and place result 10 S1 */ + br return +FDIVsDdS1dS2: + fdiv.sdd r6,r5,r7 /* add the two sources and place result 10 S1 */ + br return +FDIVdD: + bb1 9,r9,FDIVdDdS1 /* branch for double precision S1 */ +FDIVdDsS1: + bb1 7,r9,FDIVdDsS1dS2 /* branch for double precision S2 */ +FDIVdDsS1sS2: + fdiv.dss r5,r6,r8 /* add the two sources and place result 10 S1 */ + br return +FDIVdDsS1dS2: + fdiv.dsd r5,r6,r7 /* add the two sources and place result 10 S1 */ + br return +FDIVdDdS1: + bb1 7,r9,FDIVdDdS1dS2 /* branch for double precision S2 */ +FDIVdDdS1sS2: + fdiv.dds r5,r5,r8 /* add the two sources and place result 10 S1 */ + br return +FDIVdDdS1dS2: + fdiv.ddd r5,r5,r7 /* add the two sources and place result 10 S1 */ + br return + +#if 0 +FSQRT: + or r7,r0,r0 /* set S2 to zero */ + or r8,r0,r0 +FSQRTcalc: + bb1 5,r9,FSQRTdD /* branch for double precision destination */ +FSQRTsD: + bb1 7,r9,FSQRTsDdS2 /* branch for double precision S2 */ +FSQRTsDsS2: + br.n return + fsqrt.ss r6,r8 /* add the two sources and place result 10 S1 */ +FSQRTsDdS2: + br.n return + fsqrt.sd r6,r7 /* add the two sources and place result 10 S1 */ +FSQRTdD: + bb1 7,r9,FSQRTdDdS2 /* branch for double precision S2 */ +FSQRTdDsS2: + br.n return + fsqrt.ds r5,r8 /* add the two sources and place result 10 S1 */ +FSQRTdDdS2: + br.n return + fsqrt.dd r5,r7 /* add the two sources and place result 10 S1 */ +#endif + +INT: + or r7,r0,r0 /* set S2 to zero */ + or r8,r0,r0 +INTcalc: + bb1 7,r9,INTdS2 /* branch for double precision S2 */ +INTsS2: + br.n return + int.ss r6,r8 /* add the two sources and place result 10 S1 */ +INTdS2: + br.n return + int.sd r6,r7 /* add the two sources and place result 10 S1 */ + +NINT: + or r7,r0,r0 /* set S2 to zero */ + or r8,r0,r0 +NINTcalc: + bb1 7,r9,NINTdS2 /* branch for double precision S2 */ +NINTsS2: + br.n return + nint.ss r6,r8 /* add the two sources and place result 10 S1 */ +NINTdS2: + br.n return + nint.sd r6,r7 /* add the two sources and place result 10 S1 */ + +TRNC: + or r7,r0,r0 /* set S2 to zero */ + or r8,r0,r0 +TRNCcalc: + bb1 7,r9,TRNCdS2 /* branch for double precision S2 */ +TRNCsS2: + br.n return + trnc.ss r6,r8 /* add the two sources and place result 10 S1 */ +TRNCdS2: + trnc.sd r6,r7 /* add the two sources and place result 10 S1 */ /* Return to the routine that detected the reserved operand. */ -return: ld r1,r31,0 /* load return address */ - jmp r1 /* return from subroutine */ +return: + ld r1,r31,0 /* load return address */ + jmp r1 - data + data /* S1 and/or S2 is an infinity, and the other operand may be a zero. */ /* Knowing which operands are infinity, check the remaining operands for zeros. */ - text - align 8 - global _zero -_zero: bb0 s1inf,r12,S1noinf /* see if S1 is zero */ - bb0 s2inf,r12,S2noinf /* see if S2 is zero */ - jmp r1 /* return from function */ - -/* See if S1 is zero. Whether or not S1 is a zero, being in this routine */ -/* implies that S2 is infinity, so return to subroutine infinity after */ -/* completing this code. Set the s1zero flag in r12 if S1 is zero. */ - -S1noinf: bb1 s1size,r9,S1noinfd /* work with double precision operand */ -S1noinfs: or r10,r0,r5 /* load high word into r10 */ - clr r10,r10,1<sign> /* clear the sign bit */ - extu r11,r6,3<29> /* extract lower 3 bits of mantissa */ - or r10,r10,r11 /* or these 3 bits with high word */ - bcnd ne0,r10,operation /* do not set zero flag */ - jmp.n r1 /* since this operand was not infinity, */ - /* S2 must have been, so return from */ - /* function */ - set r12,r12,1<s1zero> /* set zeroflag */ -S1noinfd: clr r10,r5,1<sign> /* clear the sign bit */ - or r10,r6,r10 /* or high and low word */ - bcnd ne0,r10,operation /* do not set zero flag */ - jmp.n r1 /* since this operand was not infinity, */ - /* S2 must have been, so return from */ - /* function */ - set r12,r12,1<s1zero> /* set zeroflag */ - - -/* Check S2 for zero. If it is zero, then set the s2zero flag in r12. */ - -S2noinf: bb1 s2size,r9,S2noinfd /* work with double precision operand */ -S2noinfs: or r10,r0,r7 /* load high word into r10 */ - clr r10,r10,1<sign> /* clear the sign bit */ - extu r11,r8,3<29> /* extract lower 3 bits of mantissa */ - or r10,r10,r11 /* or these 3 bits with high word */ - bcnd ne0,r10,operation /* do not set zero flag */ - jmp.n r1 /* since this operand was not infinity, */ - /* S1 must have been, so return from */ - /* function */ - set r12,r12,1<s2zero> /* set zeroflag */ -S2noinfd: clr r10,r7,1<sign> /* clear the sign bit */ - or r10,r8,r10 /* or high and low word */ - bcnd ne0,r10,operation /* do not set zero flag */ - set r12,r12,1<s2zero> /* set zeroflag */ - /* since this operand was not infinity, */ - /* S1 must have been, so return from */ - /* function */ -operation: jmp r1 /* return from function */ +ASLOCAL(zero) + bb0 s1inf,r12,S1noinf /* see if S1 is zero */ + bb0 s2inf,r12,S2noinf /* see if S2 is zero */ + jmp r1 + +/* See if S1 is zero. Whether or not S1 is a zero, being in this routine */ +/* implies that S2 is infinity, so return to subroutine infinity after */ +/* completing this code. Set the s1zero flag in r12 if S1 is zero. */ + +S1noinf: + bb1 s1size,r9,S1noinfd /* work with double precision operand */ +S1noinfs: + or r10,r0,r5 /* load high word into r10 */ + clr r10,r10,1<sign> /* clear the sign bit */ + extu r11,r6,3<29> /* extract lower 3 bits of mantissa */ + or r10,r10,r11 /* or these 3 bits with high word */ + bcnd ne0,r10,operation /* do not set zero flag */ + jmp.n r1 /* since this operand was not */ + /* infinity, S2 must have been, */ + /* so return */ + set r12,r12,1<s1zero> /* set zeroflag */ +S1noinfd: + clr r10,r5,1<sign> /* clear the sign bit */ + or r10,r6,r10 /* or high and low word */ + bcnd ne0,r10,operation /* do not set zero flag */ + jmp.n r1 /* since this operand was not */ + /* infinity, S2 must have been, */ + /* so return */ + set r12,r12,1<s1zero> /* set zeroflag */ + + +/* Check S2 for zero. If it is zero, then set the s2zero flag in r12. */ + +S2noinf: + bb1 s2size,r9,S2noinfd /* work with double precision operand */ +S2noinfs: + or r10,r0,r7 /* load high word into r10 */ + clr r10,r10,1<sign> /* clear the sign bit */ + extu r11,r8,3<29> /* extract lower 3 bits of mantissa */ + or r10,r10,r11 /* or these 3 bits with high word */ + bcnd ne0,r10,operation /* do not set zero flag */ + jmp.n r1 /* since this operand was not */ + /* infinity, S1 must have been, */ + /* so return */ + set r12,r12,1<s2zero> /* set zeroflag */ +S2noinfd: + clr r10,r7,1<sign> /* clear the sign bit */ + or r10,r8,r10 /* or high and low word */ + bcnd ne0,r10,operation /* do not set zero flag */ + set r12,r12,1<s2zero> /* set zeroflag */ + /* since this operand was not */ + /* infinity, S1 must have been, */ + /* so return */ +operation: + jmp r1 ASENTRY(Xfp_imprecise) -/* input: r3 is the excepton frame */ - or r29, r3, r0 /* r29 is now the E.F. */ - subu r31, r31, 40 - st r1, r31, 32 - st r29, r31, 36 - - ld r2 , r29, EF_FPSR * 4 - ld r3 , r29, EF_FPCR * 4 - ld r4 , r29, EF_FPECR * 4 - ld r10, r29, EF_FPRH * 4 - ld r11, r29, EF_FPRL * 4 - ld r12, r29, EF_FPIT * 4 - -/* Load into r1 the return address for the exception handlers. Looking */ +/* input: r3 is the excepton frame */ + or r29, r3, r0 /* r29 is now the E.F. */ + subu r31, r31, 40 + st r1, r31, 32 + st r29, r31, 36 + + ld r2 , r29, EF_FPSR * 4 + ld r3 , r29, EF_FPCR * 4 + ld r4 , r29, EF_FPECR * 4 + ld r10, r29, EF_FPRH * 4 + ld r11, r29, EF_FPRL * 4 + ld r12, r29, EF_FPIT * 4 + +/* Load into r1 the return address for the exception handlers. Looking */ /* at FPECR, branch to the appropriate exception handler. */ - or.u r1,r0,hi16(fpui_wrapup)/* load return address of functions */ - or r1,r1,lo16(fpui_wrapup) + or.u r1,r0,hi16(fpui_wrapup)/* load return address of functions */ + or r1,r1,lo16(fpui_wrapup) - bb0 2,r4,2f /* branch to FPunderflow if bit set */ - br _FPunderflow - 2: bb0 1,r4,3f /* branch to FPoverflow if bit set */ - br _FPoverflow - 3: + bb0 2,r4,2f /* branch to FPunderflow if bit set */ + br _ASM_LABEL(FPunderflow) +2: + bb0 1,r4,3f /* branch to FPoverflow if bit set */ + br _ASM_LABEL(FPoverflow) +3: #ifdef HANDLER - br _handler /* branch to handler since bit will be set */ - /* for inexact */ + br _handler /* branch to handler since bit will */ + /* be set for inexact */ #endif - /* should never get here!!!! */ + +/* + * XXX should never get here! + */ data align 8 - 1: string "error in inprecise fp exception handler, r4 is 0x%08x" - align 8 +1: + string "error in inprecise fp exception handler, r4 is 0x%08x\0" text - or.u r2, r0, hi16(1b) - or r2, r2, lo16(1b) - or r3, r4, r0 - bsr _printf - or.u r2, r0, hi16(1b) - or r2, r2, lo16(1b) - bsr _panic + align 8 + or.u r2, r0, hi16(1b) + or r2, r2, lo16(1b) + or r3, r4, r0 + bsr _printf + or.u r2, r0, hi16(1b) + or r2, r2, lo16(1b) + bsr _panic fpui_wrapup: - tb1 0,r0,0 /* make sure all floating point operations */ - ldcr r5, psr /* load the PSR */ - /* have finished */ - or r5, r5, 0x2 /* disable interrupts */ - stcr r5, psr + tb1 0,r0,0 /* make sure all floating point operations */ + /* have finished */ + ldcr r10, cr1 /* load the PSR */ #if 0 -Why is this done? -- it screws up things later. - or r5, r5, 0x8 /* set SFU 1 disable bit, disable SFU 1 */ - stcr r5, psr + set r10, r10, 1<PSR_FPU_DISABLE_BIT> #endif + set r10, r10, 1<PSR_INTERRUPT_DISABLE_BIT> + stcr r10, cr1 ld r1, r31, 32 ld r29,r31, 36 addu r31, r31, 40 - /* write back the results */ + /* write back the results */ extu r2, r12, 5<0> addu r3, r29, EF_R0*4 bb0 destsize, r12, Iwritesingle @@ -2289,5 +2523,4 @@ Why is this done? -- it screws up things later. clr r2, r2, 27<5> Iwritesingle: st r11, r3 [r2] -/* Return.. */ jmp r1 |