*	$OpenBSD: ssin.sa,v 1.2 1996/05/29 21:05:42 niklas Exp $
*	$NetBSD: ssin.sa,v 1.3 1994/10/26 07:50:01 cgd Exp $

*	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
*	M68000 Hi-Performance Microprocessor Division
*	M68040 Software Package
*
*	M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
*	All rights reserved.
*
*	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
*	To the maximum extent permitted by applicable law,
*	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
*	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
*	PARTICULAR PURPOSE and any warranty against infringement with
*	regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
*	and any accompanying written materials.
*
*	To the maximum extent permitted by applicable law,
*	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
*	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
*	PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
*	OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
*	SOFTWARE. Motorola assumes no responsibility for the maintenance
*	and support of the SOFTWARE.
*
*	You are hereby granted a copyright license to use, modify, and
*	distribute the SOFTWARE so long as this entire notice is retained
*	without alteration in any modified and/or redistributed versions,
*	and that such modified versions are clearly identified as such.
*	No licenses are granted by implication, estoppel or otherwise
*	under any patents or trademarks of Motorola, Inc.

*
*	ssin.sa 3.3 7/29/91
*
*	The entry point sSIN computes the sine of an input argument
*	sCOS computes the cosine, and sSINCOS computes both. The
*	corresponding entry points with a "d" compute the same
*	corresponding function values for denormalized inputs.
*
*	Input: Double-extended number X in location pointed to
*		by address register a0.
*
*	Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or
*		COS is requested. Otherwise, for SINCOS, sin(X) is returned
*		in Fp0, and cos(X) is returned in Fp1.
*
*	Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
*
*	Accuracy and Monotonicity: The returned result is within 1 ulp in
*		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
*		result is subsequently rounded to double precision. The
*		result is provably monotonic in double precision.
*
*	Speed: The programs sSIN and sCOS take approximately 150 cycles for
*		input argument X such that |X| < 15Pi, which is the usual
*		situation. The speed for sSINCOS is approximately 190 cycles.
*
*	Algorithm:
*
*	SIN and COS:
*	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
*
*	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
*
*	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
*		k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite
*		k by k := k + AdjN.
*
*	4. If k is even, go to 6.
*
*	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
*		where cos(r) is approximated by an even polynomial in r,
*		1 + r*r*(B1+s*(B2+ ... + s*B8)),	s = r*r.
*		Exit.
*
*	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
*		where sin(r) is approximated by an odd polynomial in r
*		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.
*		Exit.
*
*	7. If |X| > 1, go to 9.
*
*	8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
*
*	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
*
*	SINCOS:
*	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
*
*	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
*		k = N mod 4, so in particular, k = 0,1,2,or 3.
*
*	3. If k is even, go to 5.
*
*	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
*		j1 exclusive or with the l.s.b. of k.
*		sgn1 := (-1)**j1, sgn2 := (-1)**j2.
*		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
*		sin(r) and cos(r) are computed as odd and even polynomials
*		in r, respectively. Exit
*
*	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
*		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
*		sin(r) and cos(r) are computed as odd and even polynomials
*		in r, respectively. Exit
*
*	6. If |X| > 1, go to 8.
*
*	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
*
*	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
*

SSIN	IDNT	2,1 Motorola 040 Floating Point Software Package

	section	8

	include	fpsp.h

*--REGISTER ROLES AS USED BY THE CODE IN THIS FILE:
*--	a0	POINTS TO THE EXTENDED-PRECISION INPUT X (READ ONLY).
*--	a6	BASE OF THE SCRATCH AREA HOLDING X, N, ADJN, FP_SCRn, L_SCRn.
*--	d1	VALUE WRITTEN TO FPCR JUST BEFORE THE LAST FP INSTRUCTION OF
*--		EACH PATH (THE "USERS" CONTROL WORD PER THE INLINE COMMENTS).
*--	d0,a1	SCRATCH, NOT PRESERVED.  d2 IS PRESERVED VIA THE STACK.
*--	fp2-fp5	SAVED/RESTORED BY REDUCEX ONLY.
*--NOTE(review): FP2/FP3 ARE OVERWRITTEN BY SINPOLY/COSPOLY/NODD/NEVEN AND
*--NO SAVE OR RESTORE OF THEM APPEARS IN THIS FILE; PRESUMABLY THE CALLER'S
*--FRAME (fpsp.h) PRESERVES THEM - CONFIRM.

*--BOUNDS1 HOLDS THE SAME TWO VALUES THAT SINBGN/ssincos COMPARE AGAINST AS
*--IMMEDIATES.  BOUNDS1, INVTWOPI, TWOPI1 AND TWOPI2 ARE NOT REFERENCED BY
*--THE CODE IN THIS FILE.
BOUNDS1	DC.L	$3FD78000,$4004BC7E
TWOBYPI	DC.L	$3FE45F30,$6DC9C883

*--SINE POLYNOMIAL COEFFICIENTS: A7..A3 DOUBLE, A2..A1 EXTENDED.
SINA7	DC.L	$BD6AAA77,$CCC994F5
SINA6	DC.L	$3DE61209,$7AAE8DA1

SINA5	DC.L	$BE5AE645,$2A118AE4
SINA4	DC.L	$3EC71DE3,$A5341531

SINA3	DC.L	$BF2A01A0,$1A018B59,$00000000,$00000000

SINA2	DC.L	$3FF80000,$88888888,$888859AF,$00000000

SINA1	DC.L	$BFFC0000,$AAAAAAAA,$AAAAAA99,$00000000

*--COSINE POLYNOMIAL COEFFICIENTS: B8..B4 DOUBLE, B3..B2 EXTENDED,
*--B1 = -1/2 IN SINGLE.
COSB8	DC.L	$3D2AC4D0,$D6011EE3
COSB7	DC.L	$BDA9396F,$9F45AC19

COSB6	DC.L	$3E21EED9,$0612C972
COSB5	DC.L	$BE927E4F,$B79D9FCF

COSB4	DC.L	$3EFA01A0,$1A01D423,$00000000,$00000000

COSB3	DC.L	$BFF50000,$B60B60B6,$0B61D438,$00000000

COSB2	DC.L	$3FFA0000,$AAAAAAAA,$AAAAAB5E
COSB1	DC.L	$BF000000

INVTWOPI	DC.L	$3FFC0000,$A2F9836E,$4E44152A

TWOPI1	DC.L	$40010000,$C90FDAA2,$00000000,$00000000
TWOPI2	DC.L	$3FDF0000,$85A308D4,$00000000,$00000000

	xref	PITBL

*--SCRATCH LOCATIONS (ALL ADDRESSED OFF a6).  NOTE THE DELIBERATE ALIASING:
*--POSNEG1/TWOTO63 SHARE L_SCR1 AND ENDFLAG/N SHARE L_SCR2.
INARG	equ	FP_SCR4

X	equ	FP_SCR5
XDCARE	equ	X+2
XFRAC	equ	X+4

RPRIME	equ	FP_SCR1
SPRIME	equ	FP_SCR2

POSNEG1	equ	L_SCR1
TWOTO63	equ	L_SCR1

ENDFLAG	equ	L_SCR2
N	equ	L_SCR2

ADJN	equ	L_SCR3

	xref	t_frcinx
	xref	t_extdnrm
	xref	sto_cos

*--ssind: SINE ENTRY POINT FOR DENORMALIZED INPUT.
	xdef	ssind
ssind:
*--SIN(X) = X FOR DENORMALIZED X
	bra	t_extdnrm

*--scosd: COSINE ENTRY POINT FOR DENORMALIZED INPUT.  RETURNS 1.0 IN FP0.
	xdef	scosd
scosd:
*--COS(X) = 1 FOR DENORMALIZED X

	FMOVE.S	#:3F800000,FP0
*
*	9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
*
	fmove.l	#0,fpsr
*
	bra	t_frcinx

*--ssin: SINE ENTRY POINT.  a0 -> X.  RESULT IN FP0.
	xdef	ssin
ssin:
*--SET ADJN TO 0
	CLR.L	ADJN(a6)
	BRA.B	SINBGN

*--scos: COSINE ENTRY POINT.  a0 -> X.  RESULT IN FP0.
	xdef	scos
scos:
*--SET ADJN TO 1
	MOVE.L	#1,ADJN(a6)

SINBGN:
*--LOAD X, BUILD ITS COMPACT FORM IN D0 (SIGN/EXPONENT WORD IN THE UPPER
*--HALF, TOP 16 MANTISSA BITS IN THE LOWER HALF) AND CHECK IF |X| IS TOO
*--SMALL OR LARGE

	FMOVE.X	(a0),FP0	...LOAD INPUT

	MOVE.L	(A0),D0
	MOVE.W	4(A0),D0
	FMOVE.X	FP0,X(a6)
	ANDI.L	#$7FFFFFFF,D0	...COMPACTIFY X

	CMPI.L	#$3FD78000,D0	...|X| >= 2**(-40)?
	BGE.B	SOK1
	BRA.W	SINSM

SOK1:
	CMPI.L	#$4004BC7E,D0	...|X| < 15 PI?
	BLT.B	SINMAIN
	BRA.W	REDUCEX

SINMAIN:
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
	FMOVE.X	FP0,FP1
	FMUL.D	TWOBYPI,FP1	...X*2/PI

*--HIDE THE NEXT THREE INSTRUCTIONS
	LEA	PITBL+$200,A1	...TABLE OF N*PI/2, N = -32,...,32

*--FP1 IS NOW READY
	FMOVE.L	FP1,N(a6)	...CONVERT TO INTEGER

*--EACH TABLE ENTRY IS 16 BYTES: N*16 IS ADDED TO THE TABLE MIDPOINT.
	MOVE.L	N(a6),D0
	ASL.L	#4,D0
	ADDA.L	D0,A1		...A1 IS THE ADDRESS OF N*PIBY2
*				...WHICH IS IN TWO PIECES Y1 & Y2

	FSUB.X	(A1)+,FP0	...X-Y1
*--HIDE THE NEXT ONE
	FSUB.S	(A1),FP0	...FP0 IS R = (X-Y1)-Y2

SINCONT:
*--continuation from REDUCEX

*--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
	MOVE.L	N(a6),D0
	ADD.L	ADJN(a6),D0	...SEE IF D0 IS ODD OR EVEN
	ROR.L	#1,D0		...D0 WAS ODD IFF D0 IS NEGATIVE
	TST.L	D0
	BLT.W	COSPOLY

SINPOLY:
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
*--THEN WE RETURN	SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
*--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
*--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
*--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
*--WHERE T=S*S.
*--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
*--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
	FMOVE.X	FP0,X(a6)	...X IS R
	FMUL.X	FP0,FP0		...FP0 IS S
*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
	FMOVE.D	SINA7,FP3
	FMOVE.D	SINA6,FP2
*--FP0 IS NOW READY
	FMOVE.X	FP0,FP1
	FMUL.X	FP1,FP1		...FP1 IS T
*--HIDE THE NEXT TWO WHILE WAITING FOR FP1

	ROR.L	#1,D0
	ANDI.L	#$80000000,D0
*				...LEAST SIG. BIT OF D0 IN SIGN POSITION
	EOR.L	D0,X(a6)	...X IS NOW R'= SGN*R

	FMUL.X	FP1,FP3		...TA7
	FMUL.X	FP1,FP2		...TA6

	FADD.D	SINA5,FP3	...A5+TA7
	FADD.D	SINA4,FP2	...A4+TA6

	FMUL.X	FP1,FP3		...T(A5+TA7)
	FMUL.X	FP1,FP2		...T(A4+TA6)

	FADD.D	SINA3,FP3	...A3+T(A5+TA7)
	FADD.X	SINA2,FP2	...A2+T(A4+TA6)

	FMUL.X	FP3,FP1		...T(A3+T(A5+TA7))

	FMUL.X	FP0,FP2		...S(A2+T(A4+TA6))
	FADD.X	SINA1,FP1	...A1+T(A3+T(A5+TA7))
	FMUL.X	X(a6),FP0	...R'*S

	FADD.X	FP2,FP1		...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
*--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING

	FMUL.X	FP1,FP0		...SIN(R')-R'
*--FP1 RELEASED.

	FMOVE.L	d1,FPCR		;restore users exceptions
	FADD.X	X(a6),FP0	;last inst - possible exception set
	bra	t_frcinx

COSPOLY:
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
*--THEN WE RETURN	SGN*COS(R). SGN*COS(R) IS COMPUTED BY
*--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
*--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
*--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
*--WHERE T=S*S.
*--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
*--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
*--AND IS THEREFORE STORED AS SINGLE PRECISION.

	FMUL.X	FP0,FP0		...FP0 IS S
*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
	FMOVE.D	COSB8,FP2
	FMOVE.D	COSB7,FP3
*--FP0 IS NOW READY
	FMOVE.X	FP0,FP1
	FMUL.X	FP1,FP1		...FP1 IS T
*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
	FMOVE.X	FP0,X(a6)	...X IS S
	ROR.L	#1,D0
	ANDI.L	#$80000000,D0
*				...LEAST SIG. BIT OF D0 IN SIGN POSITION

	FMUL.X	FP1,FP2		...TB8
*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
	EOR.L	D0,X(a6)	...X IS NOW S'= SGN*S
	ANDI.L	#$80000000,D0

	FMUL.X	FP1,FP3		...TB7
*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
	ORI.L	#$3F800000,D0	...D0 IS SGN IN SINGLE
	MOVE.L	D0,POSNEG1(a6)

	FADD.D	COSB6,FP2	...B6+TB8
	FADD.D	COSB5,FP3	...B5+TB7

	FMUL.X	FP1,FP2		...T(B6+TB8)
	FMUL.X	FP1,FP3		...T(B5+TB7)

	FADD.D	COSB4,FP2	...B4+T(B6+TB8)
	FADD.X	COSB3,FP3	...B3+T(B5+TB7)

	FMUL.X	FP1,FP2		...T(B4+T(B6+TB8))
	FMUL.X	FP3,FP1		...T(B3+T(B5+TB7))

	FADD.X	COSB2,FP2	...B2+T(B4+T(B6+TB8))
	FADD.S	COSB1,FP1	...B1+T(B3+T(B5+TB7))

	FMUL.X	FP2,FP0		...S(B2+T(B4+T(B6+TB8)))
*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
*--FP2 RELEASED.

	FADD.X	FP1,FP0
*--FP1 RELEASED

	FMUL.X	X(a6),FP0

	FMOVE.L	d1,FPCR		;restore users exceptions
	FADD.S	POSNEG1(a6),FP0	;last inst - possible exception set
	bra	t_frcinx

*--NOTE: THE LABEL SINBORS IS NOT BRANCHED TO FROM ANYWHERE IN THIS FILE;
*--SINBGN BRANCHES DIRECTLY TO SINSM OR REDUCEX.
SINBORS:
*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
*--IF |X| < 2**(-40), RETURN X OR 1.
	CMPI.L	#$3FFF8000,D0
	BGT.B	REDUCEX

SINSM:
*--ADJN > 0 MEANS COSINE WAS REQUESTED
	MOVE.L	ADJN(a6),D0
	TST.L	D0
	BGT.B	COSTINY

SINTINY:
	CLR.W	XDCARE(a6)	...JUST IN CASE
	FMOVE.L	d1,FPCR		;restore users exceptions
	FMOVE.X	X(a6),FP0	;last inst - possible exception set
	bra	t_frcinx

COSTINY:
*--RETURN 1 MINUS A TINY SINGLE-PRECISION VALUE, COMPUTED UNDER THE
*--USER'S FPCR
	FMOVE.S	#:3F800000,FP0

	FMOVE.L	d1,FPCR		;restore users exceptions
	FSUB.S	#:00800000,FP0	;last inst - possible exception set
	bra	t_frcinx

REDUCEX:
*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
*--SHARED BY ssin/scos AND ssincos; ON EXIT ADJN SELECTS THE CONTINUATION
*--(SEE RESTORE).

	FMOVEM.X	FP2-FP5,-(A7)	...save FP2 through FP5
	MOVE.L	D2,-(A7)
	FMOVE.S	#:00000000,FP1

*--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
*--there is a danger of unwanted overflow in first LOOP iteration.  In this
*--case, reduce argument by one remainder step to make subsequent reduction
*--safe.
	cmpi.l	#$7ffeffff,d0		;is argument dangerously large?
	bne.b	LOOP
	move.l	#$7ffe0000,FP_SCR2(a6)	;yes
*					;create 2**16383*PI/2
	move.l	#$c90fdaa2,FP_SCR2+4(a6)
	clr.l	FP_SCR2+8(a6)
	ftst.x	fp0			;test sign of argument
	move.l	#$7fdc0000,FP_SCR3(a6)	;create low half of 2**16383*
*					;PI/2 at FP_SCR3
	move.l	#$85a308d3,FP_SCR3+4(a6)
	clr.l	FP_SCR3+8(a6)
	fblt.w	red_neg
	or.w	#$8000,FP_SCR2(a6)	;positive arg
	or.w	#$8000,FP_SCR3(a6)
red_neg:
	fadd.x	FP_SCR2(a6),fp0		;high part of reduction is exact
	fmove.x	fp0,fp1			;save high result in fp1
	fadd.x	FP_SCR3(a6),fp0		;low part of reduction
	fsub.x	fp0,fp1			;determine low component of result
	fadd.x	FP_SCR3(a6),fp1		;fp0/fp1 are reduced argument.

*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
*--integer quotient will be stored in N
*--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)

LOOP:
	FMOVE.X	FP0,INARG(a6)	...+-2**K * F, 1 <= F < 2
	MOVE.W	INARG(a6),D0
	MOVE.L	D0,A1		...save a copy of D0
	ANDI.L	#$00007FFF,D0
	SUBI.L	#$00003FFF,D0	...D0 IS K
	CMPI.L	#28,D0
	BLE.B	LASTLOOP
CONTLOOP:
	SUBI.L	#27,D0		...D0 IS L := K-27
	CLR.L	ENDFLAG(a6)
	BRA.B	WORK
LASTLOOP:
	CLR.L	D0		...D0 IS L := 0
	MOVE.L	#1,ENDFLAG(a6)

WORK:
*--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
*--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.

*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
*--2**L * (PIby2_1), 2**L * (PIby2_2)

	MOVE.L	#$00003FFE,D2	...BIASED EXPO OF 2/PI
	SUB.L	D0,D2		...BIASED EXPO OF 2**(-L)*(2/PI)

	MOVE.L	#$A2F9836E,FP_SCR1+4(a6)
	MOVE.L	#$4E44152A,FP_SCR1+8(a6)
	MOVE.W	D2,FP_SCR1(a6)	...FP_SCR1 is 2**(-L)*(2/PI)

	FMOVE.X	FP0,FP2
	FMUL.X	FP_SCR1(a6),FP2
*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
*--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
*--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
*--US THE DESIRED VALUE IN FLOATING POINT.

*--HIDE SIX CYCLES OF INSTRUCTION
	MOVE.L	A1,D2
	SWAP	D2
	ANDI.L	#$80000000,D2
	ORI.L	#$5F000000,D2	...D2 IS SIGN(INARG)*2**63 IN SGL
	MOVE.L	D2,TWOTO63(a6)

	MOVE.L	D0,D2
	ADDI.L	#$00003FFF,D2	...BIASED EXPO OF 2**L * (PI/2)

*--FP2 IS READY
	FADD.S	TWOTO63(a6),FP2	...THE FRACTIONAL PART OF FP2 IS ROUNDED

*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1 and 2**(L)*Piby2_2
	MOVE.W	D2,FP_SCR2(a6)
	CLR.W	FP_SCR2+2(a6)
	MOVE.L	#$C90FDAA2,FP_SCR2+4(a6)
	CLR.L	FP_SCR2+8(a6)	...FP_SCR2 is 2**(L) * Piby2_1

*--FP2 IS READY
	FSUB.S	TWOTO63(a6),FP2	...FP2 is N

	ADDI.L	#$00003FDD,D0
	MOVE.W	D0,FP_SCR3(a6)
	CLR.W	FP_SCR3+2(a6)
	MOVE.L	#$85A308D3,FP_SCR3+4(a6)
	CLR.L	FP_SCR3+8(a6)	...FP_SCR3 is 2**(L) * Piby2_2

*--D0 NOW CARRIES ENDFLAG; IT IS TESTED AFTER THE SUBTRACTION BELOW
	MOVE.L	ENDFLAG(a6),D0

*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
*--P2 = 2**(L) * Piby2_2
	FMOVE.X	FP2,FP4
	FMul.X	FP_SCR2(a6),FP4	...W = N*P1
	FMove.X	FP2,FP5
	FMul.X	FP_SCR3(a6),FP5	...w = N*P2
	FMove.X	FP4,FP3
*--we want P+p = W+w but |p| <= half ulp of P
*--Then, we need to compute A := R-P and a := r-p
	FAdd.X	FP5,FP3		...FP3 is P
	FSub.X	FP3,FP4		...W-P

	FSub.X	FP3,FP0		...FP0 is A := R - P
	FAdd.X	FP5,FP4		...FP4 is p = (W-P)+w

	FMove.X	FP0,FP3		...FP3 A
	FSub.X	FP4,FP1		...FP1 is a := r - p

*--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
*--|r| <= half ulp of R.
	FAdd.X	FP1,FP0		...FP0 is R := A+a
*--No need to calculate r if this is the last loop
	TST.L	D0
	BGT.W	RESTORE

*--Need to calculate r
	FSub.X	FP0,FP3		...A-R
	FAdd.X	FP3,FP1		...FP1 is r := (A-R)+a
	BRA.W	LOOP

RESTORE:
*--STORE THE INTEGER QUOTIENT, UNDO THE SAVES MADE AT REDUCEX, THEN RESUME
*--IN THE SIN/COS PATH (ADJN < 4) OR THE SINCOS PATH (ADJN = 4).
	FMOVE.L	FP2,N(a6)
	MOVE.L	(A7)+,D2
	FMOVEM.X	(A7)+,FP2-FP5

	MOVE.L	ADJN(a6),D0
	CMPI.L	#4,D0

	BLT.W	SINCONT
	BRA.B	SCCONT

*--ssincosd: SINCOS ENTRY POINT FOR DENORMALIZED INPUT.  1.0 IS PASSED TO
*--sto_cos IN FP1 AS THE COSINE.
	xdef	ssincosd
ssincosd:
*--SIN AND COS OF X FOR DENORMALIZED X

	FMOVE.S	#:3F800000,FP1
	bsr	sto_cos		;store cosine result
	bra	t_extdnrm

*--ssincos: SINCOS ENTRY POINT.  a0 -> X.  THE COSINE IS HANDED TO sto_cos
*--IN FP1 AND THE SINE IS LEFT IN FP0.
	xdef	ssincos
ssincos:
*--SET ADJN TO 4
	MOVE.L	#4,ADJN(a6)

	FMOVE.X	(a0),FP0	...LOAD INPUT

	MOVE.L	(A0),D0
	MOVE.W	4(A0),D0
	FMOVE.X	FP0,X(a6)
	ANDI.L	#$7FFFFFFF,D0	...COMPACTIFY X

	CMPI.L	#$3FD78000,D0	...|X| >= 2**(-40)?
	BGE.B	SCOK1
	BRA.W	SCSM

SCOK1:
	CMPI.L	#$4004BC7E,D0	...|X| < 15 PI?
	BLT.B	SCMAIN
	BRA.W	REDUCEX

SCMAIN:
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
	FMOVE.X	FP0,FP1
	FMUL.D	TWOBYPI,FP1	...X*2/PI

*--HIDE THE NEXT THREE INSTRUCTIONS
	LEA	PITBL+$200,A1	...TABLE OF N*PI/2, N = -32,...,32

*--FP1 IS NOW READY
	FMOVE.L	FP1,N(a6)	...CONVERT TO INTEGER

	MOVE.L	N(a6),D0
	ASL.L	#4,D0
	ADDA.L	D0,A1		...ADDRESS OF N*PIBY2, IN Y1, Y2

	FSUB.X	(A1)+,FP0	...X-Y1
	FSUB.S	(A1),FP0	...FP0 IS R = (X-Y1)-Y2

SCCONT:
*--continuation point from REDUCEX

*--HIDE THE NEXT TWO
	MOVE.L	N(a6),D0
	ROR.L	#1,D0

	TST.L	D0		...D0 < 0 IFF N IS ODD
	BGE.W	NEVEN

NODD:
*--REGISTERS SAVED SO FAR: D0, A0, FP2.
*--N ODD: FP1 ACCUMULATES THE SINE POLYNOMIAL (BECOMES COS(X)) AND
*--FP2/FP0 THE COSINE POLYNOMIAL (BECOMES SIN(X)).

	FMOVE.X	FP0,RPRIME(a6)
	FMUL.X	FP0,FP0		...FP0 IS S = R*R
	FMOVE.D	SINA7,FP1	...A7
	FMOVE.D	COSB8,FP2	...B8
	FMUL.X	FP0,FP1		...SA7
	MOVE.L	d2,-(A7)
	MOVE.L	D0,d2
	FMUL.X	FP0,FP2		...SB8
	ROR.L	#1,d2
	ANDI.L	#$80000000,d2

	FADD.D	SINA6,FP1	...A6+SA7
	EOR.L	D0,d2
	ANDI.L	#$80000000,d2
	FADD.D	COSB7,FP2	...B7+SB8

	FMUL.X	FP0,FP1		...S(A6+SA7)
	EOR.L	d2,RPRIME(a6)
	MOVE.L	(A7)+,d2
	FMUL.X	FP0,FP2		...S(B7+SB8)
	ROR.L	#1,D0
	ANDI.L	#$80000000,D0

	FADD.D	SINA5,FP1	...A5+S(A6+SA7)
	MOVE.L	#$3F800000,POSNEG1(a6)
	EOR.L	D0,POSNEG1(a6)
	FADD.D	COSB6,FP2	...B6+S(B7+SB8)

	FMUL.X	FP0,FP1		...S(A5+S(A6+SA7))
	FMUL.X	FP0,FP2		...S(B6+S(B7+SB8))
	FMOVE.X	FP0,SPRIME(a6)

	FADD.D	SINA4,FP1	...A4+S(A5+S(A6+SA7))
	EOR.L	D0,SPRIME(a6)
	FADD.D	COSB5,FP2	...B5+S(B6+S(B7+SB8))

	FMUL.X	FP0,FP1		...S(A4+...)
	FMUL.X	FP0,FP2		...S(B5+...)

	FADD.D	SINA3,FP1	...A3+S(A4+...)
	FADD.D	COSB4,FP2	...B4+S(B5+...)

	FMUL.X	FP0,FP1		...S(A3+...)
	FMUL.X	FP0,FP2		...S(B4+...)

	FADD.X	SINA2,FP1	...A2+S(A3+...)
	FADD.X	COSB3,FP2	...B3+S(B4+...)

	FMUL.X	FP0,FP1		...S(A2+...)
	FMUL.X	FP0,FP2		...S(B3+...)

	FADD.X	SINA1,FP1	...A1+S(A2+...)
	FADD.X	COSB2,FP2	...B2+S(B3+...)

	FMUL.X	FP0,FP1		...S(A1+...)
	FMUL.X	FP2,FP0		...S(B2+...)

	FMUL.X	RPRIME(a6),FP1	...R'S(A1+...)
	FADD.S	COSB1,FP0	...B1+S(B2...)
	FMUL.X	SPRIME(a6),FP0	...S'(B1+S(B2+...))

	move.l	d1,-(sp)	;save users mode & precision
	andi.l	#$ff,d1		;mask off all exceptions
	fmove.l	d1,FPCR
	FADD.X	RPRIME(a6),FP1	...COS(X)
	bsr	sto_cos		;store cosine result
	FMOVE.L	(sp)+,FPCR	;restore users exceptions
	FADD.S	POSNEG1(a6),FP0	...SIN(X)

	bra	t_frcinx

NEVEN:
*--REGISTERS SAVED SO FAR: FP2.
*--N EVEN: FP1 ACCUMULATES THE COSINE POLYNOMIAL (BECOMES COS(X)) AND
*--FP2/FP0 THE SINE POLYNOMIAL (BECOMES SIN(X)).

	FMOVE.X	FP0,RPRIME(a6)
	FMUL.X	FP0,FP0		...FP0 IS S = R*R
	FMOVE.D	COSB8,FP1	...B8
	FMOVE.D	SINA7,FP2	...A7
	FMUL.X	FP0,FP1		...SB8
	FMOVE.X	FP0,SPRIME(a6)
	FMUL.X	FP0,FP2		...SA7
	ROR.L	#1,D0
	ANDI.L	#$80000000,D0
	FADD.D	COSB7,FP1	...B7+SB8
	FADD.D	SINA6,FP2	...A6+SA7
	EOR.L	D0,RPRIME(a6)
	EOR.L	D0,SPRIME(a6)
	FMUL.X	FP0,FP1		...S(B7+SB8)
	ORI.L	#$3F800000,D0
	MOVE.L	D0,POSNEG1(a6)
	FMUL.X	FP0,FP2		...S(A6+SA7)

	FADD.D	COSB6,FP1	...B6+S(B7+SB8)
	FADD.D	SINA5,FP2	...A5+S(A6+SA7)

	FMUL.X	FP0,FP1		...S(B6+S(B7+SB8))
	FMUL.X	FP0,FP2		...S(A5+S(A6+SA7))

	FADD.D	COSB5,FP1	...B5+S(B6+S(B7+SB8))
	FADD.D	SINA4,FP2	...A4+S(A5+S(A6+SA7))

	FMUL.X	FP0,FP1		...S(B5+...)
	FMUL.X	FP0,FP2		...S(A4+...)

	FADD.D	COSB4,FP1	...B4+S(B5+...)
	FADD.D	SINA3,FP2	...A3+S(A4+...)

	FMUL.X	FP0,FP1		...S(B4+...)
	FMUL.X	FP0,FP2		...S(A3+...)

	FADD.X	COSB3,FP1	...B3+S(B4+...)
	FADD.X	SINA2,FP2	...A2+S(A3+...)

	FMUL.X	FP0,FP1		...S(B3+...)
	FMUL.X	FP0,FP2		...S(A2+...)

	FADD.X	COSB2,FP1	...B2+S(B3+...)
	FADD.X	SINA1,FP2	...A1+S(A2+...)

	FMUL.X	FP0,FP1		...S(B2+...)
	fmul.x	fp2,fp0		...s(a1+...)

	FADD.S	COSB1,FP1	...B1+S(B2...)
	FMUL.X	RPRIME(a6),FP0	...R'S(A1+...)
	FMUL.X	SPRIME(a6),FP1	...S'(B1+S(B2+...))

	move.l	d1,-(sp)	;save users mode & precision
	andi.l	#$ff,d1		;mask off all exceptions
	fmove.l	d1,FPCR
	FADD.S	POSNEG1(a6),FP1	...COS(X)
	bsr	sto_cos		;store cosine result
	FMOVE.L	(sp)+,FPCR	;restore users exceptions
	FADD.X	RPRIME(a6),FP0	...SIN(X)

	bra	t_frcinx

*--NOTE: THE LABEL SCBORS IS NOT BRANCHED TO FROM ANYWHERE IN THIS FILE;
*--ssincos BRANCHES DIRECTLY TO SCSM OR REDUCEX.
SCBORS:
	CMPI.L	#$3FFF8000,D0
	BGT.W	REDUCEX

SCSM:
*--|X| < 2**(-40): COS(X) IS 1 MINUS A TINY SINGLE (COMPUTED WITH
*--EXCEPTIONS MASKED, THEN STORED VIA sto_cos) AND SIN(X) IS X ITSELF.
	CLR.W	XDCARE(a6)
	FMOVE.S	#:3F800000,FP1

	move.l	d1,-(sp)	;save users mode & precision
	andi.l	#$ff,d1		;mask off all exceptions
	fmove.l	d1,FPCR
	FSUB.S	#:00800000,FP1
	bsr	sto_cos		;store cosine result
	FMOVE.L	(sp)+,FPCR	;restore users exceptions
	FMOVE.X	X(a6),FP0
	bra	t_frcinx

	end