*       $NetBSD: ssin.sa,v 1.6 2021/12/05 07:04:30 msaitoh Exp $

*       MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
*       M68000 Hi-Performance Microprocessor Division
*       M68040 Software Package
*
*       M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
*       All rights reserved.
*
*       THE SOFTWARE is provided on an "AS IS" basis and without warranty.
*       To the maximum extent permitted by applicable law,
*       MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
*       INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
*       PARTICULAR PURPOSE and any warranty against infringement with
*       regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
*       and any accompanying written materials.
*
*       To the maximum extent permitted by applicable law,
*       IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
*       (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
*       PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
*       OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
*       SOFTWARE.  Motorola assumes no responsibility for the maintenance
*       and support of the SOFTWARE.
*
*       You are hereby granted a copyright license to use, modify, and
*       distribute the SOFTWARE so long as this entire notice is retained
*       without alteration in any modified and/or redistributed versions,
*       and that such modified versions are clearly identified as such.
*       No licenses are granted by implication, estoppel or otherwise
*       under any patents or trademarks of Motorola, Inc.

*
*       ssin.sa 3.3 7/29/91
*
*       The entry point sSIN computes the sine of an input argument
*       sCOS computes the cosine, and sSINCOS computes both. The
*       corresponding entry points with a "d" computes the same
*       corresponding function values for denormalized inputs.
*
*       Input: Double-extended number X in location pointed to
*               by address register a0.
*
*       Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or
*               COS is requested. Otherwise, for SINCOS, sin(X) is returned
*               in Fp0, and cos(X) is returned in Fp1.
*
*       Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
*
*       Accuracy and Monotonicity: The returned result is within 1 ulp in
*               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
*               result is subsequently rounded to double precision. The
*               result is provably monotonic in double precision.
*
*       Speed: The programs sSIN and sCOS take approximately 150 cycles for
*               input argument X such that |X| < 15Pi, which is the usual
*               situation. The speed for sSINCOS is approximately 190 cycles.
*
*       Algorithm:
*
*       SIN and COS:
*       1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
*
*       2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
*
*       3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
*               k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite
*               k by k := k + AdjN.
*
*       4. If k is even, go to 6.
*
*       5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
*               where cos(r) is approximated by an even polynomial in r,
*               1 + r*r*(B1+s*(B2+ ... + s*B8)),        s = r*r.
*               Exit.
*
*       6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
*               where sin(r) is approximated by an odd polynomial in r
*               r + r*s*(A1+s*(A2+ ... + s*A7)),        s = r*r.
*               Exit.
*
*       7. If |X| > 1, go to 9.
*
*       8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
*
*       9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
*
*       SINCOS:
*       1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
*
*       2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
*               k = N mod 4, so in particular, k = 0,1,2,or 3.
*
*       3. If k is even, go to 5.
*
*       4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
*               j1 exclusive or with the l.s.b. of k.
*               sgn1 := (-1)**j1, sgn2 := (-1)**j2.
*               SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
*               sin(r) and cos(r) are computed as odd and even polynomials
*               in r, respectively. Exit
*
*       5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
*               SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
*               sin(r) and cos(r) are computed as odd and even polynomials
*               in r, respectively. Exit
*
*       6. If |X| > 1, go to 8.
*
*       7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
*
*       8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
*

SSIN    IDNT    2,1 Motorola 040 Floating Point Software Package

       section 8

       include fpsp.h

BOUNDS1 DC.L $3FD78000,$4004BC7E
TWOBYPI DC.L $3FE45F30,$6DC9C883

SINA7   DC.L $BD6AAA77,$CCC994F5
SINA6   DC.L $3DE61209,$7AAE8DA1

SINA5   DC.L $BE5AE645,$2A118AE4
SINA4   DC.L $3EC71DE3,$A5341531

SINA3   DC.L $BF2A01A0,$1A018B59,$00000000,$00000000

SINA2   DC.L $3FF80000,$88888888,$888859AF,$00000000

SINA1   DC.L $BFFC0000,$AAAAAAAA,$AAAAAA99,$00000000

COSB8   DC.L $3D2AC4D0,$D6011EE3
COSB7   DC.L $BDA9396F,$9F45AC19

COSB6   DC.L $3E21EED9,$0612C972
COSB5   DC.L $BE927E4F,$B79D9FCF

COSB4   DC.L $3EFA01A0,$1A01D423,$00000000,$00000000

COSB3   DC.L $BFF50000,$B60B60B6,$0B61D438,$00000000

COSB2   DC.L $3FFA0000,$AAAAAAAA,$AAAAAB5E
COSB1   DC.L $BF000000

INVTWOPI DC.L $3FFC0000,$A2F9836E,$4E44152A

TWOPI1  DC.L $40010000,$C90FDAA2,$00000000,$00000000
TWOPI2  DC.L $3FDF0000,$85A308D4,$00000000,$00000000

       xref    PITBL

INARG   equ     FP_SCR4

X       equ     FP_SCR5
XDCARE  equ     X+2
XFRAC   equ     X+4

RPRIME  equ     FP_SCR1
SPRIME  equ     FP_SCR2

POSNEG1 equ     L_SCR1
TWOTO63 equ     L_SCR1

ENDFLAG equ     L_SCR2
N       equ     L_SCR2

ADJN    equ     L_SCR3

       xref    t_frcinx
       xref    t_extdnrm
       xref    sto_cos

       xdef    ssind
ssind:
*--SIN(X) = X FOR DENORMALIZED X
       bra             t_extdnrm

       xdef    scosd
scosd:
*--COS(X) = 1 FOR DENORMALIZED X

       FMOVE.S         #:3F800000,FP0
*
*       9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
*
       fmove.l         #0,fpsr
*
       bra             t_frcinx

       xdef    ssin
ssin:
*--SET ADJN TO 0
       CLR.L           ADJN(a6)
       BRA.B           SINBGN

       xdef    scos
scos:
*--SET ADJN TO 1
       MOVE.L          #1,ADJN(a6)

SINBGN:
*--SAVE FPCR, FP1. CHECK IF |X| IS TOO SMALL OR LARGE

       FMOVE.X         (a0),FP0        ...LOAD INPUT

       MOVE.L          (A0),D0
       MOVE.W          4(A0),D0
       FMOVE.X         FP0,X(a6)
       ANDI.L          #$7FFFFFFF,D0           ...COMPACTIFY X

       CMPI.L          #$3FD78000,D0           ...|X| >= 2**(-40)?
       BGE.B           SOK1
       BRA.W           SINSM

SOK1:
       CMPI.L          #$4004BC7E,D0           ...|X| < 15 PI?
       BLT.B           SINMAIN
       BRA.W           REDUCEX

SINMAIN:
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
       FMOVE.X         FP0,FP1
       FMUL.D          TWOBYPI,FP1     ...X*2/PI

*--HIDE THE NEXT THREE INSTRUCTIONS
       LEA             PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32


*--FP1 IS NOW READY
       FMOVE.L         FP1,N(a6)               ...CONVERT TO INTEGER

       MOVE.L          N(a6),D0
       ASL.L           #4,D0
       ADDA.L          D0,A1   ...A1 IS THE ADDRESS OF N*PIBY2
*                               ...WHICH IS IN TWO PIECES Y1 & Y2

       FSUB.X          (A1)+,FP0       ...X-Y1
*--HIDE THE NEXT ONE
       FSUB.S          (A1),FP0        ...FP0 IS R = (X-Y1)-Y2

SINCONT:
*--continuation from REDUCEX

*--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
       MOVE.L          N(a6),D0
       ADD.L           ADJN(a6),D0     ...SEE IF D0 IS ODD OR EVEN
       ROR.L           #1,D0   ...D0 WAS ODD IFF D0 IS NEGATIVE
       TST.L           D0
       BLT.W           COSPOLY

SINPOLY:
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
*--THEN WE RETURN       SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
*--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
*--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
*--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
*--WHERE T=S*S.
*--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
*--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
       FMOVE.X         FP0,X(a6)       ...X IS R
       FMUL.X          FP0,FP0 ...FP0 IS S
*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
       FMOVE.D         SINA7,FP3
       FMOVE.D         SINA6,FP2
*--FP0 IS NOW READY
       FMOVE.X         FP0,FP1
       FMUL.X          FP1,FP1 ...FP1 IS T
*--HIDE THE NEXT TWO WHILE WAITING FOR FP1

       ROR.L           #1,D0
       ANDI.L          #$80000000,D0
*                               ...LEAST SIG. BIT OF D0 IN SIGN POSITION
       EOR.L           D0,X(a6)        ...X IS NOW R'= SGN*R

       FMUL.X          FP1,FP3 ...TA7
       FMUL.X          FP1,FP2 ...TA6

       FADD.D          SINA5,FP3 ...A5+TA7
       FADD.D          SINA4,FP2 ...A4+TA6

       FMUL.X          FP1,FP3 ...T(A5+TA7)
       FMUL.X          FP1,FP2 ...T(A4+TA6)

       FADD.D          SINA3,FP3 ...A3+T(A5+TA7)
       FADD.X          SINA2,FP2 ...A2+T(A4+TA6)

       FMUL.X          FP3,FP1 ...T(A3+T(A5+TA7))

       FMUL.X          FP0,FP2 ...S(A2+T(A4+TA6))
       FADD.X          SINA1,FP1 ...A1+T(A3+T(A5+TA7))
       FMUL.X          X(a6),FP0       ...R'*S

       FADD.X          FP2,FP1 ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
*--FP2 RELEASED, RESTORE NOW AND TAKE FULL ADVANTAGE OF HIDING


       FMUL.X          FP1,FP0         ...SIN(R')-R'
*--FP1 RELEASED.

       FMOVE.L         d1,FPCR         ;restore users exceptions
       FADD.X          X(a6),FP0               ;last inst - possible exception set
       bra             t_frcinx


COSPOLY:
*--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
*--THEN WE RETURN       SGN*COS(R). SGN*COS(R) IS COMPUTED BY
*--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
*--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
*--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
*--WHERE T=S*S.
*--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
*--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
*--AND IS THEREFORE STORED AS SINGLE PRECISION.

       FMUL.X          FP0,FP0 ...FP0 IS S
*---HIDE THE NEXT TWO WHILE WAITING FOR FP0
       FMOVE.D         COSB8,FP2
       FMOVE.D         COSB7,FP3
*--FP0 IS NOW READY
       FMOVE.X         FP0,FP1
       FMUL.X          FP1,FP1 ...FP1 IS T
*--HIDE THE NEXT TWO WHILE WAITING FOR FP1
       FMOVE.X         FP0,X(a6)       ...X IS S
       ROR.L           #1,D0
       ANDI.L          #$80000000,D0
*                       ...LEAST SIG. BIT OF D0 IN SIGN POSITION

       FMUL.X          FP1,FP2 ...TB8
*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
       EOR.L           D0,X(a6)        ...X IS NOW S'= SGN*S
       ANDI.L          #$80000000,D0

       FMUL.X          FP1,FP3 ...TB7
*--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
       ORI.L           #$3F800000,D0   ...D0 IS SGN IN SINGLE
       MOVE.L          D0,POSNEG1(a6)

       FADD.D          COSB6,FP2 ...B6+TB8
       FADD.D          COSB5,FP3 ...B5+TB7

       FMUL.X          FP1,FP2 ...T(B6+TB8)
       FMUL.X          FP1,FP3 ...T(B5+TB7)

       FADD.D          COSB4,FP2 ...B4+T(B6+TB8)
       FADD.X          COSB3,FP3 ...B3+T(B5+TB7)

       FMUL.X          FP1,FP2 ...T(B4+T(B6+TB8))
       FMUL.X          FP3,FP1 ...T(B3+T(B5+TB7))

       FADD.X          COSB2,FP2 ...B2+T(B4+T(B6+TB8))
       FADD.S          COSB1,FP1 ...B1+T(B3+T(B5+TB7))

       FMUL.X          FP2,FP0 ...S(B2+T(B4+T(B6+TB8)))
*--FP3 RELEASED, RESTORE NOW AND TAKE SOME ADVANTAGE OF HIDING
*--FP2 RELEASED.


       FADD.X          FP1,FP0
*--FP1 RELEASED

       FMUL.X          X(a6),FP0

       FMOVE.L         d1,FPCR         ;restore users exceptions
       FADD.S          POSNEG1(a6),FP0 ;last inst - possible exception set
       bra             t_frcinx


SINBORS:
*--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
*--IF |X| < 2**(-40), RETURN X OR 1.
       CMPI.L          #$3FFF8000,D0
       BGT.B           REDUCEX


SINSM:
       MOVE.L          ADJN(a6),D0
       TST.L           D0
       BGT.B           COSTINY

SINTINY:
       CLR.W           XDCARE(a6)      ...JUST IN CASE
       FMOVE.L         d1,FPCR         ;restore users exceptions
       FMOVE.X         X(a6),FP0               ;last inst - possible exception set
       bra             t_frcinx


COSTINY:
       FMOVE.S         #:3F800000,FP0

       FMOVE.L         d1,FPCR         ;restore users exceptions
       FSUB.S          #:00800000,FP0  ;last inst - possible exception set
       bra             t_frcinx


REDUCEX:
*--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
*--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
*--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.

       FMOVEM.X        FP2-FP5,-(A7)   ...save FP2 through FP5
       MOVE.L          D2,-(A7)
       FMOVE.S         #:00000000,FP1
*--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
*--there is a danger of unwanted overflow in first LOOP iteration.  In this
*--case, reduce argument by one remainder step to make subsequent reduction
*--safe.
       cmpi.l  #$7ffeffff,d0           ;is argument dangerously large?
       bne.b   LOOP
       move.l  #$7ffe0000,FP_SCR2(a6)  ;yes
*                                       ;create 2**16383*PI/2
       move.l  #$c90fdaa2,FP_SCR2+4(a6)
       clr.l   FP_SCR2+8(a6)
       ftst.x  fp0                     ;test sign of argument
       move.l  #$7fdc0000,FP_SCR3(a6)  ;create low half of 2**16383*
*                                       ;PI/2 at FP_SCR3
       move.l  #$85a308d3,FP_SCR3+4(a6)
       clr.l   FP_SCR3+8(a6)
       fblt.w  red_neg
       or.w    #$8000,FP_SCR2(a6)      ;positive arg
       or.w    #$8000,FP_SCR3(a6)
red_neg:
       fadd.x  FP_SCR2(a6),fp0         ;high part of reduction is exact
       fmove.x  fp0,fp1                ;save high result in fp1
       fadd.x  FP_SCR3(a6),fp0         ;low part of reduction
       fsub.x  fp0,fp1                 ;determine low component of result
       fadd.x  FP_SCR3(a6),fp1         ;fp0/fp1 are reduced argument.

*--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
*--integer quotient will be stored in N
*--Intermeditate remainder is 66-bit long; (R,r) in (FP0,FP1)

LOOP:
       FMOVE.X         FP0,INARG(a6)   ...+-2**K * F, 1 <= F < 2
       MOVE.W          INARG(a6),D0
       MOVE.L          D0,A1           ...save a copy of D0
       ANDI.L          #$00007FFF,D0
       SUBI.L          #$00003FFF,D0   ...D0 IS K
       CMPI.L          #28,D0
       BLE.B           LASTLOOP
CONTLOOP:
       SUBI.L          #27,D0   ...D0 IS L := K-27
       CLR.L           ENDFLAG(a6)
       BRA.B           WORK
LASTLOOP:
       CLR.L           D0              ...D0 IS L := 0
       MOVE.L          #1,ENDFLAG(a6)

WORK:
*--FIND THE REMAINDER OF (R,r) W.R.T.   2**L * (PI/2). L IS SO CHOSEN
*--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.

*--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
*--2**L * (PIby2_1), 2**L * (PIby2_2)

       MOVE.L          #$00003FFE,D2   ...BIASED EXPO OF 2/PI
       SUB.L           D0,D2           ...BIASED EXPO OF 2**(-L)*(2/PI)

       MOVE.L          #$A2F9836E,FP_SCR1+4(a6)
       MOVE.L          #$4E44152A,FP_SCR1+8(a6)
       MOVE.W          D2,FP_SCR1(a6)  ...FP_SCR1 is 2**(-L)*(2/PI)

       FMOVE.X         FP0,FP2
       FMUL.X          FP_SCR1(a6),FP2
*--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
*--FLOATING POINT FORMAT, THE TWO FMOVE'S       FMOVE.L FP <--> N
*--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
*--(SIGN(INARG)*2**63   +       FP2) - SIGN(INARG)*2**63 WILL GIVE
*--US THE DESIRED VALUE IN FLOATING POINT.

*--HIDE SIX CYCLES OF INSTRUCTION
       MOVE.L          A1,D2
       SWAP            D2
       ANDI.L          #$80000000,D2
       ORI.L           #$5F000000,D2   ...D2 IS SIGN(INARG)*2**63 IN SGL
       MOVE.L          D2,TWOTO63(a6)

       MOVE.L          D0,D2
       ADDI.L          #$00003FFF,D2   ...BIASED EXPO OF 2**L * (PI/2)

*--FP2 IS READY
       FADD.S          TWOTO63(a6),FP2 ...THE FRACTIONAL PART OF FP1 IS ROUNDED

*--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
       MOVE.W          D2,FP_SCR2(a6)
       CLR.W           FP_SCR2+2(a6)
       MOVE.L          #$C90FDAA2,FP_SCR2+4(a6)
       CLR.L           FP_SCR2+8(a6)           ...FP_SCR2 is  2**(L) * Piby2_1

*--FP2 IS READY
       FSUB.S          TWOTO63(a6),FP2         ...FP2 is N

       ADDI.L          #$00003FDD,D0
       MOVE.W          D0,FP_SCR3(a6)
       CLR.W           FP_SCR3+2(a6)
       MOVE.L          #$85A308D3,FP_SCR3+4(a6)
       CLR.L           FP_SCR3+8(a6)           ...FP_SCR3 is 2**(L) * Piby2_2

       MOVE.L          ENDFLAG(a6),D0

*--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
*--P2 = 2**(L) * Piby2_2
       FMOVE.X         FP2,FP4
       FMul.X          FP_SCR2(a6),FP4         ...W = N*P1
       FMove.X         FP2,FP5
       FMul.X          FP_SCR3(a6),FP5         ...w = N*P2
       FMove.X         FP4,FP3
*--we want P+p = W+w  but  |p| <= half ulp of P
*--Then, we need to compute  A := R-P   and  a := r-p
       FAdd.X          FP5,FP3                 ...FP3 is P
       FSub.X          FP3,FP4                 ...W-P

       FSub.X          FP3,FP0                 ...FP0 is A := R - P
       FAdd.X          FP5,FP4                 ...FP4 is p = (W-P)+w

       FMove.X         FP0,FP3                 ...FP3 A
       FSub.X          FP4,FP1                 ...FP1 is a := r - p

*--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
*--|r| <= half ulp of R.
       FAdd.X          FP1,FP0                 ...FP0 is R := A+a
*--No need to calculate r if this is the last loop
       TST.L           D0
       BGT.W           RESTORE

*--Need to calculate r
       FSub.X          FP0,FP3                 ...A-R
       FAdd.X          FP3,FP1                 ...FP1 is r := (A-R)+a
       BRA.W           LOOP

RESTORE:
       FMOVE.L         FP2,N(a6)
       MOVE.L          (A7)+,D2
       FMOVEM.X        (A7)+,FP2-FP5


       MOVE.L          ADJN(a6),D0
       CMPI.L          #4,D0

       BLT.W           SINCONT
       BRA.B           SCCONT

       xdef    ssincosd
ssincosd:
*--SIN AND COS OF X FOR DENORMALIZED X

       FMOVE.S         #:3F800000,FP1
       bsr             sto_cos         ;store cosine result
       bra             t_extdnrm

       xdef    ssincos
ssincos:
*--SET ADJN TO 4
       MOVE.L          #4,ADJN(a6)

       FMOVE.X         (a0),FP0        ...LOAD INPUT

       MOVE.L          (A0),D0
       MOVE.W          4(A0),D0
       FMOVE.X         FP0,X(a6)
       ANDI.L          #$7FFFFFFF,D0           ...COMPACTIFY X

       CMPI.L          #$3FD78000,D0           ...|X| >= 2**(-40)?
       BGE.B           SCOK1
       BRA.W           SCSM

SCOK1:
       CMPI.L          #$4004BC7E,D0           ...|X| < 15 PI?
       BLT.B           SCMAIN
       BRA.W           REDUCEX


SCMAIN:
*--THIS IS THE USUAL CASE, |X| <= 15 PI.
*--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
       FMOVE.X         FP0,FP1
       FMUL.D          TWOBYPI,FP1     ...X*2/PI

*--HIDE THE NEXT THREE INSTRUCTIONS
       LEA             PITBL+$200,A1 ...TABLE OF N*PI/2, N = -32,...,32


*--FP1 IS NOW READY
       FMOVE.L         FP1,N(a6)               ...CONVERT TO INTEGER

       MOVE.L          N(a6),D0
       ASL.L           #4,D0
       ADDA.L          D0,A1           ...ADDRESS OF N*PIBY2, IN Y1, Y2

       FSUB.X          (A1)+,FP0       ...X-Y1
       FSUB.S          (A1),FP0        ...FP0 IS R = (X-Y1)-Y2

SCCONT:
*--continuation point from REDUCEX

*--HIDE THE NEXT TWO
       MOVE.L          N(a6),D0
       ROR.L           #1,D0

       TST.L           D0              ...D0 < 0 IFF N IS ODD
       BGE.W           NEVEN

NODD:
*--REGISTERS SAVED SO FAR: D0, A0, FP2.

       FMOVE.X         FP0,RPRIME(a6)
       FMUL.X          FP0,FP0  ...FP0 IS S = R*R
       FMOVE.D         SINA7,FP1       ...A7
       FMOVE.D         COSB8,FP2       ...B8
       FMUL.X          FP0,FP1  ...SA7
       MOVE.L          d2,-(A7)
       MOVE.L          D0,d2
       FMUL.X          FP0,FP2  ...SB8
       ROR.L           #1,d2
       ANDI.L          #$80000000,d2

       FADD.D          SINA6,FP1       ...A6+SA7
       EOR.L           D0,d2
       ANDI.L          #$80000000,d2
       FADD.D          COSB7,FP2       ...B7+SB8

       FMUL.X          FP0,FP1  ...S(A6+SA7)
       EOR.L           d2,RPRIME(a6)
       MOVE.L          (A7)+,d2
       FMUL.X          FP0,FP2  ...S(B7+SB8)
       ROR.L           #1,D0
       ANDI.L          #$80000000,D0

       FADD.D          SINA5,FP1       ...A5+S(A6+SA7)
       MOVE.L          #$3F800000,POSNEG1(a6)
       EOR.L           D0,POSNEG1(a6)
       FADD.D          COSB6,FP2       ...B6+S(B7+SB8)

       FMUL.X          FP0,FP1  ...S(A5+S(A6+SA7))
       FMUL.X          FP0,FP2  ...S(B6+S(B7+SB8))
       FMOVE.X         FP0,SPRIME(a6)

       FADD.D          SINA4,FP1       ...A4+S(A5+S(A6+SA7))
       EOR.L           D0,SPRIME(a6)
       FADD.D          COSB5,FP2       ...B5+S(B6+S(B7+SB8))

       FMUL.X          FP0,FP1  ...S(A4+...)
       FMUL.X          FP0,FP2  ...S(B5+...)

       FADD.D          SINA3,FP1       ...A3+S(A4+...)
       FADD.D          COSB4,FP2       ...B4+S(B5+...)

       FMUL.X          FP0,FP1  ...S(A3+...)
       FMUL.X          FP0,FP2  ...S(B4+...)

       FADD.X          SINA2,FP1       ...A2+S(A3+...)
       FADD.X          COSB3,FP2       ...B3+S(B4+...)

       FMUL.X          FP0,FP1  ...S(A2+...)
       FMUL.X          FP0,FP2  ...S(B3+...)

       FADD.X          SINA1,FP1       ...A1+S(A2+...)
       FADD.X          COSB2,FP2       ...B2+S(B3+...)

       FMUL.X          FP0,FP1  ...S(A1+...)
       FMUL.X          FP2,FP0  ...S(B2+...)



       FMUL.X          RPRIME(a6),FP1  ...R'S(A1+...)
       FADD.S          COSB1,FP0       ...B1+S(B2...)
       FMUL.X          SPRIME(a6),FP0  ...S'(B1+S(B2+...))

       move.l          d1,-(sp)        ;restore users mode & precision
       andi.l          #$ff,d1         ;mask off all exceptions
       fmove.l         d1,FPCR
       FADD.X          RPRIME(a6),FP1  ...COS(X)
       bsr             sto_cos         ;store cosine result
       FMOVE.L         (sp)+,FPCR      ;restore users exceptions
       FADD.S          POSNEG1(a6),FP0 ...SIN(X)

       bra             t_frcinx


NEVEN:
*--REGISTERS SAVED SO FAR: FP2.

       FMOVE.X         FP0,RPRIME(a6)
       FMUL.X          FP0,FP0  ...FP0 IS S = R*R
       FMOVE.D         COSB8,FP1                       ...B8
       FMOVE.D         SINA7,FP2                       ...A7
       FMUL.X          FP0,FP1  ...SB8
       FMOVE.X         FP0,SPRIME(a6)
       FMUL.X          FP0,FP2  ...SA7
       ROR.L           #1,D0
       ANDI.L          #$80000000,D0
       FADD.D          COSB7,FP1       ...B7+SB8
       FADD.D          SINA6,FP2       ...A6+SA7
       EOR.L           D0,RPRIME(a6)
       EOR.L           D0,SPRIME(a6)
       FMUL.X          FP0,FP1  ...S(B7+SB8)
       ORI.L           #$3F800000,D0
       MOVE.L          D0,POSNEG1(a6)
       FMUL.X          FP0,FP2  ...S(A6+SA7)

       FADD.D          COSB6,FP1       ...B6+S(B7+SB8)
       FADD.D          SINA5,FP2       ...A5+S(A6+SA7)

       FMUL.X          FP0,FP1  ...S(B6+S(B7+SB8))
       FMUL.X          FP0,FP2  ...S(A5+S(A6+SA7))

       FADD.D          COSB5,FP1       ...B5+S(B6+S(B7+SB8))
       FADD.D          SINA4,FP2       ...A4+S(A5+S(A6+SA7))

       FMUL.X          FP0,FP1  ...S(B5+...)
       FMUL.X          FP0,FP2  ...S(A4+...)

       FADD.D          COSB4,FP1       ...B4+S(B5+...)
       FADD.D          SINA3,FP2       ...A3+S(A4+...)

       FMUL.X          FP0,FP1  ...S(B4+...)
       FMUL.X          FP0,FP2  ...S(A3+...)

       FADD.X          COSB3,FP1       ...B3+S(B4+...)
       FADD.X          SINA2,FP2       ...A2+S(A3+...)

       FMUL.X          FP0,FP1  ...S(B3+...)
       FMUL.X          FP0,FP2  ...S(A2+...)

       FADD.X          COSB2,FP1       ...B2+S(B3+...)
       FADD.X          SINA1,FP2       ...A1+S(A2+...)

       FMUL.X          FP0,FP1  ...S(B2+...)
       fmul.x          fp2,fp0  ...s(a1+...)



       FADD.S          COSB1,FP1       ...B1+S(B2...)
       FMUL.X          RPRIME(a6),FP0  ...R'S(A1+...)
       FMUL.X          SPRIME(a6),FP1  ...S'(B1+S(B2+...))

       move.l          d1,-(sp)        ;save users mode & precision
       andi.l          #$ff,d1         ;mask off all exceptions
       fmove.l         d1,FPCR
       FADD.S          POSNEG1(a6),FP1 ...COS(X)
       bsr             sto_cos         ;store cosine result
       FMOVE.L         (sp)+,FPCR      ;restore users exceptions
       FADD.X          RPRIME(a6),FP0  ...SIN(X)

       bra             t_frcinx

SCBORS:
       CMPI.L          #$3FFF8000,D0
       BGT.W           REDUCEX


SCSM:
       CLR.W           XDCARE(a6)
       FMOVE.S         #:3F800000,FP1

       move.l          d1,-(sp)        ;save users mode & precision
       andi.l          #$ff,d1         ;mask off all exceptions
       fmove.l         d1,FPCR
       FSUB.S          #:00800000,FP1
       bsr             sto_cos         ;store cosine result
       FMOVE.L         (sp)+,FPCR      ;restore users exceptions
       FMOVE.X         X(a6),FP0
       bra             t_frcinx

       end