* $NetBSD: x_unfl.sa,v 1.6 2024/09/20 19:38:53 andvar Exp $

* $NetBSD: x_unfl.sa,v 1.6 2024/09/20 19:38:53 andvar Exp $

* MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
* M68000 Hi-Performance Microprocessor Division
* M68040 Software Package
*
* M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
* All rights reserved.
*
* THE SOFTWARE is provided on an "AS IS" basis and without warranty.
* To the maximum extent permitted by applicable law,
* MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
* INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
* PARTICULAR PURPOSE and any warranty against infringement with
* regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
* and any accompanying written materials.
*
* To the maximum extent permitted by applicable law,
* IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
* (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
* PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
* OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
* SOFTWARE. Motorola assumes no responsibility for the maintenance
* and support of the SOFTWARE.
*
* You are hereby granted a copyright license to use, modify, and
* distribute the SOFTWARE so long as this entire notice is retained
* without alteration in any modified and/or redistributed versions,
* and that such modified versions are clearly identified as such.
* No licenses are granted by implication, estoppel or otherwise
* under any patents or trademarks of Motorola, Inc.

*
* x_unfl.sa 3.4 7/1/91
*
* fpsp_unfl --- FPSP handler for underflow exception
*
* Trap disabled results
* For 881/2 compatibility, sw must denormalize the intermediate
* result, then store the result. Denormalization is accomplished
* by taking the intermediate result (which is always normalized) and
* shifting the mantissa right while incrementing the exponent until
* it is equal to the denormalized exponent for the destination
* format. After denormalization, the result is rounded to the
* destination format.
*
* Trap enabled results
* All trap disabled code applies. In addition the exceptional
* operand needs to made available to the user with a bias of $6000
* added to the exponent.
*

X_UNFL IDNT 2,1 Motorola 040 Floating Point Software Package

section 8

include fpsp.h

xref denorm
xref round
xref store
xref g_rndpr
xref g_opcls
xref g_dfmtou
xref real_unfl
xref real_inex
xref fpsp_done
xref b1238_fix

xdef fpsp_unfl
fpsp_unfl:
link a6,#-LOCAL_SIZE
fsave -(a7)
movem.l d0-d1/a0-a1,USER_DA(a6)
fmovem.x fp0-fp3,USER_FP0(a6)
fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6)

*
bsr.l unf_res ;denormalize, round & store interm op
*
* If underflow exceptions are not enabled, check for inexact
* exception
*
btst.b #unfl_bit,FPCR_ENABLE(a6)
beq.b ck_inex

btst.b #E3,E_BYTE(a6)
beq.b no_e3_1
*
* Clear dirty bit on dest register in the frame before branching
* to b1238_fix.
*
bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no
bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit
bsr.l b1238_fix ;test for bug1238 case
move.l USER_FPSR(a6),FPSR_SHADOW(a6)
or.l #sx_mask,E_BYTE(a6)
no_e3_1:
movem.l USER_DA(a6),d0-d1/a0-a1
fmovem.x USER_FP0(a6),fp0-fp3
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
frestore (a7)+
unlk a6
bra.l real_unfl
*
* It is possible to have either inex2 or inex1 exceptions with the
* unfl. If the inex enable bit is set in the FPCR, and either
* inex2 or inex1 occurred, we must clean up and branch to the
* real inex handler.
*
ck_inex:
move.b FPCR_ENABLE(a6),d0
and.b FPSR_EXCEPT(a6),d0
andi.b #$3,d0
beq.b unfl_done

*
* Inexact enabled and reported, and we must take an inexact exception
*
take_inex:
btst.b #E3,E_BYTE(a6)
beq.b no_e3_2
*
* Clear dirty bit on dest register in the frame before branching
* to b1238_fix.
*
bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no
bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit
bsr.l b1238_fix ;test for bug1238 case
move.l USER_FPSR(a6),FPSR_SHADOW(a6)
or.l #sx_mask,E_BYTE(a6)
no_e3_2:
move.b #INEX_VEC,EXC_VEC+1(a6)
movem.l USER_DA(a6),d0-d1/a0-a1
fmovem.x USER_FP0(a6),fp0-fp3
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
frestore (a7)+
unlk a6
bra.l real_inex

unfl_done:
bclr.b #E3,E_BYTE(a6)
beq.b e1_set ;if set then branch
*
* Clear dirty bit on dest register in the frame before branching
* to b1238_fix.
*
bfextu CMDREG3B(a6){6:3},d0 ;get dest reg no
bclr.b d0,FPR_DIRTY_BITS(a6) ;clr dest dirty bit
bsr.l b1238_fix ;test for bug1238 case
move.l USER_FPSR(a6),FPSR_SHADOW(a6)
or.l #sx_mask,E_BYTE(a6)
movem.l USER_DA(a6),d0-d1/a0-a1
fmovem.x USER_FP0(a6),fp0-fp3
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
frestore (a7)+
unlk a6
bra.l fpsp_done
e1_set:
movem.l USER_DA(a6),d0-d1/a0-a1
fmovem.x USER_FP0(a6),fp0-fp3
fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
unlk a6
bra.l fpsp_done
*
* unf_res --- underflow result calculation
*
unf_res:
bsr.l g_rndpr ;returns RND_PREC in d0 0=ext,
* ;1=sgl, 2=dbl
* ;we need the RND_PREC in the
* ;upper word for round
clr.w -(a7)
move.w d0,-(a7) ;copy RND_PREC to stack
*
*
* If the exception bit set is E3, the exceptional operand from the
* fpu is in WBTEMP; else it is in FPTEMP.
*
btst.b #E3,E_BYTE(a6)
beq.b unf_E1
unf_E3:
lea WBTEMP(a6),a0 ;a0 now points to operand
*
* Test for fsgldiv and fsglmul. If the inst was one of these, then
* force the precision to extended for the denorm routine. Use
* the user's precision for the round routine.
*
move.w CMDREG3B(a6),d1 ;check for fsgldiv or fsglmul
andi.w #$7f,d1
cmpi.w #$30,d1 ;check for sgldiv
beq.b unf_sgl
cmpi.w #$33,d1 ;check for sglmul
bne.b unf_cont ;if not, use fpcr prec in round
unf_sgl:
clr.l d0
move.w #$1,(a7) ;override g_rndpr precision
* ;force single
bra.b unf_cont
unf_E1:
lea FPTEMP(a6),a0 ;a0 now points to operand
unf_cont:
bclr.b #sign_bit,LOCAL_EX(a0) ;clear sign bit
sne LOCAL_SGN(a0) ;store sign

bsr.l denorm ;returns denorm, a0 points to it
*
* WARNING:
* ;d0 has guard,round sticky bit
* ;make sure that it is not corrupted
* ;before it reaches the round subroutine
* ;also ensure that a0 isn't corrupted

*
* Set up d1 for round subroutine d1 contains the PREC/MODE
* information respectively on upper/lower register halves.
*
bfextu FPCR_MODE(a6){2:2},d1 ;get mode from FPCR
* ;mode in lower d1
add.l (a7)+,d1 ;merge PREC/MODE
*
* WARNING: a0 and d0 are assumed to be intact between the denorm and
* round subroutines. All code between these two subroutines
* must not corrupt a0 and d0.
*
*
* Perform Round
* Input: a0 points to input operand
* d0{31:29} has guard, round, sticky
* d1{01:00} has rounding mode
* d1{17:16} has rounding precision
* Output: a0 points to rounded operand
*

bsr.l round ;returns rounded denorm at (a0)
*
* Differentiate between store to memory vs. store to register
*
unf_store:
bsr.l g_opcls ;returns opclass in d0{2:0}
cmpi.b #$3,d0
bne.b not_opc011
*
* At this point, a store to memory is pending
*
opc011:
bsr.l g_dfmtou
tst.b d0
beq.b ext_opc011 ;If extended, do not subtract
* ;If destination format is sgl/dbl,
tst.b LOCAL_HI(a0) ;If rounded result is normal,don't
* ;subtract
bmi.b ext_opc011
subq.w #1,LOCAL_EX(a0) ;account for denorm bias vs.
* ;normalized bias
* ; normalized denormalized
* ;single $7f $7e
* ;double $3ff $3fe
*
ext_opc011:
bsr.l store ;stores to memory
bra.b unf_done ;finish up

*
* At this point, a store to a float register is pending
*
not_opc011:
bsr.l store ;stores to float register
* ;a0 is not corrupted on a store to a
* ;float register.
*
* Set the condition codes according to result
*
tst.l LOCAL_HI(a0) ;check upper mantissa
bne.b ck_sgn
tst.l LOCAL_LO(a0) ;check lower mantissa
bne.b ck_sgn
bset.b #z_bit,FPSR_CC(a6) ;set condition codes if zero
ck_sgn:
btst.b #sign_bit,LOCAL_EX(a0) ;check the sign bit
beq.b unf_done
bset.b #neg_bit,FPSR_CC(a6)

*
* Finish.
*
unf_done:
btst.b #inex2_bit,FPSR_EXCEPT(a6)
beq.b no_aunfl
bset.b #aunfl_bit,FPSR_AEXCEPT(a6)
no_aunfl:
rts

end