/* $NetBSD: fpu_emu.c,v 1.60 2022/09/20 12:25:01 rin Exp $ */
/*
* Copyright 2001 Wasabi Systems, Inc.
* All rights reserved.
*
* Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed for the NetBSD Project by
* Wasabi Systems, Inc.
* 4. The name of Wasabi Systems, Inc. may not be used to endorse
* or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This software was developed by the Computer Systems Engineering group
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
* contributed to Berkeley.
*
* All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Lawrence Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)fpu.c 8.1 (Berkeley) 6/11/93
*/
/*
* fpu_execute returns the following error numbers (0 = no error):
*/
#define FPE 1 /* take a floating point exception */
#define NOTFPU 2 /* not an FPU instruction */
#define FAULT 3 /* copyin/copyout of the operand failed; fe_addr holds the address */
/*
* Emulate a floating-point instruction.
* Return true if insn is consumed anyway.
* Otherwise, the caller must take care of it.
*/
bool
fpu_emulate(struct trapframe *tf, struct fpreg *fpf, ksiginfo_t *ksi)
{
struct pcb *pcb;
union instr insn;
struct fpemu fe;
/*
* fpu_to_single(): Helper function for stfs{,u}{,x}.
*
* Single-precision (float) data is internally represented in
* double-precision (double) format in floating-point registers (FRs).
* Even though a double value cannot, in general, be represented in
* float format, the Power ISA (2.0.3--3.1) specifies the conversion
* algorithm used when it is stored to memory (see Sec. 4.6.3):
*
* - Extra fraction bits are truncated regardless of rounding mode.
* - When magnitude is larger than the maximum number in float format,
* bits 63--62 and 58--29 are mechanically copied into bits 31--0.
* - When magnitude is representable as denormalized number in float
* format, it is stored as normalized double value in FRs;
* denormalization is required in this case.
* - When magnitude is smaller than the minimum denormalized number in
* float format, the result is undefined. For G5 (970MP Rev 1.1),
* (sign | 0) seems to be stored. For G4 and prior, some ``random''
* garbage is stored in exponent. We mimic G5 for now.
*/
static uint32_t
fpu_to_single(uint64_t reg)
{
	/* Sign moves from bit 63 of the double to bit 31 of the word. */
	const uint32_t signbit = (reg & __BIT(63)) >> 32;
	/* Unbiased exponent taken from the 11-bit double exponent field. */
	const int unbiased = (int)__SHIFTOUT(reg, __BITS(62, 52)) - 1023;
	int rshift;

	if (unbiased > -127 || (reg & ~__BIT(63)) == 0) {
		/*
		 * Normalized, zero, inf, NaN, or larger than MAXFLOAT:
		 * no denormalization is needed.  The exponent MSB (bit 62)
		 * becomes bit 30, and bits 58--29 (the 7 exponent LSBs
		 * followed by the top 23 fraction bits) become bits 29--0.
		 */
		const uint32_t exp_msb = (reg & __BIT(62)) >> 32;
		const uint32_t low30 = __SHIFTOUT(reg, __BITS(58, 29));

		return signbit | exp_msb | low30;
	}

	if (unbiased < -149) {
		/*
		 * Below the smallest float denormal: the result is
		 * undefined, so store (sign | 0).
		 */
		return signbit;
	}

	/*
	 * -149 <= unbiased <= -127: the value is a float denormal.
	 * Make the implicit leading one explicit (bit 52) and keep the
	 * bits from 52 down to (29 + rshift); the float exponent field
	 * stays zero.
	 */
	rshift = -126 - unbiased;	/* 1 ... 23 */
	return signbit |
	    (uint32_t)__SHIFTOUT(__BIT(52) | reg, __BITS(52, 29 + rshift));
}
/*
* Execute an FPU instruction (one that runs entirely in the FPU; not
* FBfcc or STF, for instance). On return, fe->fe_fs->fs_fsr will be
* modified to reflect the setting the hardware would have left.
*
* Note that we do not catch all illegal opcodes, so you can, for instance,
* multiply two integers this way.
*/
static int
fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
{
struct fpn *fp;
union instr instr = *insn;
int *a;
int ra, rb, rc, rt, type, mask, fsr, cx, bf, setcr, cond;
u_int bits;
struct fpreg *fs;
int i;
/*
* On PowerPC all floating point values are stored in registers
* as doubles, even when used for single precision operations.
*/
type = FTYPE_DBL;
cond = instr.i_any.i_rc;
setcr = 0;
bf = 0; /* XXX gcc */
#if defined(DDB) && defined(DEBUG)
if (fpe_debug & FPE_EX) {
vaddr_t loc = tf->tf_srr0;
if ((instr.i_any.i_opcd >= OPC_LFS && instr.i_any.i_opcd <= OPC_STFDU) ||
instr.i_any.i_opcd == OPC_integer_31) {
/*
* Handle load/store insns:
*
* Convert to/from single if needed, calculate addr,
* and update index reg if needed.
*/
vaddr_t addr;
size_t size = sizeof(double);
int store, update;
cond = 0; /* ld/st never set condition codes */
if (instr.i_any.i_opcd == OPC_integer_31) {
if (instr.i_x.i_xo == OPC31_STFIWX) {
FPU_EMU_EVCNT_INCR(stfiwx);
/* Store as integer */
ra = instr.i_x.i_ra;
rb = instr.i_x.i_rb;
DPRINTF(FPE_INSN, ("reg %d has %lx reg %d has %lx\n",
ra, tf->tf_fixreg[ra], rb, tf->tf_fixreg[rb]));
addr = tf->tf_fixreg[rb];
if (ra != 0)
addr += tf->tf_fixreg[ra];
rt = instr.i_x.i_rt;
a = (int *)&fs->fpreg[rt];
DPRINTF(FPE_INSN,
("fpu_execute: Store INT %x at %p\n",
a[1], (void *)addr));
if (copyout(&a[1], (void *)addr, sizeof(int))) {
fe->fe_addr = addr;
return (FAULT);
}
return (0);
}
if ((instr.i_x.i_xo & OPC31_FPMASK) != OPC31_FPOP)
/* Not an indexed FP load/store op */
return (NOTFPU);
store = (instr.i_x.i_xo & 0x80);
if ((instr.i_x.i_xo & 0x40) == 0) {
type = FTYPE_SNG;
size = sizeof(float);
}
update = (instr.i_x.i_xo & 0x20);
/* calculate EA of load/store */
ra = instr.i_x.i_ra;
rb = instr.i_x.i_rb;
DPRINTF(FPE_INSN, ("reg %d has %lx reg %d has %lx\n",
ra, tf->tf_fixreg[ra], rb, tf->tf_fixreg[rb]));
addr = tf->tf_fixreg[rb];
if (ra != 0)
addr += tf->tf_fixreg[ra];
rt = instr.i_x.i_rt;
} else {
store = instr.i_d.i_opcd & 0x4;
if ((instr.i_d.i_opcd & 0x2) == 0) {
type = FTYPE_SNG;
size = sizeof(float);
}
update = instr.i_d.i_opcd & 0x1;
/* calculate EA of load/store */
ra = instr.i_d.i_ra;
addr = instr.i_d.i_d;
DPRINTF(FPE_INSN, ("reg %d has %lx displ %lx\n",
ra, tf->tf_fixreg[ra], addr));
if (ra != 0)
addr += tf->tf_fixreg[ra];
rt = instr.i_d.i_rt;
}
if (update && ra == 0)
return (NOTFPU);
if (store) {
/* Store */
uint32_t word;
const void *kaddr;
FPU_EMU_EVCNT_INCR(fpstore);
if (type != FTYPE_DBL) {
/*
* As Power ISA specifies conversion algorithm
* for store floating-point single insns, we
* cannot use fpu_explode() and _implode() here.
* See fpu_to_single() and comment therein for
* more details.
*/
DPRINTF(FPE_INSN,
("fpu_execute: Store SNG at %p\n",
(void *)addr));
word = fpu_to_single(FR(rt));
kaddr = &word;
} else {
DPRINTF(FPE_INSN,
("fpu_execute: Store DBL at %p\n",
(void *)addr));
kaddr = &FR(rt);
}
if (copyout(kaddr, (void *)addr, size)) {
fe->fe_addr = addr;
return (FAULT);
}
} else {
/* Load */
FPU_EMU_EVCNT_INCR(fpload);
DPRINTF(FPE_INSN, ("fpu_execute: Load from %p\n",
(void *)addr));
if (copyin((const void *)addr, &FR(rt), size)) {
fe->fe_addr = addr;
return (FAULT);
}
if (type != FTYPE_DBL) {
fpu_explode(fe, fp = &fe->fe_f1, type, FR(rt));
fpu_implode(fe, fp, FTYPE_DBL, &FR(rt));
}
}
if (update)
tf->tf_fixreg[ra] = addr;
/* Complete. */
return (0);
} else if (instr.i_any.i_opcd == OPC_sp_fp_59 ||
instr.i_any.i_opcd == OPC_dp_fp_63) {
if (instr.i_any.i_opcd == OPC_dp_fp_63 &&
!(instr.i_a.i_xo & OPC63M_MASK)) {
/* Format X */
rt = instr.i_x.i_rt;
ra = instr.i_x.i_ra;
rb = instr.i_x.i_rb;
/* One of the special opcodes.... */
switch (instr.i_x.i_xo) {
case OPC63_FCMPU:
FPU_EMU_EVCNT_INCR(fcmpu);
DPRINTF(FPE_INSN, ("fpu_execute: FCMPU\n"));
rt >>= 2;
fpu_explode(fe, &fe->fe_f1, type, FR(ra));
fpu_explode(fe, &fe->fe_f2, type, FR(rb));
fpu_compare(fe, 0);
/* Make sure we do the condition regs. */
cond = 0;
/* N.B.: i_rs is already left shifted by two. */
bf = instr.i_x.i_rs & 0xfc;
setcr = 1;
break;
case OPC63_FRSP:
/*
* Convert to single:
*
* PowerPC uses this to round a double
* precision value to single precision,
* but values in registers are always
* stored in double precision format.
*/
FPU_EMU_EVCNT_INCR(frsp);
DPRINTF(FPE_INSN, ("fpu_execute: FRSP\n"));
fpu_explode(fe, fp = &fe->fe_f1, FTYPE_DBL,
FR(rb));
fpu_implode(fe, fp, FTYPE_SNG, &FR(rt));
fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG,
FR(rt));
type = FTYPE_DBL | FTYPE_FPSCR;
break;
case OPC63_FCTIW:
case OPC63_FCTIWZ:
FPU_EMU_EVCNT_INCR(fctiw);
DPRINTF(FPE_INSN, ("fpu_execute: FCTIW\n"));
fpu_explode(fe, fp = &fe->fe_f1, type, FR(rb));
type = FTYPE_INT | FTYPE_FPSCR;
if (instr.i_x.i_xo == OPC63_FCTIWZ)
type |= FTYPE_RD_RZ;
break;
case OPC63_FCMPO:
FPU_EMU_EVCNT_INCR(fcmpo);
DPRINTF(FPE_INSN, ("fpu_execute: FCMPO\n"));
rt >>= 2;
fpu_explode(fe, &fe->fe_f1, type, FR(ra));
fpu_explode(fe, &fe->fe_f2, type, FR(rb));
fpu_compare(fe, 1);
/* Make sure we do the condition regs. */
cond = 0;
/* N.B.: i_rs is already left shifted by two. */
bf = instr.i_x.i_rs & 0xfc;
setcr = 1;
break;
case OPC63_MTFSB1:
FPU_EMU_EVCNT_INCR(mtfsb1);
DPRINTF(FPE_INSN, ("fpu_execute: MTFSB1\n"));
fe->fe_cx = (1 << (31 - rt)) &
~(FPSCR_FEX | FPSCR_VX);
break;
case OPC63_FNEG:
FPU_EMU_EVCNT_INCR(fnegabs);
DPRINTF(FPE_INSN, ("fpu_execute: FNEGABS\n"));
memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
sizeof(double));
a = (int *)&fs->fpreg[rt];
*a ^= (1 << 31);
break;
case OPC63_MCRFS:
FPU_EMU_EVCNT_INCR(mcrfs);
DPRINTF(FPE_INSN, ("fpu_execute: MCRFS\n"));
cond = 0;
rt &= 0x1c;
ra &= 0x1c;
/* Extract the bits we want */
bits = (fe->fe_fpscr >> (28 - ra)) & 0xf;
/* Clear the bits we copied. */
mask = (0xf << (28 - ra)) & MCRFS_MASK;
fe->fe_fpscr &= ~mask;
/* Now shove them in the right part of cr */
tf->tf_cr &= ~(0xf << (28 - rt));
tf->tf_cr |= bits << (28 - rt);
break;
case OPC63_MTFSB0:
FPU_EMU_EVCNT_INCR(mtfsb0);
DPRINTF(FPE_INSN, ("fpu_execute: MTFSB0\n"));
fe->fe_fpscr &= ~(1 << (31 - rt)) |
(FPSCR_FEX | FPSCR_VX);
break;
case OPC63_FMR:
FPU_EMU_EVCNT_INCR(fmr);
DPRINTF(FPE_INSN, ("fpu_execute: FMR\n"));
memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
sizeof(double));
break;
case OPC63_MTFSFI:
FPU_EMU_EVCNT_INCR(mtfsfi);
DPRINTF(FPE_INSN, ("fpu_execute: MTFSFI\n"));
rb >>= 1;
rt &= 0x1c; /* Already left-shifted 4 */
bits = rb << (28 - rt);
mask = 0xf << (28 - rt);
fe->fe_fpscr = (fe->fe_fpscr & ~mask) | bits;
break;
case OPC63_FNABS:
FPU_EMU_EVCNT_INCR(fnabs);
DPRINTF(FPE_INSN, ("fpu_execute: FABS\n"));
memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
sizeof(double));
a = (int *)&fs->fpreg[rt];
*a |= (1 << 31);
break;
case OPC63_FABS:
FPU_EMU_EVCNT_INCR(fabs);
DPRINTF(FPE_INSN, ("fpu_execute: FABS\n"));
memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
sizeof(double));
a = (int *)&fs->fpreg[rt];
*a &= ~(1 << 31);
break;
case OPC63_MFFS:
FPU_EMU_EVCNT_INCR(mffs);
DPRINTF(FPE_INSN, ("fpu_execute: MFFS\n"));
memcpy(&fs->fpreg[rt], &fs->fpscr,
sizeof(fs->fpscr));
break;
case OPC63_MTFSF:
FPU_EMU_EVCNT_INCR(mtfsf);
DPRINTF(FPE_INSN, ("fpu_execute: MTFSF\n"));
if ((rt = instr.i_xfl.i_flm) == -1) {
mask = -1;
} else {
mask = 0;
/* Convert 1 bit -> 4 bits */
for (i = 0; i < 8; i++)
if (rt & (1 << i))
mask |=
(0xf << (4 * i));
}
a = (int *)&fs->fpreg[rb];
bits = a[1] & mask;
fe->fe_fpscr = (fe->fe_fpscr & ~mask) | bits;
break;
case OPC63_FCTID:
case OPC63_FCTIDZ:
FPU_EMU_EVCNT_INCR(fctid);
DPRINTF(FPE_INSN, ("fpu_execute: FCTID\n"));
fpu_explode(fe, fp = &fe->fe_f1, type, FR(rb));
type = FTYPE_LNG | FTYPE_FPSCR;
if (instr.i_x.i_xo == OPC63_FCTIDZ)
type |= FTYPE_RD_RZ;
break;
case OPC63_FCFID:
FPU_EMU_EVCNT_INCR(fcfid);
DPRINTF(FPE_INSN, ("fpu_execute: FCFID\n"));
fpu_explode(fe, fp = &fe->fe_f1, FTYPE_LNG,
FR(rb));
type = FTYPE_DBL | FTYPE_FPSCR;
break;
default:
return (NOTFPU);
break;
}
} else {
/* Format A */
rt = instr.i_a.i_frt;
ra = instr.i_a.i_fra;
rb = instr.i_a.i_frb;
rc = instr.i_a.i_frc;
/* If the instruction was single precision, round */
if (!(instr.i_any.i_opcd & 0x4)) {
fpu_implode(fe, fp, FTYPE_SNG | FTYPE_FPSCR,
&FR(rt));
fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG,
FR(rt));
} else
type |= FTYPE_FPSCR;
}
} else {
return (NOTFPU);
}
/*
* ALU operation is complete. Collapse the result and then check
* for exceptions. If we got any, and they are enabled, do not
* alter the destination register, just stop with an exception.
* Otherwise set new current exceptions and accrue.
*/
if (fp)
fpu_implode(fe, fp, type, &FR(rt));
cx = fe->fe_cx;
fsr = fe->fe_fpscr & ~(FPSCR_FEX|FPSCR_VX);
if (cx != 0) {
fsr |= cx;
DPRINTF(FPE_INSN, ("fpu_execute: cx %x, fsr %x\n", cx, fsr));
}
if (fsr & FPSR_INV)
fsr |= FPSCR_VX;
mask = (fsr & FPSR_EX) << (25 - 3);
if (fsr & mask)
fsr |= FPSCR_FEX;
if ((fsr ^ fe->fe_fpscr) & FPSR_EX_MSK)
fsr |= FPSCR_FX;