diff options
author | Miod Vallat <miod@cvs.openbsd.org> | 2007-12-29 17:41:35 +0000 |
---|---|---|
committer | Miod Vallat <miod@cvs.openbsd.org> | 2007-12-29 17:41:35 +0000 |
commit | 5d627e49583fbe40c704231f5c838be297f04bda (patch) | |
tree | 8e87d2682f541b76dbedaea3d1336f305b5869e8 /sys/arch | |
parent | cca61561941a9c9b2cf068fc2e03370efb4a47b1 (diff) |
Had I known we had a kernel version of John Hauser's SoftFloat code, I would
have jumped on it instead of basing the FPU completion work on the sparc
FPU code.
This is now repaired with this commit, and m88110_fp.c changes directory
again, for the last time.
Diffstat (limited to 'sys/arch')
-rw-r--r-- | sys/arch/m88k/conf/files.m88k | 13 | ||||
-rw-r--r-- | sys/arch/m88k/fpu/fpu_add.c | 210 | ||||
-rw-r--r-- | sys/arch/m88k/fpu/fpu_arith.h | 92 | ||||
-rw-r--r-- | sys/arch/m88k/fpu/fpu_compare.c | 243 | ||||
-rw-r--r-- | sys/arch/m88k/fpu/fpu_div.c | 265 | ||||
-rw-r--r-- | sys/arch/m88k/fpu/fpu_emu.h | 231 | ||||
-rw-r--r-- | sys/arch/m88k/fpu/fpu_explode.c | 255 | ||||
-rw-r--r-- | sys/arch/m88k/fpu/fpu_implode.c | 462 | ||||
-rw-r--r-- | sys/arch/m88k/fpu/fpu_mul.c | 223 | ||||
-rw-r--r-- | sys/arch/m88k/fpu/fpu_sqrt.c | 392 | ||||
-rw-r--r-- | sys/arch/m88k/fpu/fpu_subr.c | 219 | ||||
-rw-r--r-- | sys/arch/m88k/fpu/m88110_fp.c | 421 | ||||
-rw-r--r-- | sys/arch/m88k/include/ieeefp.h | 24 | ||||
-rw-r--r-- | sys/arch/m88k/m88k/m88110_fp.c | 745 |
14 files changed, 771 insertions, 3024 deletions
diff --git a/sys/arch/m88k/conf/files.m88k b/sys/arch/m88k/conf/files.m88k index fa06f56685c..c37b6715772 100644 --- a/sys/arch/m88k/conf/files.m88k +++ b/sys/arch/m88k/conf/files.m88k @@ -1,4 +1,4 @@ -# $OpenBSD: files.m88k,v 1.19 2007/12/25 00:29:46 miod Exp $ +# $OpenBSD: files.m88k,v 1.20 2007/12/29 17:41:29 miod Exp $ file arch/m88k/m88k/db_disasm.c ddb file arch/m88k/m88k/db_interface.c ddb @@ -23,15 +23,8 @@ file arch/m88k/m88k/vectors_88110.S m88110 file arch/m88k/m88k/vm_machdep.c # floating-point support code for 88110 -file arch/m88k/fpu/fpu_add.c m88110 -file arch/m88k/fpu/fpu_compare.c m88110 -file arch/m88k/fpu/fpu_div.c m88110 -file arch/m88k/fpu/fpu_explode.c m88110 -file arch/m88k/fpu/fpu_implode.c m88110 -file arch/m88k/fpu/fpu_mul.c m88110 -file arch/m88k/fpu/fpu_sqrt.c m88110 -file arch/m88k/fpu/fpu_subr.c m88110 -file arch/m88k/fpu/m88110_fp.c m88110 +file arch/m88k/m88k/m88110_fp.c m88110 +file lib/libkern/softfloat.c m88110 # quad support is neccessary for 32 bit architectures file lib/libkern/adddi3.c diff --git a/sys/arch/m88k/fpu/fpu_add.c b/sys/arch/m88k/fpu/fpu_add.c deleted file mode 100644 index c27c5410ad3..00000000000 --- a/sys/arch/m88k/fpu/fpu_add.c +++ /dev/null @@ -1,210 +0,0 @@ -/* $OpenBSD: fpu_add.c,v 1.1 2007/12/25 00:29:49 miod Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Lawrence Berkeley Laboratory. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fpu_add.c 8.1 (Berkeley) 6/11/93 - */ - -/* - * Perform an FPU add (return x + y). - * - * To subtract, negate y and call add. - */ - -#include <sys/types.h> -#ifdef DIAGNOSTIC -#include <sys/systm.h> -#endif - -#include <machine/frame.h> -#include <machine/ieeefp.h> - -#include <m88k/fpu/fpu_arith.h> -#include <m88k/fpu/fpu_emu.h> - -struct fpn * -fpu_add(struct fpemu *fe) -{ - struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2, *r; - u_int r0, r1, r2, r3; - int rd; - - /* - * Put the `heavier' operand on the right (see fpu_emu.h). - * Then we will have one of the following cases, taken in the - * following order: - * - * - y = NaN. 
Implied: if only one is a signalling NaN, y is. - * The result is y. - * - y = Inf. Implied: x != NaN (is 0, number, or Inf: the NaN - * case was taken care of earlier). - * If x = -y, the result is NaN. Otherwise the result - * is y (an Inf of whichever sign). - * - y is 0. Implied: x = 0. - * If x and y differ in sign (one positive, one negative), - * the result is +0 except when rounding to -Inf. If same: - * +0 + +0 = +0; -0 + -0 = -0. - * - x is 0. Implied: y != 0. - * Result is y. - * - other. Implied: both x and y are numbers. - * Do addition a la Hennessey & Patterson. - */ - ORDER(x, y); - if (ISNAN(y)) - return (y); - if (ISINF(y)) { - if (ISINF(x) && x->fp_sign != y->fp_sign) - return (fpu_newnan(fe, 0)); - return (y); - } - rd = ((fe->fe_fpcr >> FPCR_RD_SHIFT) & FPCR_RD_MASK); - if (ISZERO(y)) { - if (rd != FP_RM) /* only -0 + -0 gives -0 */ - y->fp_sign &= x->fp_sign; - else /* any -0 operand gives -0 */ - y->fp_sign |= x->fp_sign; - return (y); - } - if (ISZERO(x)) - return (y); - /* - * We really have two numbers to add, although their signs may - * differ. Make the exponents match, by shifting the smaller - * number right (e.g., 1.011 => 0.1011) and increasing its - * exponent (2^3 => 2^4). Note that we do not alter the exponents - * of x and y here. - */ - r = &fe->fe_f3; - r->fp_class = FPC_NUM; - if (x->fp_exp == y->fp_exp) { - r->fp_exp = x->fp_exp; - r->fp_sticky = 0; - } else { - if (x->fp_exp < y->fp_exp) { - /* - * Try to avoid subtract case iii (see below). - * This also guarantees that x->fp_sticky = 0. - */ - SWAP(x, y); - } - /* now x->fp_exp > y->fp_exp */ - r->fp_exp = x->fp_exp; - r->fp_sticky = fpu_shr(y, x->fp_exp - y->fp_exp); - } - r->fp_sign = x->fp_sign; - if (x->fp_sign == y->fp_sign) { - FPU_DECL_CARRY - - /* - * The signs match, so we simply add the numbers. The result - * may be `supernormal' (as big as 1.111...1 + 1.111...1, or - * 11.111...0). 
If so, a single bit shift-right will fix it - * (but remember to adjust the exponent). - */ - /* r->fp_mant = x->fp_mant + y->fp_mant */ - FPU_ADDS(r->fp_mant[3], x->fp_mant[3], y->fp_mant[3]); - FPU_ADDCS(r->fp_mant[2], x->fp_mant[2], y->fp_mant[2]); - FPU_ADDCS(r->fp_mant[1], x->fp_mant[1], y->fp_mant[1]); - FPU_ADDC(r0, x->fp_mant[0], y->fp_mant[0]); - if ((r->fp_mant[0] = r0) >= FP_2) { - (void) fpu_shr(r, 1); - r->fp_exp++; - } - } else { - FPU_DECL_CARRY - - /* - * The signs differ, so things are rather more difficult. - * H&P would have us negate the negative operand and add; - * this is the same as subtracting the negative operand. - * This is quite a headache. Instead, we will subtract - * y from x, regardless of whether y itself is the negative - * operand. When this is done one of three conditions will - * hold, depending on the magnitudes of x and y: - * case i) |x| > |y|. The result is just x - y, - * with x's sign, but it may need to be normalized. - * case ii) |x| = |y|. The result is 0 (maybe -0) - * so must be fixed up. - * case iii) |x| < |y|. We goofed; the result should - * be (y - x), with the same sign as y. - * We could compare |x| and |y| here and avoid case iii, - * but that would take just as much work as the subtract. - * We can tell case iii has occurred by an overflow. - * - * N.B.: since x->fp_exp >= y->fp_exp, x->fp_sticky = 0. - */ - /* r->fp_mant = x->fp_mant - y->fp_mant */ - FPU_SET_CARRY(y->fp_sticky); - FPU_SUBCS(r3, x->fp_mant[3], y->fp_mant[3]); - FPU_SUBCS(r2, x->fp_mant[2], y->fp_mant[2]); - FPU_SUBCS(r1, x->fp_mant[1], y->fp_mant[1]); - FPU_SUBC(r0, x->fp_mant[0], y->fp_mant[0]); - if (r0 < FP_2) { - /* cases i and ii */ - if ((r0 | r1 | r2 | r3) == 0) { - /* case ii */ - r->fp_class = FPC_ZERO; - r->fp_sign = rd == FP_RM; - return (r); - } - } else { - /* - * Oops, case iii. This can only occur when the - * exponents were equal, in which case neither - * x nor y have sticky bits set. 
Flip the sign - * (to y's sign) and negate the result to get y - x. - */ -#ifdef DIAGNOSTIC - if (x->fp_exp != y->fp_exp || r->fp_sticky) - panic("fpu_add"); -#endif - r->fp_sign = y->fp_sign; - FPU_SUBS(r3, 0, r3); - FPU_SUBCS(r2, 0, r2); - FPU_SUBCS(r1, 0, r1); - FPU_SUBC(r0, 0, r0); - } - r->fp_mant[3] = r3; - r->fp_mant[2] = r2; - r->fp_mant[1] = r1; - r->fp_mant[0] = r0; - if (r0 < FP_1) - fpu_norm(r); - } - return (r); -} diff --git a/sys/arch/m88k/fpu/fpu_arith.h b/sys/arch/m88k/fpu/fpu_arith.h deleted file mode 100644 index b1504030403..00000000000 --- a/sys/arch/m88k/fpu/fpu_arith.h +++ /dev/null @@ -1,92 +0,0 @@ -/* $OpenBSD: fpu_arith.h,v 1.2 2007/12/26 18:27:04 miod Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Lawrence Berkeley Laboratory. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fpu_arith.h 8.1 (Berkeley) 6/11/93 - */ - -/* - * Extended-precision arithmetic. - * - * We hold the notion of a `carry register', which may or may not be a - * machine carry bit or register. On the 88110, it is just the machine's - * carry bit. - * - * In the worst case, you can compute the carry from x+y as - * (unsigned)(x + y) < (unsigned)x - * and from x+y+c as - * ((unsigned)(x + y + c) <= (unsigned)x && (y|c) != 0) - * for example. - */ - -/* set up for extended-precision arithemtic */ -#define FPU_DECL_CARRY - -/* - * We have three kinds of add: - * add with carry: r = x + y + c - * add (ignoring current carry) and set carry: c'r = x + y + 0 - * add with carry and set carry: c'r = x + y + c - * The macros use `C' for `use carry' and `S' for `set carry'. - * Note that the state of the carry is undefined after ADDC and SUBC, - * so if all you have for these is `add with carry and set carry', - * that is OK. - * - * The same goes for subtract, except that we compute x - y - c. - * - * Finally, we have a way to get the carry into a `regular' variable, - * or set it from a value. SET_CARRY turns 0 into no-carry, nonzero - * into carry; GET_CARRY sets its argument to 0 or 1. 
- */ -#define FPU_ADDC(r, x, y) \ - asm volatile("addu.ci %0,%1,%2" : "=r"(r) : "r"(x), "r"(y)) -#define FPU_ADDS(r, x, y) \ - asm volatile("addu.co %0,%1,%2" : "=r"(r) : "r"(x), "r"(y)) -#define FPU_ADDCS(r, x, y) \ - asm volatile("addu.cio %0,%1,%2" : "=r"(r) : "r"(x), "r"(y)) -#define FPU_SUBC(r, x, y) \ - asm volatile("subu.ci %0,%1,%2" : "=r"(r) : "r"(x), "r"(y)) -#define FPU_SUBS(r, x, y) \ - asm volatile("subu.co %0,%1,%2" : "=r"(r) : "r"(x), "r"(y)) -#define FPU_SUBCS(r, x, y) \ - asm volatile("subu.cio %0,%1,%2" : "=r"(r) : "r"(x), "r"(y)) - -#define FPU_GET_CARRY(r) asm volatile("addu.ci %0,r0,r0" : "=r"(r)) -#define FPU_SET_CARRY(v) asm volatile("subu.co r0,r0,%0" :: "r"(!v)) - -#define FPU_SHL1_BY_ADD /* shift left 1 faster by ADDC than (a<<1)|(b>>31) */ diff --git a/sys/arch/m88k/fpu/fpu_compare.c b/sys/arch/m88k/fpu/fpu_compare.c deleted file mode 100644 index 3d539dfe48e..00000000000 --- a/sys/arch/m88k/fpu/fpu_compare.c +++ /dev/null @@ -1,243 +0,0 @@ -/* $OpenBSD: fpu_compare.c,v 1.3 2007/12/26 18:27:43 miod Exp $ */ - -/* - * Copyright (c) 2007 Miodrag Vallat. - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice, this permission notice, and the disclaimer below - * appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Lawrence Berkeley Laboratory. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * @(#)fpu_compare.c 8.1 (Berkeley) 6/11/93 - */ - -/* - * fcmp and fcmpu instructions. - * - * These rely on the fact that our internal wide format is achieved by - * adding zero bits to the end of narrower mantissas. - */ - -#include <sys/types.h> - -#include <machine/fpu.h> -#include <machine/frame.h> - -#include <m88k/fpu/fpu_arith.h> -#include <m88k/fpu/fpu_emu.h> - -/* - * Perform a compare instruction. - * - * If either operand is NaN, the result is unordered. This causes an - * reserved operand exception (except for nonsignalling NaNs for fcmpu). - * - * Everything else is ordered: - * |Inf| > |numbers| > |0|. - * We already arranged for fp_class(Inf) > fp_class(numbers) > fp_class(0), - * so we get this directly. Note, however, that two zeros compare equal - * regardless of sign, while everything else depends on sign. - * - * Incidentally, two Infs of the same sign compare equal. Since the 88110 - * does infinity arithmetic on hardware, this codepath should never be - * entered. - */ -u_int32_t -fpu_compare(struct fpemu *fe, int fcmpu) -{ - struct fpn *a, *b; - u_int32_t cc; - int r3, r2, r1, r0; - FPU_DECL_CARRY - - a = &fe->fe_f1; - b = &fe->fe_f2; - - /* fcmpu shall only raise an exception for signalling NaNs */ - if (ISNAN(a)) { - if (!fcmpu || (a->fp_mant[0] & FP_QUIETBIT) != 0) - fe->fe_fpsr |= FPSR_EFINV; - cc = CC_UN; - goto done; - } - if (ISNAN(b)) { - if (!fcmpu || (b->fp_mant[0] & FP_QUIETBIT) != 0) - fe->fe_fpsr |= FPSR_EFINV; - cc = CC_UN; - goto done; - } - - /* - * Must handle both-zero early to avoid sign goofs. Otherwise, - * at most one is 0, and if the signs differ we are done. 
- */ - if (ISZERO(a) && ISZERO(b)) { - cc = CC_EQ; - goto done; - } - if (a->fp_sign) { /* a < 0 (or -0) */ - if (!b->fp_sign) { /* b >= 0 (or if a = -0, b > 0) */ - cc = CC_LT; - goto done; - } - } else { /* a > 0 (or +0) */ - if (b->fp_sign) { /* b <= -0 (or if a = +0, b < 0) */ - cc = CC_GT; - goto done; - } - } - - /* - * Now the signs are the same (but may both be negative). All - * we have left are these cases: - * - * |a| < |b| [classes or values differ] - * |a| > |b| [classes or values differ] - * |a| == |b| [classes and values identical] - * - * We define `diff' here to expand these as: - * - * |a| < |b|, a,b >= 0: a < b => CC_LT - * |a| < |b|, a,b < 0: a > b => CC_GT - * |a| > |b|, a,b >= 0: a > b => CC_GT - * |a| > |b|, a,b < 0: a < b => CC_LT - */ -#define opposite_cc(cc) ((cc) == CC_LT ? CC_GT : CC_LT) -#define diff(magnitude) (a->fp_sign ? opposite_cc(magnitude) : (magnitude)) - if (a->fp_class < b->fp_class) { /* |a| < |b| */ - cc = diff(CC_LT); - goto done; - } - if (a->fp_class > b->fp_class) { /* |a| > |b| */ - cc = diff(CC_GT); - goto done; - } - /* now none can be 0: only Inf and numbers remain */ - if (ISINF(a)) { /* |Inf| = |Inf| */ - cc = CC_EQ; - goto done; - } - /* - * Only numbers remain. To compare two numbers in magnitude, we - * simply subtract their mantissas. - */ - FPU_SUBS(r3, a->fp_mant[0], b->fp_mant[0]); - FPU_SUBCS(r2, a->fp_mant[1], b->fp_mant[1]); - FPU_SUBCS(r1, a->fp_mant[2], b->fp_mant[2]); - FPU_SUBC(r0, a->fp_mant[3], b->fp_mant[3]); - if (r0 < 0) /* underflow: |a| < |b| */ - cc = diff(CC_LT); - else if ((r0 | r1 | r2 | r3) != 0) /* |a| > |b| */ - cc = diff(CC_GT); - else - cc = CC_EQ; /* |a| == |b| */ -done: - - /* - * Complete condition code mask. 
- */ - - if (cc & CC_UN) - cc |= CC_UE | CC_UG | CC_ULE | CC_UL | CC_UGE; - if (cc & CC_EQ) - cc |= CC_LE | CC_GE | CC_UE; - if (cc & CC_GT) - cc |= CC_GE; - if (cc & CC_LT) - cc |= CC_LE; - if (cc & (CC_LT | CC_GT)) - cc |= CC_LG; - if (cc & (CC_LT | CC_GT | CC_EQ)) - cc |= CC_LEG; - if (cc & CC_GT) - cc |= CC_UG; - if (cc & CC_LE) - cc |= CC_ULE; - if (cc & CC_LT) - cc |= CC_UL; - if (cc & CC_GE) - cc |= CC_UGE; - - /* - * Fill the interval bits. - * s1 (here `a') is compared to the interval [0, s2 (here `b')]. - */ - if (!(cc & CC_UN)) { - /* s1 and s2 are either Zero, numbers or Inf */ - if (ISZERO(a) || (cc & CC_EQ)) { - /* if s1 and s2 are equal, s1 is on boundary */ - cc |= CC_IB | CC_OB; - } else if (b->fp_sign == 0) { - /* s2 is positive, the interval is [0, s2] */ - if (cc & CC_GT) { - /* 0 <= s2 < s1 -> out of interval */ - cc |= CC_OU | CC_OB; - } else if (a->fp_sign == 0) { - /* 0 < s1 < s2 -> in interval */ - cc |= CC_IB | CC_IN; - } else { - /* s1 < 0 <= s2 */ - cc |= CC_OU | CC_OB; - } - } else { - /* s2 is negative, the interval is [s2, 0] */ - if (cc & CC_LT) { - /* s1 < s2 <= 0 */ - cc |= CC_OU | CC_OB; - } else if (a->fp_sign != 0) { - /* s2 < s1 < 0 */ - cc |= CC_IB | CC_IN; - } else { - /* s2 < 0 < s1 */ - cc |= CC_OU | CC_OB; - } - } - } - - return (cc); -} diff --git a/sys/arch/m88k/fpu/fpu_div.c b/sys/arch/m88k/fpu/fpu_div.c deleted file mode 100644 index 1e214b44b80..00000000000 --- a/sys/arch/m88k/fpu/fpu_div.c +++ /dev/null @@ -1,265 +0,0 @@ -/* $OpenBSD: fpu_div.c,v 1.2 2007/12/25 15:45:48 miod Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. 
- * - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Lawrence Berkeley Laboratory. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fpu_div.c 8.1 (Berkeley) 6/11/93 - */ - -/* - * Perform an FPU divide (return x / y). 
- */ - -#include <sys/types.h> - -#include <machine/fpu.h> -#include <machine/frame.h> - -#include <m88k/fpu/fpu_arith.h> -#include <m88k/fpu/fpu_emu.h> - -/* - * Division of normal numbers is done as follows: - * - * x and y are floating point numbers, i.e., in the form 1.bbbb * 2^e. - * If X and Y are the mantissas (1.bbbb's), the quotient is then: - * - * q = (X / Y) * 2^((x exponent) - (y exponent)) - * - * Since X and Y are both in [1.0,2.0), the quotient's mantissa (X / Y) - * will be in [0.5,2.0). Moreover, it will be less than 1.0 if and only - * if X < Y. In that case, it will have to be shifted left one bit to - * become a normal number, and the exponent decremented. Thus, the - * desired exponent is: - * - * left_shift = x->fp_mant < y->fp_mant; - * result_exp = x->fp_exp - y->fp_exp - left_shift; - * - * The quotient mantissa X/Y can then be computed one bit at a time - * using the following algorithm: - * - * Q = 0; -- Initial quotient. - * R = X; -- Initial remainder, - * if (left_shift) -- but fixed up in advance. - * R *= 2; - * for (bit = FP_NMANT; --bit >= 0; R *= 2) { - * if (R >= Y) { - * Q |= 1 << bit; - * R -= Y; - * } - * } - * - * The subtraction R -= Y always removes the uppermost bit from R (and - * can sometimes remove additional lower-order 1 bits); this proof is - * left to the reader. - * - * This loop correctly calculates the guard and round bits since they are - * included in the expanded internal representation. The sticky bit - * is to be set if and only if any other bits beyond guard and round - * would be set. From the above it is obvious that this is true if and - * only if the remainder R is nonzero when the loop terminates. - * - * Examining the loop above, we can see that the quotient Q is built - * one bit at a time ``from the top down''. This means that we can - * dispense with the multi-word arithmetic and just build it one word - * at a time, writing each result word when it is done. 
- * - * Furthermore, since X and Y are both in [1.0,2.0), we know that, - * initially, R >= Y. (Recall that, if X < Y, R is set to X * 2 and - * is therefore at in [2.0,4.0).) Thus Q is sure to have bit FP_NMANT-1 - * set, and R can be set initially to either X - Y (when X >= Y) or - * 2X - Y (when X < Y). In addition, comparing R and Y is difficult, - * so we will simply calculate R - Y and see if that underflows. - * This leads to the following revised version of the algorithm: - * - * R = X; - * bit = FP_1; - * D = R - Y; - * if (D >= 0) { - * result_exp = x->fp_exp - y->fp_exp; - * R = D; - * q = bit; - * bit >>= 1; - * } else { - * result_exp = x->fp_exp - y->fp_exp - 1; - * q = 0; - * } - * R <<= 1; - * do { - * D = R - Y; - * if (D >= 0) { - * q |= bit; - * R = D; - * } - * R <<= 1; - * } while ((bit >>= 1) != 0); - * Q[0] = q; - * for (i = 1; i < 4; i++) { - * q = 0, bit = 1 << 31; - * do { - * D = R - Y; - * if (D >= 0) { - * q |= bit; - * R = D; - * } - * R <<= 1; - * } while ((bit >>= 1) != 0); - * Q[i] = q; - * } - * - * This can be refined just a bit further by moving the `R <<= 1' - * calculations to the front of the do-loops and eliding the first one. - * The process can be terminated immediately whenever R becomes 0, but - * this is relatively rare, and we do not bother. - */ - -struct fpn * -fpu_div(struct fpemu *fe) -{ - struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2; - u_int q, bit; - u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3; - FPU_DECL_CARRY - - /* - * Since divide is not commutative, we cannot just use ORDER. - * Check either operand for NaN first; if there is at least one, - * order the signalling one (if only one) onto the right, then - * return it. 
Otherwise we have the following cases: - * - * Inf / Inf = NaN, plus NV exception - * Inf / num = Inf [i.e., return x] - * Inf / 0 = Inf [i.e., return x] - * 0 / Inf = 0 [i.e., return x] - * 0 / num = 0 [i.e., return x] - * 0 / 0 = NaN, plus NV exception - * num / Inf = 0 - * num / num = num (do the divide) - * num / 0 = Inf, plus DZ exception - */ - if (ISNAN(x) || ISNAN(y)) { - ORDER(x, y); - return (y); - } - if (ISINF(x) || ISZERO(x)) { - if (x->fp_class == y->fp_class) - return (fpu_newnan(fe, !ISINF(x))); - if (ISZERO(x)) - x->fp_sign = y->fp_sign; - return (x); - } - - /* all results at this point use XOR of operand signs */ - x->fp_sign ^= y->fp_sign; - if (ISINF(y)) { - x->fp_class = FPC_ZERO; - return (x); - } - if (ISZERO(y)) { - fe->fe_fpsr |= FPSR_EFDVZ; - x->fp_class = FPC_INF; - return (x); - } - - /* - * Macros for the divide. See comments at top for algorithm. - * Note that we expand R, D, and Y here. - */ - -#define SUBTRACT /* D = R - Y */ \ - FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \ - FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0) - -#define NONNEGATIVE /* D >= 0 */ \ - ((int)d0 >= 0) - -#ifdef FPU_SHL1_BY_ADD -#define SHL1 /* R <<= 1 */ \ - FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \ - FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0) -#else -#define SHL1 \ - r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \ - r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1 -#endif - -#define LOOP /* do ... while (bit >>= 1) */ \ - do { \ - SHL1; \ - SUBTRACT; \ - if (NONNEGATIVE) { \ - q |= bit; \ - r0 = d0, r1 = d1, r2 = d2, r3 = d3; \ - } \ - } while ((bit >>= 1) != 0) - -#define WORD(r, i) /* calculate r->fp_mant[i] */ \ - q = 0; \ - bit = 1 << 31; \ - LOOP; \ - (x)->fp_mant[i] = q - - /* Setup. Note that we put our result in x. 
*/ - r0 = x->fp_mant[0]; - r1 = x->fp_mant[1]; - r2 = x->fp_mant[2]; - r3 = x->fp_mant[3]; - y0 = y->fp_mant[0]; - y1 = y->fp_mant[1]; - y2 = y->fp_mant[2]; - y3 = y->fp_mant[3]; - - bit = FP_1; - SUBTRACT; - if (NONNEGATIVE) { - x->fp_exp -= y->fp_exp; - r0 = d0, r1 = d1, r2 = d2, r3 = d3; - q = bit; - bit >>= 1; - } else { - x->fp_exp -= y->fp_exp + 1; - q = 0; - } - LOOP; - x->fp_mant[0] = q; - WORD(x, 1); - WORD(x, 2); - WORD(x, 3); - x->fp_sticky = r0 | r1 | r2 | r3; - - return (x); -} diff --git a/sys/arch/m88k/fpu/fpu_emu.h b/sys/arch/m88k/fpu/fpu_emu.h deleted file mode 100644 index c53f35f2145..00000000000 --- a/sys/arch/m88k/fpu/fpu_emu.h +++ /dev/null @@ -1,231 +0,0 @@ -/* $OpenBSD: fpu_emu.h,v 1.1 2007/12/25 00:29:49 miod Exp $ */ - -/* - * Copyright (c) 2007 Miodrag Vallat. - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice, this permission notice, and the disclaimer below - * appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. 
- * - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Lawrence Berkeley Laboratory. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fpu_emu.h 8.1 (Berkeley) 6/11/93 - */ - -/* - * Floating point emulator (initially tailored for SPARC, but structurally - * machine-independent). 
- * - * Floating point numbers are carried around internally in an `expanded' - * or `unpacked' form consisting of: - * - sign - * - unbiased exponent - * - mantissa (`1.' + 112-bit fraction + guard + round) - * - sticky bit - * Any implied `1' bit is inserted, giving a 113-bit mantissa that is - * always nonzero. Additional low-order `guard' and `round' bits are - * scrunched in, making the entire mantissa 115 bits long. This is divided - * into four 32-bit words, with `spare' bits left over in the upper part - * of the top word (the high bits of fp_mant[0]). An internal `exploded' - * number is thus kept within the half-open interval [1.0,2.0) (but see - * the `number classes' below). This holds even for denormalized numbers: - * when we explode an external denorm, we normalize it, introducing low-order - * zero bits, so that the rest of the code always sees normalized values. - * - * Note that a number of our algorithms use the `spare' bits at the top. - * The most demanding algorithm---the one for sqrt---depends on two such - * bits, so that it can represent values up to (but not including) 8.0, - * and then it needs a carry on top of that, so that we need three `spares'. - * - * The sticky-word is 32 bits so that we can use `OR' operators to goosh - * whole words from the mantissa into it. - * - * All operations are done in this internal extended precision. According - * to Hennessy & Patterson, Appendix A, rounding can be repeated---that is, - * it is OK to do a+b in extended precision and then round the result to - * single precision---provided single, double, and extended precisions are - * `far enough apart' (they always are), but we will try to avoid any such - * extra work where possible.
- */ -struct fpn { - int fp_class; /* see below */ - int fp_sign; /* 0 => positive, 1 => negative */ - int fp_exp; /* exponent (unbiased) */ - int fp_sticky; /* nonzero bits lost at right end */ - u_int fp_mant[4]; /* 115-bit mantissa */ -}; - -#define FP_NMANT 115 /* total bits in mantissa (incl g,r) */ -#define FP_NG 2 /* number of low-order guard bits */ -#define FP_LG ((FP_NMANT - 1) & 31) /* log2(1.0) for fp_mant[0] */ -#define FP_QUIETBIT (1 << (FP_LG - 1)) /* Quiet bit in NaNs (0.5) */ -#define FP_1 (1 << FP_LG) /* 1.0 in fp_mant[0] */ -#define FP_2 (1 << (FP_LG + 1)) /* 2.0 in fp_mant[0] */ - -/* - * Number classes. Since zero, Inf, and NaN cannot be represented using - * the above layout, we distinguish these from other numbers via a class. - * In addition, to make computation easier and to follow Appendix N of - * the SPARC Version 8 standard, we give each kind of NaN a separate class. - */ -#define FPC_SNAN -2 /* signalling NaN (sign irrelevant) */ -#define FPC_QNAN -1 /* quiet NaN (sign irrelevant) */ -#define FPC_ZERO 0 /* zero (sign matters) */ -#define FPC_NUM 1 /* number (sign matters) */ -#define FPC_INF 2 /* infinity (sign matters) */ - -#define ISNAN(fp) ((fp)->fp_class < 0) -#define ISZERO(fp) ((fp)->fp_class == 0) -#define ISINF(fp) ((fp)->fp_class == FPC_INF) - -/* - * ORDER(x,y) `sorts' a pair of `fpn *'s so that the right operand (y) points - * to the `more significant' operand for our purposes. Appendix N says that - * the result of a computation involving two numbers are: - * - * If both are SNaN: operand 2, converted to Quiet - * If only one is SNaN: the SNaN operand, converted to Quiet - * If both are QNaN: operand 2 - * If only one is QNaN: the QNaN operand - * - * In addition, in operations with an Inf operand, the result is usually - * Inf. The class numbers are carefully arranged so that if - * (unsigned)class(op1) > (unsigned)class(op2) - * then op1 is the one we want; otherwise op2 is the one we want. 
- */ -#define ORDER(x, y) \ -do { \ - if ((u_int)(x)->fp_class > (u_int)(y)->fp_class) \ - SWAP(x, y); \ -} while (0) -#define SWAP(x, y) \ -do { \ - struct fpn *swap; \ - swap = (x), (x) = (y), (y) = swap; \ -} while (0) - -/* - * Values for individual bits in fcmp results. - */ -#define CC_UN 0x00000001 /* unordered */ -#define CC_LEG 0x00000002 /* less than, equal or greater than */ -#define CC_EQ 0x00000004 /* equal */ -#define CC_NE 0x00000008 /* not equal */ -#define CC_GT 0x00000010 /* greater than */ -#define CC_LE 0x00000020 /* less than or equal */ -#define CC_LT 0x00000040 /* less than */ -#define CC_GE 0x00000080 /* greater than or equal */ -#define CC_OU 0x00000100 /* out of range */ -#define CC_IB 0x00000200 /* in range or on boundary */ -#define CC_IN 0x00000400 /* in range */ -#define CC_OB 0x00000800 /* out of range or on boundary */ -#define CC_UE 0x00001000 /* unordered or equal */ -#define CC_LG 0x00002000 /* less than or greater than */ -#define CC_UG 0x00004000 /* unordered or greater than */ -#define CC_ULE 0x00008000 /* unordered or less than or equal */ -#define CC_UL 0x00010000 /* unordered or less than */ -#define CC_UGE 0x00020000 /* unordered or greater than or equal */ - -/* - * Rounding mode position in FPCR - */ -#define FPCR_RD_SHIFT 14 -#define FPCR_RD_MASK 0x03 - -/* - * Data width (matching the TD field of the instructions) - */ -#define FTYPE_SNG 0 -#define FTYPE_DBL 1 -#define FTYPE_EXT 2 -#define FTYPE_INT 3 /* not a real T value */ - -/* - * Emulator state. - */ -struct fpemu { - struct trapframe *fe_fpstate; /* registers, etc */ -#define fe_fpecr fe_fpstate->tf_fpecr -#define fe_fpsr fe_fpstate->tf_fpsr -#define fe_fpcr fe_fpstate->tf_fpcr - struct fpn fe_f1; /* operand 1 */ - struct fpn fe_f2; /* operand 2, if required */ - struct fpn fe_f3; /* available storage for result */ -}; - -/* - * Arithmetic functions. - * Each of these may modify its inputs (f1,f2) and/or the temporary. 
- * Each returns a pointer to the result and/or sets exceptions. - */ -struct fpn *fpu_add(struct fpemu *); -#define fpu_sub(fe) ((fe)->fe_f2.fp_sign ^= 1, fpu_add(fe)) -struct fpn *fpu_mul(struct fpemu *); -struct fpn *fpu_div(struct fpemu *); -struct fpn *fpu_sqrt(struct fpemu *); - -/* - * Other functions. - */ - -/* Perform a compare instruction (with or without unordered exception). */ -u_int32_t fpu_compare(struct fpemu *, int); - -/* Build a new Quiet NaN (sign=0, frac=all 1's). */ -struct fpn *fpu_newnan(struct fpemu *, int); - -void fpu_norm(register struct fpn *); - -/* - * Shift a number right some number of bits, taking care of round/sticky. - * Note that the result is probably not a well-formed number (it will lack - * the normal 1-bit mant[0]&FP_1). - */ -int fpu_shr(struct fpn *, int); - -void fpu_explode(struct fpemu *, struct fpn *, int, u_int); -void fpu_implode(struct fpemu *, struct fpn *, int, u_int *); diff --git a/sys/arch/m88k/fpu/fpu_explode.c b/sys/arch/m88k/fpu/fpu_explode.c deleted file mode 100644 index 76e4e8f7e18..00000000000 --- a/sys/arch/m88k/fpu/fpu_explode.c +++ /dev/null @@ -1,255 +0,0 @@ -/* $OpenBSD: fpu_explode.c,v 1.1 2007/12/25 00:29:49 miod Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Lawrence Berkeley Laboratory. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fpu_explode.c 8.1 (Berkeley) 6/11/93 - */ - -/* - * FPU subroutines: `explode' the machine's `packed binary' format numbers - * into our internal format. 
- */ - -#include <sys/types.h> -#include <sys/systm.h> - -#include <machine/fpu.h> -#include <machine/frame.h> -#include <machine/ieee.h> - -#include <m88k/fpu/fpu_arith.h> -#include <m88k/fpu/fpu_emu.h> - -int fpu_itof(struct fpn *, u_int); -int fpu_stof(struct fpn *, u_int); -int fpu_dtof(struct fpn *, u_int, u_int); -int fpu_xtof(struct fpn *, u_int, u_int , u_int , u_int); - -/* - * N.B.: in all of the following, we assume the FP format is - * - * --------------------------- - * | s | exponent | fraction | - * --------------------------- - * - * (which represents -1**s * 1.fraction * 2**exponent), so that the - * sign bit is way at the top (bit 31), the exponent is next, and - * then the remaining bits mark the fraction. A zero exponent means - * zero or denormalized (0.fraction rather than 1.fraction), and the - * maximum possible exponent, 2bias+1, signals inf (fraction==0) or NaN. - * - * Since the sign bit is always the topmost bit---this holds even for - * integers---we set that outside all the *tof functions. Each function - * returns the class code for the new number (but note that we use - * FPC_QNAN for all NaNs; fpu_explode will fix this if appropriate). - */ - -/* - * int -> fpn. - */ -int -fpu_itof(struct fpn *fp, u_int i) -{ - - if (i == 0) - return (FPC_ZERO); - /* - * The value FP_1 represents 2^FP_LG, so set the exponent - * there and let normalization fix it up. Convert negative - * numbers to sign-and-magnitude. Note that this relies on - * fpu_norm()'s handling of `supernormals'; see fpu_subr.c. - */ - fp->fp_exp = FP_LG; - fp->fp_mant[0] = (int)i < 0 ? -i : i; - fp->fp_mant[1] = 0; - fp->fp_mant[2] = 0; - fp->fp_mant[3] = 0; - fpu_norm(fp); - return (FPC_NUM); -} - -#define mask(nbits) ((1 << (nbits)) - 1) - -/* - * All external floating formats convert to internal in the same manner, - * as defined here. Note that only normals get an implied 1.0 inserted. 
- */ -#define FP_TOF(exp, expbias, allfrac, f0, f1, f2, f3) \ - if (exp == 0) { \ - if (allfrac == 0) \ - return (FPC_ZERO); \ - fp->fp_exp = 1 - expbias; \ - fp->fp_mant[0] = f0; \ - fp->fp_mant[1] = f1; \ - fp->fp_mant[2] = f2; \ - fp->fp_mant[3] = f3; \ - fpu_norm(fp); \ - return (FPC_NUM); \ - } \ - if (exp == (2 * expbias + 1)) { \ - if (allfrac == 0) \ - return (FPC_INF); \ - fp->fp_mant[0] = f0; \ - fp->fp_mant[1] = f1; \ - fp->fp_mant[2] = f2; \ - fp->fp_mant[3] = f3; \ - return (FPC_QNAN); \ - } \ - fp->fp_exp = exp - expbias; \ - fp->fp_mant[0] = FP_1 | f0; \ - fp->fp_mant[1] = f1; \ - fp->fp_mant[2] = f2; \ - fp->fp_mant[3] = f3; \ - return (FPC_NUM) - -/* - * 32-bit single precision -> fpn. - * We assume a single occupies at most (64-FP_LG) bits in the internal - * format: i.e., needs at most fp_mant[0] and fp_mant[1]. - */ -int -fpu_stof(struct fpn *fp, u_int i) -{ - int exp; - u_int frac, f0, f1; -#define SNG_SHIFT (SNG_FRACBITS - FP_LG) - - exp = (i >> (32 - 1 - SNG_EXPBITS)) & mask(SNG_EXPBITS); - frac = i & mask(SNG_FRACBITS); - f0 = frac >> SNG_SHIFT; - f1 = frac << (32 - SNG_SHIFT); - FP_TOF(exp, SNG_EXP_BIAS, frac, f0, f1, 0, 0); -} - -/* - * 64-bit double -> fpn. - * We assume this uses at most (96-FP_LG) bits. - */ -int -fpu_dtof(struct fpn *fp, u_int i, u_int j) -{ - int exp; - u_int frac, f0, f1, f2; -#define DBL_SHIFT (DBL_FRACBITS - 32 - FP_LG) - - exp = (i >> (32 - 1 - DBL_EXPBITS)) & mask(DBL_EXPBITS); - frac = i & mask(DBL_FRACBITS - 32); - f0 = frac >> DBL_SHIFT; - f1 = (frac << (32 - DBL_SHIFT)) | (j >> DBL_SHIFT); - f2 = j << (32 - DBL_SHIFT); - frac |= j; - FP_TOF(exp, DBL_EXP_BIAS, frac, f0, f1, f2, 0); -} - -#ifdef notyet -/* - * 128-bit extended -> fpn. - */ -int -fpu_xtof(struct fpn *fp, u_int i, u_int j, u_int k, u_int l) -{ - int exp; - u_int frac, f0, f1, f2, f3; -#define EXT_SHIFT (-(EXT_FRACBITS - 3 * 32 - FP_LG)) /* left shift! */ - - /* - * Note that ext and fpn `line up', hence no shifting needed. 
- */ - exp = (i >> (32 - 1 - EXT_EXPBITS)) & mask(EXT_EXPBITS); - frac = i & mask(EXT_FRACBITS - 3 * 32); - f0 = (frac << EXT_SHIFT) | (j >> (32 - EXT_SHIFT)); - f1 = (j << EXT_SHIFT) | (k >> (32 - EXT_SHIFT)); - f2 = (k << EXT_SHIFT) | (l >> (32 - EXT_SHIFT)); - f3 = l << EXT_SHIFT; - frac |= j | k | l; - FP_TOF(exp, EXT_EXP_BIAS, frac, f0, f1, f2, f3); -} -#endif - -/* - * Explode the contents of a register / regpair / regquad. - * If the input is a signalling NaN, an NV (invalid) exception - * will be set. (Note that nothing but NV can occur until ALU - * operations are performed.) - */ -void -fpu_explode(struct fpemu *fe, struct fpn *fp, int type, u_int reg) -{ - u_int s; - - s = reg == 0 ? 0 : fe->fe_fpstate->tf_r[reg]; - fp->fp_sign = s >> 31; - fp->fp_sticky = 0; - switch (type) { - - case FTYPE_INT: - s = fpu_itof(fp, s); - break; - - case FTYPE_SNG: - s = fpu_stof(fp, s); - break; - - case FTYPE_DBL: - s = fpu_dtof(fp, s, - reg == 31 ? 0 : fe->fe_fpstate->tf_r[reg + 1]); - break; - -#ifdef notyet - case FTYPE_EXT: - s = fpu_xtof(fp, s, fe->fe_fpstate->tf_r[reg + 1], - fe->fe_fpstate->tf_r[reg + 2], - fe->fe_fpstate->tf_r[reg + 3]); - break; -#endif - } - - if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) { - /* - * Input is a signalling NaN. All operations that return - * an input NaN operand put it through a ``NaN conversion'', - * which basically just means ``turn on the quiet bit''. - * We do this here so that all NaNs internally look quiet - * (we can tell signalling ones by their class). 
- */ - fp->fp_mant[0] |= FP_QUIETBIT; - fe->fe_fpsr |= FPSR_EFINV; /* assert invalid operand */ - s = FPC_SNAN; - } - fp->fp_class = s; -} diff --git a/sys/arch/m88k/fpu/fpu_implode.c b/sys/arch/m88k/fpu/fpu_implode.c deleted file mode 100644 index 91b67ee2093..00000000000 --- a/sys/arch/m88k/fpu/fpu_implode.c +++ /dev/null @@ -1,462 +0,0 @@ -/* $OpenBSD: fpu_implode.c,v 1.3 2007/12/26 18:29:33 miod Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Lawrence Berkeley Laboratory. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fpu_implode.c 8.1 (Berkeley) 6/11/93 - */ - -/* - * FPU subroutines: `implode' internal format numbers into the machine's - * `packed binary' format. - */ - -#include <sys/types.h> -#include <sys/systm.h> - -#include <machine/fpu.h> -#include <machine/frame.h> -#include <machine/ieee.h> -#include <machine/ieeefp.h> - -#include <m88k/fpu/fpu_arith.h> -#include <m88k/fpu/fpu_emu.h> - -static int round(struct fpemu *, struct fpn *); -static int toinf(struct fpemu *, int); - -u_int fpu_ftoi(struct fpemu *, struct fpn *); -u_int fpu_ftos(struct fpemu *, struct fpn *); -u_int fpu_ftod(struct fpemu *, struct fpn *, u_int *); -u_int fpu_ftox(struct fpemu *, struct fpn *, u_int *); - -/* - * Round a number (algorithm from Motorola MC68882 manual, modified for - * our internal format). Set inexact exception if rounding is required. - * Return true iff we rounded up. - * - * After rounding, we discard the guard and round bits by shifting right - * 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky). - * This saves effort later. - * - * Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's - * responsibility to fix this if necessary. 
- */ -static int -round(struct fpemu *fe, struct fpn *fp) -{ - u_int m0, m1, m2, m3; - int gr, s; - - m0 = fp->fp_mant[0]; - m1 = fp->fp_mant[1]; - m2 = fp->fp_mant[2]; - m3 = fp->fp_mant[3]; - gr = m3 & 3; - s = fp->fp_sticky; - - /* mant >>= FP_NG */ - m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG)); - m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG)); - m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG)); - m0 >>= FP_NG; - - if ((gr | s) == 0) /* result is exact: no rounding needed */ - goto rounddown; - - fe->fe_fpsr |= FPSR_EFINX; /* inexact */ - - /* Go to rounddown to round down; break to round up. */ - switch ((fe->fe_fpcr >> FPCR_RD_SHIFT) & FPCR_RD_MASK) { - - case FP_RN: - default: - /* - * Round only if guard is set (gr & 2). If guard is set, - * but round & sticky both clear, then we want to round - * but have a tie, so round to even, i.e., add 1 iff odd. - */ - if ((gr & 2) == 0) - goto rounddown; - if ((gr & 1) || fp->fp_sticky || (m3 & 1)) - break; - goto rounddown; - - case FP_RZ: - /* Round towards zero, i.e., down. */ - goto rounddown; - - case FP_RM: - /* Round towards -Inf: up if negative, down if positive. */ - if (fp->fp_sign) - break; - goto rounddown; - - case FP_RP: - /* Round towards +Inf: up if positive, down otherwise. */ - if (!fp->fp_sign) - break; - goto rounddown; - } - - /* Bump low bit of mantissa, with carry. */ - if (++m3 == 0 && ++m2 == 0 && ++m1 == 0) - m0++; - - fp->fp_mant[0] = m0; - fp->fp_mant[1] = m1; - fp->fp_mant[2] = m2; - fp->fp_mant[3] = m3; - return (1); - -rounddown: - fp->fp_mant[0] = m0; - fp->fp_mant[1] = m1; - fp->fp_mant[2] = m2; - fp->fp_mant[3] = m3; - return (0); -} - -/* - * For overflow: return true if overflow is to go to +/-Inf, according - * to the sign of the overflowing result. If false, overflow is to go - * to the largest magnitude value instead. 
- */ -static int -toinf(struct fpemu *fe, int sign) -{ - int inf; - - /* look at rounding direction */ - switch ((fe->fe_fpcr >> FPCR_RD_SHIFT) & FPCR_RD_MASK) { - - default: - case FP_RN: /* the nearest value is always Inf */ - inf = 1; - break; - - case FP_RZ: /* toward 0 => never towards Inf */ - inf = 0; - break; - - case FP_RP: /* toward +Inf iff positive */ - inf = sign == 0; - break; - - case FP_RM: /* toward -Inf iff negative */ - inf = sign; - break; - } - return (inf); -} - -/* - * fpn -> int (int value returned as return value). - */ -u_int -fpu_ftoi(struct fpemu *fe, struct fpn *fp) -{ - u_int i; - int sign, exp, sticky, rd; - - sign = fp->fp_sign; - switch (fp->fp_class) { - - case FPC_ZERO: - return (0); - - case FPC_NUM: - /* - * If exp >= 2^32, overflow. Otherwise shift value right - * into last mantissa word (this will not exceed 0xffffffff), - * shifting any guard and round bits out into the sticky - * bit. - * Then round according to the rounding mode. - * If the result is > 0x80000000, or is positive and equals - * 0x80000000, overflow; otherwise the last fraction word - * is the result. - */ - if ((exp = fp->fp_exp) >= 32) - break; - /* NB: the following includes exp < 0 cases */ - if ((sticky = fpu_shr(fp, FP_NMANT - 1 - exp)) != 0) - fe->fe_fpsr |= FPSR_EFINX; - i = fp->fp_mant[3]; - rd = (fe->fe_fpcr >> FPCR_RD_SHIFT) & FPCR_RD_MASK; - if ((rd == FP_RN && sticky != 0) || - (rd == FP_RM && sign != 0) || (rd == FP_RP && sign == 0)) { - /* round up */ - i++; - } - if (i >= ((u_int)0x80000000 + sign)) - break; - return (sign ? -i : i); - - default: /* Inf, qNaN, sNaN */ - break; - } - - /* Inf or NaN: replace any inexact exception with invalid */ - fe->fe_fpsr = (fe->fe_fpsr & ~FPSR_EFINX) | FPSR_EFINV; - return (0x7fffffff + sign); -} - -/* - * fpn -> single (32 bit single returned as return value). - * We assume <= 29 bits in a single-precision fraction (1.f part). 
- */ -u_int -fpu_ftos(struct fpemu *fe, struct fpn *fp) -{ - u_int sign = fp->fp_sign << 31; - int exp; - -#define SNG_EXP(e) ((e) << SNG_FRACBITS) /* makes e an exponent */ -#define SNG_MASK (SNG_EXP(1) - 1) /* mask for fraction */ - - /* Take care of non-numbers first. */ - if (ISNAN(fp)) { - /* - * Preserve upper bits of NaN, per SPARC V8 appendix N. - * Note that fp->fp_mant[0] has the quiet bit set, - * even if it is classified as a signalling NaN. - */ - (void) fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS); - exp = SNG_EXP_INFNAN; - goto done; - } - if (ISINF(fp)) - return (sign | SNG_EXP(SNG_EXP_INFNAN)); - if (ISZERO(fp)) - return (sign); - - /* - * Normals (including subnormals). Drop all the fraction bits - * (including the explicit ``implied'' 1 bit) down into the - * single-precision range. If the number is subnormal, move - * the ``implied'' 1 into the explicit range as well, and shift - * right to introduce leading zeroes. Rounding then acts - * differently for normals and subnormals: the largest subnormal - * may round to the smallest normal (1.0 x 2^minexp), or may - * remain subnormal. In the latter case, signal an underflow - * if the result was inexact. - * - * Rounding a normal, on the other hand, always produces another - * normal (although either way the result might be too big for - * single precision, and cause an overflow). If rounding a - * normal produces 2.0 in the fraction, we need not adjust that - * fraction at all, since both 1.0 and 2.0 are zero under the - * fraction mask. - * - * Note that the guard and round bits vanish from the number after - * rounding. 
- */ - if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) { /* subnormal */ - /* -NG for g,r; -SNG_FRACBITS-exp for fraction */ - (void) fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp); - if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1)) - return (sign | SNG_EXP(1) | 0); - if (fe->fe_fpsr & FPSR_EFINX) - fe->fe_fpsr |= FPSR_EFUNF; - return (sign | SNG_EXP(0) | fp->fp_mant[3]); - } - /* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */ - (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS); -#ifdef DIAGNOSTIC - if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0) - panic("fpu_ftos"); -#endif - if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2)) - exp++; - if (exp >= SNG_EXP_INFNAN) { - /* overflow to inf or to max single */ - fe->fe_fpsr |= FPSR_EFOVF | FPSR_EFINX; - if (toinf(fe, sign)) - return (sign | SNG_EXP(SNG_EXP_INFNAN)); - return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK); - } -done: - /* phew, made it */ - return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK)); -} - -/* - * fpn -> double (32 bit high-order result returned; 32-bit low order result - * left in res[1]). Assumes <= 61 bits in double precision fraction. - * - * This code mimics fpu_ftos; see it for comments. 
- */ -u_int -fpu_ftod(struct fpemu *fe, struct fpn *fp, u_int *res) -{ - u_int sign = fp->fp_sign << 31; - int exp; - -#define DBL_EXP(e) ((e) << (DBL_FRACBITS & 31)) -#define DBL_MASK (DBL_EXP(1) - 1) - - if (ISNAN(fp)) { - (void) fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS); - exp = DBL_EXP_INFNAN; - goto done; - } - if (ISINF(fp)) { - sign |= DBL_EXP(DBL_EXP_INFNAN); - goto zero; - } - if (ISZERO(fp)) { -zero: res[1] = 0; - return (sign); - } - - if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) { - (void) fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp); - if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) { - res[1] = 0; - return (sign | DBL_EXP(1) | 0); - } - if (fe->fe_fpsr & FPSR_EFINX) - fe->fe_fpsr |= FPSR_EFUNF; - exp = 0; - goto done; - } - (void) fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS); - if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2)) - exp++; - if (exp >= DBL_EXP_INFNAN) { - fe->fe_fpsr |= FPSR_EFOVF | FPSR_EFINX; - if (toinf(fe, sign)) { - res[1] = 0; - return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0); - } - res[1] = ~0; - return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK); - } -done: - res[1] = fp->fp_mant[3]; - return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK)); -} - -#ifdef notyet -/* - * fpn -> extended (32 bit high-order result returned; low-order fraction - * words left in res[1]..res[3]). Like ftod, which is like ftos ... but - * our internal format *is* extended precision, plus 2 bits for guard/round, - * so we can avoid a small bit of work. 
- */ -u_int -fpu_ftox(struct fpemu *fe, struct fpn *fp, u_int *res) -{ - u_int sign = fp->fp_sign << 31; - int exp; - -#define EXT_EXP(e) ((e) << (EXT_FRACBITS & 31)) -#define EXT_MASK (EXT_EXP(1) - 1) - - if (ISNAN(fp)) { - (void) fpu_shr(fp, 2); /* since we are not rounding */ - exp = EXT_EXP_INFNAN; - goto done; - } - if (ISINF(fp)) { - sign |= EXT_EXP(EXT_EXP_INFNAN); - goto zero; - } - if (ISZERO(fp)) { -zero: res[1] = res[2] = res[3] = 0; - return (sign); - } - - if ((exp = fp->fp_exp + EXT_EXP_BIAS) <= 0) { - (void) fpu_shr(fp, FP_NMANT - FP_NG - EXT_FRACBITS - exp); - if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(1)) { - res[1] = res[2] = res[3] = 0; - return (sign | EXT_EXP(1) | 0); - } - if (fe->fe_fpsr & FPSR_EFINX) - fe->fe_fpsr |= FPSR_EFUNF; - exp = 0; - goto done; - } - /* Since internal == extended, no need to shift here. */ - if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(2)) - exp++; - if (exp >= EXT_EXP_INFNAN) { - fe->fe_fpsr |= FPSR_EFOVF | FPSR_EFINX; - if (toinf(fe, sign)) { - res[1] = res[2] = res[3] = 0; - return (sign | EXT_EXP(EXT_EXP_INFNAN) | 0); - } - res[1] = res[2] = res[3] = ~0; - return (sign | EXT_EXP(EXT_EXP_INFNAN) | EXT_MASK); - } -done: - res[1] = fp->fp_mant[1]; - res[2] = fp->fp_mant[2]; - res[3] = fp->fp_mant[3]; - return (sign | EXT_EXP(exp) | (fp->fp_mant[0] & EXT_MASK)); -} -#endif - -/* - * Implode an fpn, writing the result into the given space. 
- */ -void -fpu_implode(struct fpemu *fe, struct fpn *fp, int type, u_int *space) -{ - - switch (type) { - - case FTYPE_INT: - space[0] = fpu_ftoi(fe, fp); - break; - - case FTYPE_SNG: - space[0] = fpu_ftos(fe, fp); - break; - - case FTYPE_DBL: - space[0] = fpu_ftod(fe, fp, space); - break; - -#ifdef notyet - case FTYPE_EXT: - space[0] = fpu_ftox(fe, fp, space); - break; -#endif - } -} diff --git a/sys/arch/m88k/fpu/fpu_mul.c b/sys/arch/m88k/fpu/fpu_mul.c deleted file mode 100644 index 50e5d13b625..00000000000 --- a/sys/arch/m88k/fpu/fpu_mul.c +++ /dev/null @@ -1,223 +0,0 @@ -/* $OpenBSD: fpu_mul.c,v 1.2 2007/12/25 15:47:16 miod Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Lawrence Berkeley Laboratory. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fpu_mul.c 8.1 (Berkeley) 6/11/93 - */ - -/* - * Perform an FPU multiply (return x * y). - */ - -#include <sys/types.h> - -#include <machine/fpu.h> -#include <machine/frame.h> - -#include <m88k/fpu/fpu_arith.h> -#include <m88k/fpu/fpu_emu.h> - -/* - * The multiplication algorithm for normal numbers is as follows: - * - * The fraction of the product is built in the usual stepwise fashion. - * Each step consists of shifting the accumulator right one bit - * (maintaining any guard bits) and, if the next bit in y is set, - * adding the multiplicand (x) to the accumulator. Then, in any case, - * we advance one bit leftward in y. Algorithmically: - * - * A = 0; - * for (bit = 0; bit < FP_NMANT; bit++) { - * sticky |= A & 1, A >>= 1; - * if (Y & (1 << bit)) - * A += X; - * } - * - * (X and Y here represent the mantissas of x and y respectively.) - * The resultant accumulator (A) is the product's mantissa. It may - * be as large as 11.11111... in binary and hence may need to be - * shifted right, but at most one bit. - * - * Since we do not have efficient multiword arithmetic, we code the - * accumulator as four separate words, just like any other mantissa. 
- * We use local `register' variables in the hope that this is faster - * than memory. We keep x->fp_mant in locals for the same reason. - * - * In the algorithm above, the bits in y are inspected one at a time. - * We will pick them up 32 at a time and then deal with those 32, one - * at a time. Note, however, that we know several things about y: - * - * - the guard and round bits at the bottom are sure to be zero; - * - * - often many low bits are zero (y is often from a single or double - * precision source); - * - * - bit FP_NMANT-1 is set, and FP_1*2 fits in a word. - * - * We can also test for 32-zero-bits swiftly. In this case, the center - * part of the loop---setting sticky, shifting A, and not adding---will - * run 32 times without adding X to A. We can do a 32-bit shift faster - * by simply moving words. Since zeros are common, we optimize this case. - * Furthermore, since A is initially zero, we can omit the shift as well - * until we reach a nonzero word. - * - * The operands are fe->fe_f1 and fe->fe_f2. The return value points at - * whichever of them ends up holding the result (or at whatever - * fpu_newnan() returns for 0 * Inf), so the inputs are clobbered. - */ -struct fpn * -fpu_mul(struct fpemu *fe) -{ - struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2; - u_int a3, a2, a1, a0, x3, x2, x1, x0, bit, m; - int sticky; - FPU_DECL_CARRY - - /* - * Put the `heavier' operand on the right (see fpu_emu.h). - * Then we will have one of the following cases, taken in the - * following order: - * - * - y = NaN. Implied: if only one is a signalling NaN, y is. - * The result is y. - * - y = Inf. Implied: x != NaN (is 0, number, or Inf: the NaN - * case was taken care of earlier). - * If x = 0, the result is NaN. Otherwise the result - * is y, with its sign reversed if x is negative. - * - x = 0. Implied: y is 0 or number. - * The result is 0 (with XORed sign as usual). - * - other. Implied: both x and y are numbers. - * The result is x * y (XOR sign, multiply bits, add exponents).
- */ - ORDER(x, y); - if (ISNAN(y)) { - y->fp_sign ^= x->fp_sign; - fe->fe_fpsr |= FPSR_EFINV; - return (y); - } - if (ISINF(y)) { - if (ISZERO(x)) - return (fpu_newnan(fe, 0)); - y->fp_sign ^= x->fp_sign; - return (y); - } - if (ISZERO(x)) { - x->fp_sign ^= y->fp_sign; - return (x); - } - - /* - * Setup. In the code below, the mask `m' will hold the current - * 32-bit mantissa word from y. The variable `bit' denotes the bit - * within m. We also define some macros to deal with everything. - */ - x3 = x->fp_mant[3]; - x2 = x->fp_mant[2]; - x1 = x->fp_mant[1]; - x0 = x->fp_mant[0]; - sticky = a3 = a2 = a1 = a0 = 0; - -#define ADD /* A += X */ \ - FPU_ADDS(a3, a3, x3); \ - FPU_ADDCS(a2, a2, x2); \ - FPU_ADDCS(a1, a1, x1); \ - FPU_ADDC(a0, a0, x0) - -#define SHR1 /* A >>= 1, with sticky */ \ - sticky |= a3 & 1, a3 = (a3 >> 1) | (a2 << 31), \ - a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1 - -#define SHR32 /* A >>= 32, with sticky */ \ - sticky |= a3, a3 = a2, a2 = a1, a1 = a0, a0 = 0 - -#define STEP /* each 1-bit step of the multiplication */ \ - SHR1; if (bit & m) { ADD; }; bit <<= 1 - - /* - * We are ready to begin. The multiply loop runs once for each - * of the four 32-bit words. Some words, however, are special. - * As noted above, the low order bits of Y are often zero. Even - * if not, the first loop can certainly skip the guard bits. - * The last word of y has its highest 1-bit in position FP_NMANT-1, - * so we stop the loop when we move past that bit. - */ - if ((m = y->fp_mant[3]) == 0) { - /* SHR32; */ /* unneeded since A==0 */ - } else { - bit = 1 << FP_NG; - do { - STEP; - } while (bit != 0); - } - if ((m = y->fp_mant[2]) == 0) { - SHR32; - } else { - bit = 1; - do { - STEP; - } while (bit != 0); - } - if ((m = y->fp_mant[1]) == 0) { - SHR32; - } else { - bit = 1; - do { - STEP; - } while (bit != 0); - } - m = y->fp_mant[0]; /* definitely != 0 */ - bit = 1; - do { - STEP; - } while (bit <= m); - - /* - * Done with mantissa calculation.
Get exponent and handle - * 11.111...1 case, then put result in place. We reuse x since - * it already has the right class (FPC_NUM). Note that `m' is no - * longer needed as a mantissa word and is reused below to hold - * the result exponent. - */ - m = x->fp_exp + y->fp_exp; - if (a0 >= FP_2) { - SHR1; - m++; - } - x->fp_sign ^= y->fp_sign; - x->fp_exp = m; - x->fp_sticky = sticky; - x->fp_mant[3] = a3; - x->fp_mant[2] = a2; - x->fp_mant[1] = a1; - x->fp_mant[0] = a0; - return (x); -} diff --git a/sys/arch/m88k/fpu/fpu_sqrt.c b/sys/arch/m88k/fpu/fpu_sqrt.c deleted file mode 100644 index 8ec6e6ba262..00000000000 --- a/sys/arch/m88k/fpu/fpu_sqrt.c +++ /dev/null @@ -1,392 +0,0 @@ -/* $OpenBSD: fpu_sqrt.c,v 1.1 2007/12/25 00:29:49 miod Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Lawrence Berkeley Laboratory. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission.
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fpu_sqrt.c 8.1 (Berkeley) 6/11/93 - */ - -/* - * Perform an FPU square root (return sqrt(x)). - */ - -#include <sys/types.h> - -#include <machine/frame.h> - -#include <m88k/fpu/fpu_arith.h> -#include <m88k/fpu/fpu_emu.h> - -/* - * Our task is to calculate the square root of a floating point number x0. 
- * This number x normally has the form: - * - * exp - * x = mant * 2 (where 1 <= mant < 2 and exp is an integer) - * - * This can be left as it stands, or the mantissa can be doubled and the - * exponent decremented: - * - * exp-1 - * x = (2 * mant) * 2 (where 2 <= 2 * mant < 4) - * - * If the exponent `exp' is even, the square root of the number is best - * handled using the first form, and is by definition equal to: - * - * exp/2 - * sqrt(x) = sqrt(mant) * 2 - * - * If exp is odd, on the other hand, it is convenient to use the second - * form, giving: - * - * (exp-1)/2 - * sqrt(x) = sqrt(2 * mant) * 2 - * - * In the first case, we have - * - * 1 <= mant < 2 - * - * and therefore - * - * sqrt(1) <= sqrt(mant) < sqrt(2) - * - * while in the second case we have - * - * 2 <= 2*mant < 4 - * - * and therefore - * - * sqrt(2) <= sqrt(2*mant) < sqrt(4) - * - * so that in any case, we are sure that - * - * sqrt(1) <= sqrt(n * mant) < sqrt(4), n = 1 or 2 - * - * or - * - * 1 <= sqrt(n * mant) < 2, n = 1 or 2. - * - * This root is therefore a properly formed mantissa for a floating - * point number. The exponent of sqrt(x) is either exp/2 or (exp-1)/2 - * as above. This leaves us with the problem of finding the square root - * of a fixed-point number in the range [1..4). - * - * Though it may not be instantly obvious, the following square root - * algorithm works for any integer x of an even number of bits, provided - * that no overflows occur: - * - * let q = 0 - * for k = NBITS-1 to 0 step -1 do -- for each digit in the answer... - * x *= 2 -- multiply by radix, for next digit - * if x >= 2q + 2^k then -- if adding 2^k does not - * x -= 2q + 2^k -- exceed the correct root, - * q += 2^k -- add 2^k and adjust x - * fi - * done - * sqrt = q / 2^(NBITS/2) -- (and any remainder is in x) - * - * If NBITS is odd (so that k is initially even), we can just add another - * zero bit at the top of x. 
Doing so means that q is not going to acquire - * a 1 bit in the first trip around the loop (since x0 < 2^NBITS). If the - * final value in x is not needed, or can be off by a factor of 2, this is - * equivalant to moving the `x *= 2' step to the bottom of the loop: - * - * for k = NBITS-1 to 0 step -1 do if ... fi; x *= 2; done - * - * and the result q will then be sqrt(x0) * 2^floor(NBITS / 2). - * (Since the algorithm is destructive on x, we will call x's initial - * value, for which q is some power of two times its square root, x0.) - * - * If we insert a loop invariant y = 2q, we can then rewrite this using - * C notation as: - * - * q = y = 0; x = x0; - * for (k = NBITS; --k >= 0;) { - * #if (NBITS is even) - * x *= 2; - * #endif - * t = y + (1 << k); - * if (x >= t) { - * x -= t; - * q += 1 << k; - * y += 1 << (k + 1); - * } - * #if (NBITS is odd) - * x *= 2; - * #endif - * } - * - * If x0 is fixed point, rather than an integer, we can simply alter the - * scale factor between q and sqrt(x0). As it happens, we can easily arrange - * for the scale factor to be 2**0 or 1, so that sqrt(x0) == q. - * - * In our case, however, x0 (and therefore x, y, q, and t) are multiword - * integers, which adds some complication. But note that q is built one - * bit at a time, from the top down, and is not used itself in the loop - * (we use 2q as held in y instead). This means we can build our answer - * in an integer, one word at a time, which saves a bit of work. Also, - * since 1 << k is always a `new' bit in q, 1 << k and 1 << (k+1) are - * `new' bits in y and we can set them with an `or' operation rather than - * a full-blown multiword add. - * - * We are almost done, except for one snag. We must prove that none of our - * intermediate calculations can overflow. We know that x0 is in [1..4) - * and therefore the square root in q will be in [1..2), but what about x, - * y, and t? - * - * We know that y = 2q at the beginning of each loop. 
(The relation only - * fails temporarily while y and q are being updated.) Since q < 2, y < 4. - * The sum in t can, in our case, be as much as y+(1<<1) = y+2 < 6, and. - * Furthermore, we can prove with a bit of work that x never exceeds y by - * more than 2, so that even after doubling, 0 <= x < 8. (This is left as - * an exercise to the reader, mostly because I have become tired of working - * on this comment.) - * - * If our floating point mantissas (which are of the form 1.frac) occupy - * B+1 bits, our largest intermediary needs at most B+3 bits, or two extra. - * In fact, we want even one more bit (for a carry, to avoid compares), or - * three extra. There is a comment in fpu_emu.h reminding maintainers of - * this, so we have some justification in assuming it. - */ -struct fpn * -fpu_sqrt(struct fpemu *fe) -{ - struct fpn *x = &fe->fe_f1; - u_int bit, q, tt; - u_int x0, x1, x2, x3; - u_int y0, y1, y2, y3; - u_int d0, d1, d2, d3; - int e; - - /* - * Take care of special cases first. In order: - * - * sqrt(NaN) = NaN - * sqrt(+0) = +0 - * sqrt(-0) = -0 - * sqrt(x < 0) = NaN (including sqrt(-Inf)) - * sqrt(+Inf) = +Inf - * - * Then all that remains are numbers with mantissas in [1..2). - */ - if (ISNAN(x) || ISZERO(x)) - return (x); - if (x->fp_sign) - return (fpu_newnan(fe, 1)); - if (ISINF(x)) - return (x); - - /* - * Calculate result exponent. As noted above, this may involve - * doubling the mantissa. We will also need to double x each - * time around the loop, so we define a macro for this here, and - * we break out the multiword mantissa. 
- */ -#ifdef FPU_SHL1_BY_ADD -#define DOUBLE_X { \ - FPU_ADDS(x3, x3, x3); FPU_ADDCS(x2, x2, x2); \ - FPU_ADDCS(x1, x1, x1); FPU_ADDC(x0, x0, x0); \ -} -#else -#define DOUBLE_X { \ - x0 = (x0 << 1) | (x1 >> 31); x1 = (x1 << 1) | (x2 >> 31); \ - x2 = (x2 << 1) | (x3 >> 31); x3 <<= 1; \ -} -#endif -#if (FP_NMANT & 1) != 0 -# define ODD_DOUBLE DOUBLE_X -# define EVEN_DOUBLE /* nothing */ -#else -# define ODD_DOUBLE /* nothing */ -# define EVEN_DOUBLE DOUBLE_X -#endif - x0 = x->fp_mant[0]; - x1 = x->fp_mant[1]; - x2 = x->fp_mant[2]; - x3 = x->fp_mant[3]; - e = x->fp_exp; - if (e & 1) /* exponent is odd; use sqrt(2mant) */ - DOUBLE_X; - /* THE FOLLOWING ASSUMES THAT RIGHT SHIFT DOES SIGN EXTENSION */ - x->fp_exp = e >> 1; /* calculates (e&1 ? (e-1)/2 : e/2 */ - - /* - * Now calculate the mantissa root. Since x is now in [1..4), - * we know that the first trip around the loop will definitely - * set the top bit in q, so we can do that manually and start - * the loop at the next bit down instead. We must be sure to - * double x correctly while doing the `known q=1.0'. - * - * We do this one mantissa-word at a time, as noted above, to - * save work. To avoid `(1 << 31) << 1', we also do the top bit - * outside of each per-word loop. - * - * The calculation `t = y + bit' breaks down into `t0 = y0, ..., - * t3 = y3, t? |= bit' for the appropriate word. Since the bit - * is always a `new' one, this means that three of the `t?'s are - * just the corresponding `y?'; we use `#define's here for this. - * The variable `tt' holds the actual `t?' variable. 
- */ - - /* calculate q0 */ -#define t0 tt - bit = FP_1; - EVEN_DOUBLE; - /* if (x >= (t0 = y0 | bit)) { */ /* always true */ - q = bit; - x0 -= bit; - y0 = bit << 1; - /* } */ - ODD_DOUBLE; - while ((bit >>= 1) != 0) { /* for remaining bits in q0 */ - EVEN_DOUBLE; - t0 = y0 | bit; /* t = y + bit */ - if (x0 >= t0) { /* if x >= t then */ - x0 -= t0; /* x -= t */ - q |= bit; /* q += bit */ - y0 |= bit << 1; /* y += bit << 1 */ - } - ODD_DOUBLE; - } - x->fp_mant[0] = q; -#undef t0 - - /* calculate q1. note (y0&1)==0. */ -#define t0 y0 -#define t1 tt - q = 0; - y1 = 0; - bit = 1 << 31; - EVEN_DOUBLE; - t1 = bit; - FPU_SUBS(d1, x1, t1); - FPU_SUBC(d0, x0, t0); /* d = x - t */ - if ((int)d0 >= 0) { /* if d >= 0 (i.e., x >= t) then */ - x0 = d0, x1 = d1; /* x -= t */ - q = bit; /* q += bit */ - y0 |= 1; /* y += bit << 1 */ - } - ODD_DOUBLE; - while ((bit >>= 1) != 0) { /* for remaining bits in q1 */ - EVEN_DOUBLE; /* as before */ - t1 = y1 | bit; - FPU_SUBS(d1, x1, t1); - FPU_SUBC(d0, x0, t0); - if ((int)d0 >= 0) { - x0 = d0, x1 = d1; - q |= bit; - y1 |= bit << 1; - } - ODD_DOUBLE; - } - x->fp_mant[1] = q; -#undef t1 - - /* calculate q2. note (y1&1)==0; y0 (aka t0) is fixed. */ -#define t1 y1 -#define t2 tt - q = 0; - y2 = 0; - bit = 1 << 31; - EVEN_DOUBLE; - t2 = bit; - FPU_SUBS(d2, x2, t2); - FPU_SUBCS(d1, x1, t1); - FPU_SUBC(d0, x0, t0); - if ((int)d0 >= 0) { - x0 = d0, x1 = d1, x2 = d2; - q |= bit; - y1 |= 1; /* now t1, y1 are set in concrete */ - } - ODD_DOUBLE; - while ((bit >>= 1) != 0) { - EVEN_DOUBLE; - t2 = y2 | bit; - FPU_SUBS(d2, x2, t2); - FPU_SUBCS(d1, x1, t1); - FPU_SUBC(d0, x0, t0); - if ((int)d0 >= 0) { - x0 = d0, x1 = d1, x2 = d2; - q |= bit; - y2 |= bit << 1; - } - ODD_DOUBLE; - } - x->fp_mant[2] = q; -#undef t2 - - /* calculate q3. y0, t0, y1, t1 all fixed; y2, t2, almost done. 
*/ -#define t2 y2 -#define t3 tt - q = 0; - y3 = 0; - bit = 1 << 31; - EVEN_DOUBLE; - t3 = bit; - FPU_SUBS(d3, x3, t3); - FPU_SUBCS(d2, x2, t2); - FPU_SUBCS(d1, x1, t1); - FPU_SUBC(d0, x0, t0); - ODD_DOUBLE; - if ((int)d0 >= 0) { - x0 = d0, x1 = d1, x2 = d2; - q |= bit; - y2 |= 1; - } - while ((bit >>= 1) != 0) { - EVEN_DOUBLE; - t3 = y3 | bit; - FPU_SUBS(d3, x3, t3); - FPU_SUBCS(d2, x2, t2); - FPU_SUBCS(d1, x1, t1); - FPU_SUBC(d0, x0, t0); - if ((int)d0 >= 0) { - x0 = d0, x1 = d1, x2 = d2; - q |= bit; - y3 |= bit << 1; - } - ODD_DOUBLE; - } - x->fp_mant[3] = q; - - /* - * The result, which includes guard and round bits, is exact iff - * x is now zero; any nonzero bits in x represent sticky bits. - */ - x->fp_sticky = x0 | x1 | x2 | x3; - return (x); -} diff --git a/sys/arch/m88k/fpu/fpu_subr.c b/sys/arch/m88k/fpu/fpu_subr.c deleted file mode 100644 index 7661696452c..00000000000 --- a/sys/arch/m88k/fpu/fpu_subr.c +++ /dev/null @@ -1,219 +0,0 @@ -/* $OpenBSD: fpu_subr.c,v 1.1 2007/12/25 00:29:49 miod Exp $ */ - -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Lawrence Berkeley Laboratory. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fpu_subr.c 8.1 (Berkeley) 6/11/93 - */ - -/* - * FPU subroutines. - */ - -#include <sys/types.h> -#ifdef DIAGNOSTIC -#include <sys/systm.h> -#endif - -#include <machine/frame.h> -#include <machine/fpu.h> - -#include <m88k/fpu/fpu_arith.h> -#include <m88k/fpu/fpu_emu.h> - -/* - * Shift the given number right rsh bits. Any bits that `fall off' will get - * shoved into the sticky field; we return the resulting sticky. Note that - * shifting NaNs is legal (this will never shift all bits out); a NaN's - * sticky field is ignored anyway. 
- */ -int -fpu_shr(struct fpn *fp, int rsh) -{ - u_int m0, m1, m2, m3, s; - int lsh; - -#ifdef DIAGNOSTIC - if (rsh <= 0 || (fp->fp_class != FPC_NUM && !ISNAN(fp))) - panic("fpu_rightshift 1"); -#endif - - m0 = fp->fp_mant[0]; - m1 = fp->fp_mant[1]; - m2 = fp->fp_mant[2]; - m3 = fp->fp_mant[3]; - - /* If shifting all the bits out, take a shortcut. */ - if (rsh >= FP_NMANT) { -#ifdef DIAGNOSTIC - if ((m0 | m1 | m2 | m3) == 0) - panic("fpu_rightshift 2"); -#endif - fp->fp_mant[0] = 0; - fp->fp_mant[1] = 0; - fp->fp_mant[2] = 0; - fp->fp_mant[3] = 0; -#ifdef notdef - if ((m0 | m1 | m2 | m3) == 0) - fp->fp_class = FPC_ZERO; - else -#endif - fp->fp_sticky = 1; - return (1); - } - - /* Squish out full words. */ - s = fp->fp_sticky; - if (rsh >= 32 * 3) { - s |= m3 | m2 | m1; - m3 = m0, m2 = 0, m1 = 0, m0 = 0; - } else if (rsh >= 32 * 2) { - s |= m3 | m2; - m3 = m1, m2 = m0, m1 = 0, m0 = 0; - } else if (rsh >= 32) { - s |= m3; - m3 = m2, m2 = m1, m1 = m0, m0 = 0; - } - - /* Handle any remaining partial word. */ - if ((rsh &= 31) != 0) { - lsh = 32 - rsh; - s |= m3 << lsh; - m3 = (m3 >> rsh) | (m2 << lsh); - m2 = (m2 >> rsh) | (m1 << lsh); - m1 = (m1 >> rsh) | (m0 << lsh); - m0 >>= rsh; - } - fp->fp_mant[0] = m0; - fp->fp_mant[1] = m1; - fp->fp_mant[2] = m2; - fp->fp_mant[3] = m3; - fp->fp_sticky = s; - return (s); -} - -/* - * Force a number to be normal, i.e., make its fraction have all zero - * bits before FP_1, then FP_1, then all 1 bits. This is used for denorms - * and (sometimes) for intermediate results. - * - * Internally, this may use a `supernormal' -- a number whose fp_mant - * is greater than or equal to 2.0 -- so as a side effect you can hand it - * a supernormal and it will fix it (provided fp->fp_mant[3] == 0). 
- */ -void -fpu_norm(struct fpn *fp) -{ - u_int m0, m1, m2, m3, top, sup, nrm; - int lsh, rsh, exp; - - exp = fp->fp_exp; - m0 = fp->fp_mant[0]; - m1 = fp->fp_mant[1]; - m2 = fp->fp_mant[2]; - m3 = fp->fp_mant[3]; - - /* Handle severe subnormals with 32-bit moves. */ - if (m0 == 0) { - if (m1) - m0 = m1, m1 = m2, m2 = m3, m3 = 0, exp -= 32; - else if (m2) - m0 = m2, m1 = m3, m2 = 0, m3 = 0, exp -= 2 * 32; - else if (m3) - m0 = m3, m1 = 0, m2 = 0, m3 = 0, exp -= 3 * 32; - else { - fp->fp_class = FPC_ZERO; - return; - } - } - - /* Now fix any supernormal or remaining subnormal. */ - nrm = FP_1; - sup = nrm << 1; - if (m0 >= sup) { - /* - * We have a supernormal number. We need to shift it right. - * We may assume m3==0. - */ - for (rsh = 1, top = m0 >> 1; top >= sup; rsh++) /* XXX slow */ - top >>= 1; - exp += rsh; - lsh = 32 - rsh; - m3 = m2 << lsh; - m2 = (m2 >> rsh) | (m1 << lsh); - m1 = (m1 >> rsh) | (m0 << lsh); - m0 = top; - } else if (m0 < nrm) { - /* - * We have a regular denorm (a subnormal number), and need - * to shift it left. - */ - for (lsh = 1, top = m0 << 1; top < nrm; lsh++) /* XXX slow */ - top <<= 1; - exp -= lsh; - rsh = 32 - lsh; - m0 = top | (m1 >> rsh); - m1 = (m1 << lsh) | (m2 >> rsh); - m2 = (m2 << lsh) | (m3 >> rsh); - m3 <<= lsh; - } - - fp->fp_exp = exp; - fp->fp_mant[0] = m0; - fp->fp_mant[1] = m1; - fp->fp_mant[2] = m2; - fp->fp_mant[3] = m3; -} - -/* - * Concoct a `fresh' Quiet NaN. - * As a side effect, we raise an invalid operation exception if allowed to. 
- */ -struct fpn * -fpu_newnan(register struct fpemu *fe, int exception) -{ - register struct fpn *fp; - - if (exception) - fe->fe_fpsr |= FPSR_EFINV; - fp = &fe->fe_f3; - fp->fp_class = FPC_QNAN; - fp->fp_sign = 0; - fp->fp_mant[0] = FP_1 - 1; - fp->fp_mant[1] = fp->fp_mant[2] = fp->fp_mant[3] = ~0; - return (fp); -} diff --git a/sys/arch/m88k/fpu/m88110_fp.c b/sys/arch/m88k/fpu/m88110_fp.c deleted file mode 100644 index 9eb75c92a48..00000000000 --- a/sys/arch/m88k/fpu/m88110_fp.c +++ /dev/null @@ -1,421 +0,0 @@ -/* $OpenBSD: m88110_fp.c,v 1.4 2007/12/25 15:45:04 miod Exp $ */ - -/* - * Copyright (c) 2007, Miodrag Vallat. - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice, this permission notice, and the disclaimer below - * appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ -/* - * Copyright (c) 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * This software was developed by the Computer Systems Engineering group - * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and - * contributed to Berkeley. - * - * All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Lawrence Berkeley Laboratory. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)fpu.c 8.1 (Berkeley) 6/11/93 - */ - -#include <sys/types.h> -#include <sys/param.h> -#include <sys/proc.h> -#include <sys/signalvar.h> -#include <sys/user.h> -#include <sys/systm.h> - -#include <machine/fpu.h> -#include <machine/frame.h> -#include <machine/ieeefp.h> -#include <machine/trap.h> -#include <machine/m88110.h> - -#include <m88k/fpu/fpu_emu.h> - -int fpu_emulate(struct trapframe *, u_int32_t); - -/* - * All 88110 floating-point exceptions are handled there. 
- * - * We can unfortunately not trust the floating-point exception cause - * register, as the 88110 will conveniently only set the ``unimplemented - * instruction'' bit, more often than not. - * - * So we ignore it completely, and try to emulate the faulting instruction. - * The instruction can be: - * - * - an invalid SFU1 opcode, in which case we'll send SIGILL to the process. - * - * - a genuinely unimplement feature: fsqrt. - * - * - an opcode involving an odd-numbered register pair (as a double precision - * operand). Rather than issueing a correctly formed flavour in kernel mode, - * and having to handle a possible nested exception, we emulate it. This - * will of course be slower, but we have to draw the line somewhere. - * Gcc will however never produce such code, so we don't have to worry - * too much about this under OpenBSD. - * - * Note that, currently, opcodes involving the extended register file (XRF) - * are handled as invalid opcodes. This will eventually change once the - * toolchain can correctly assemble XRF instructions, and the XRF is saved - * accross context switches (or not... lazy switching for XRF makes more - * sense). - */ - -void -m88110_fpu_exception(struct trapframe *frame) -{ - struct proc *p = curproc; - int fault_type; - vaddr_t fault_addr; - union sigval sv; - u_int32_t insn; - int sig; - - fault_addr = frame->tf_exip & XIP_ADDR; - - /* - * Skip the instruction now. Signals will blame the correct - * address, and this has to be done before trapsignal() is - * invoked, or we won't run the first instruction of the signal - * handler... - */ - m88110_skip_insn(frame); - - /* - * The low-level exception code did not save the floating point - * exception registers. Do it now, and reset the exception - * cause register. 
- */ - __asm__ __volatile__ ("fldcr %0, fcr0" : "=r"(frame->tf_fpecr)); - __asm__ __volatile__ ("fldcr %0, fcr62" : "=r"(frame->tf_fpsr)); - __asm__ __volatile__ ("fldcr %0, fcr63" : "=r"(frame->tf_fpcr)); - __asm__ __volatile__ ("fstcr r0, fcr0"); - - /* - * Fetch the faulting instruction. This should not fail, if it - * does, it's probably not your lucky day. - */ - if (copyin((void *)fault_addr, &insn, sizeof insn) != 0) { - sig = SIGBUS; - fault_type = BUS_OBJERR; - goto deliver; - } - - switch (insn >> 26) { - case 0x20: - /* - * f{ld,st,x}cr instruction. If it caused a fault in - * user mode, this is a privilege violation. - */ - sig = SIGILL; - fault_type = ILL_PRVREG; - goto deliver; - case 0x21: - /* - * ``real'' FPU instruction. We'll try to emulate it. - */ - sig = fpu_emulate(frame, insn); - fault_type = SI_NOINFO; - /* - * Update the floating point status register regardless of - * whether we'll deliver a signal or not. - */ - __asm__ __volatile__ ("fstcr %0, fcr62" :: "r"(frame->tf_fpsr)); - break; - default: - /* - * Not a FPU instruction. Should not have raised this - * exception, so bail out. 
- */ - sig = SIGILL; - fault_type = ILL_ILLOPC; - goto deliver; - } - - if (frame->tf_epsr & PSR_SFD1) { /* don't bother */ - sig = SIGFPE; - fault_type = FPE_FLTINV; - goto deliver; - } - - if (sig != 0) { - if (sig == SIGILL) - fault_type = ILL_ILLOPC; - else { - if (frame->tf_fpecr & FPECR_FIOV) - fault_type = FPE_FLTSUB; - else if (frame->tf_fpecr & FPECR_FROP) - fault_type = FPE_FLTINV; - else if (frame->tf_fpecr & FPECR_FDVZ) - fault_type = FPE_INTDIV; - else if (frame->tf_fpecr & FPECR_FUNF) { - if (frame->tf_fpsr & FPSR_EFUNF) - fault_type = FPE_FLTUND; - else if (frame->tf_fpsr & FPSR_EFINX) - fault_type = FPE_FLTRES; - } else if (frame->tf_fpecr & FPECR_FOVF) { - if (frame->tf_fpsr & FPSR_EFOVF) - fault_type = FPE_FLTOVF; - else if (frame->tf_fpsr & FPSR_EFINX) - fault_type = FPE_FLTRES; - } else if (frame->tf_fpecr & FPECR_FINX) - fault_type = FPE_FLTRES; - } - -deliver: - sv.sival_ptr = (void *)fault_addr; - KERNEL_PROC_LOCK(p); - trapsignal(p, sig, 0, fault_type, sv); - KERNEL_PROC_UNLOCK(p); - } -} - -/* - * Emulate an FPU instruction. On return, the trapframe registers - * will be modified to reflect the settings the hardware would have left. - */ -int -fpu_emulate(struct trapframe *frame, u_int32_t insn) -{ - struct fpemu fe; - u_int rf, rd, rs1, rs2, t1, t2, td, opcode; - u_int32_t old_fpsr, old_fpcr; - u_int32_t scratch; - int rc; - - struct fpn *fp; -#ifdef notyet - u_int space[4]; -#else - u_int space[2]; -#endif - - fe.fe_fpstate = frame; - - /* - * Crack the instruction. - */ - rd = (insn >> 21) & 0x1f; - rs1 = (insn >> 16) & 0x1f; - rs2 = insn & 0x1f; - rf = (insn >> 15) & 0x01; - opcode = (insn >> 11) & 0x0f; - t1 = (insn >> 9) & 0x03; - t2 = (insn >> 7) & 0x03; - td = (insn >> 5) & 0x03; - - /* - * Discard invalid opcodes, as well as instructions involving XRF, - * since we do not support them yet. 
- */ - if (rf != 0) - return (SIGILL); - - switch (opcode) { - case 0x00: /* fmul */ - case 0x05: /* fadd */ - case 0x06: /* fsub */ - case 0x0e: /* fdiv */ - if ((t1 != FTYPE_SNG && t1 != FTYPE_DBL) || - (t2 != FTYPE_SNG && t2 != FTYPE_DBL) || - (td != FTYPE_SNG && td != FTYPE_DBL)) - return (SIGILL); - break; - case 0x04: /* flt */ - if (t1 != 0x00) /* flt on XRF */ - return (SIGILL); - if ((td != FTYPE_SNG && td != FTYPE_DBL) || - t2 != 0x00 || rs1 != 0) - return (SIGILL); - break; - case 0x07: /* fcmp, fcmpu */ - if ((t1 != FTYPE_SNG && t1 != FTYPE_DBL) || - (t2 != FTYPE_SNG && t2 != FTYPE_DBL)) - return (SIGILL); - if (td != 0x00 /* fcmp */ && td != 0x01 /* fcmpu */) - return (SIGILL); - break; - case 0x09: /* int */ - case 0x0a: /* nint */ - case 0x0b: /* trnc */ - if ((t2 != FTYPE_SNG && t2 != FTYPE_DBL) || - t1 != 0x00 || td != 0x00 || rs1 != 0) - return (SIGILL); - break; - case 0x01: /* fcvt */ - if (t2 == td) - return (SIGILL); - /* FALLTHROUGH */ - case 0x0f: /* fsqrt */ - if ((t2 != FTYPE_SNG && t2 != FTYPE_DBL) || - (td != FTYPE_SNG && td != FTYPE_DBL) || - t1 != 0x00 || rs1 != 0) - return (SIGILL); - break; - default: - case 0x08: /* mov */ - return (SIGILL); - } - - /* - * Temporarily reset the status register, so that we can tell - * which exceptions are new after processing the opcode. - */ - old_fpsr = frame->tf_fpsr; - frame->tf_fpsr = 0; - - /* - * Save fpcr as well, since we might need to change rounding mode - * temporarily. 
- */ - old_fpcr = frame->tf_fpcr; - - switch (opcode) { - case 0x00: /* fmul */ - fpu_explode(&fe, &fe.fe_f1, t1, rs1); - fpu_explode(&fe, &fe.fe_f2, t2, rs2); - fp = fpu_mul(&fe); - break; - - case 0x01: /* fcvt */ - fpu_explode(&fe, &fe.fe_f1, t2, rs2); - fp = &fe.fe_f1; - break; - - case 0x04: /* flt */ - fpu_explode(&fe, &fe.fe_f1, FTYPE_INT, rs2); - fp = &fe.fe_f1; - break; - - case 0x05: /* fadd */ - fpu_explode(&fe, &fe.fe_f1, t1, rs1); - fpu_explode(&fe, &fe.fe_f2, t2, rs2); - fp = fpu_add(&fe); - break; - - case 0x06: /* fsub */ - fpu_explode(&fe, &fe.fe_f1, t1, rs1); - fpu_explode(&fe, &fe.fe_f2, t2, rs2); - fp = fpu_sub(&fe); - break; - - case 0x07: /* fcmp, fcmpu */ - fpu_explode(&fe, &fe.fe_f1, t1, rs1); - fpu_explode(&fe, &fe.fe_f2, t2, rs2); - scratch = fpu_compare(&fe, td); - if (rd != 0) - frame->tf_r[rd] = scratch; - break; - - case 0x09: /* int */ -do_int: - fpu_explode(&fe, &fe.fe_f1, t2, rs2); - fp = &fe.fe_f1; - td = FTYPE_INT; - break; - case 0x0a: /* nint */ - /* round to nearest */ - frame->tf_fpcr = (old_fpcr & ~(FPCR_RD_MASK << FPCR_RD_SHIFT)) | - (FP_RN << FPCR_RD_SHIFT); - goto do_int; - - case 0x0b: /* trnc */ - /* round towards zero */ - frame->tf_fpcr = (old_fpcr & ~(FPCR_RD_MASK << FPCR_RD_SHIFT)) | - (FP_RZ << FPCR_RD_SHIFT); - goto do_int; - - case 0x0e: /* fdiv */ - fpu_explode(&fe, &fe.fe_f1, t1, rs1); - fpu_explode(&fe, &fe.fe_f2, t2, rs2); - fp = fpu_div(&fe); - break; - - case 0x0f: /* sqrt */ - fpu_explode(&fe, &fe.fe_f1, t2, rs2); - fp = fpu_sqrt(&fe); - break; - } - - /* - * Emulated operation is complete. Collapse the result into the - * destination register(s). - */ - if (opcode != 0x07) { - fpu_implode(&fe, fp, td, space); - - switch (td) { -#ifdef notyet - case FTYPE_EXT: - /* ... 
*/ -#endif - case FTYPE_DBL: - if (rd != 31) - frame->tf_r[rd + 1] = space[1]; - /* FALLTHROUGH */ - case FTYPE_SNG: - case FTYPE_INT: - if (rd != 0) - frame->tf_r[rd] = space[0]; - break; - } - } - - /* - * Mark new exceptions, if any, in the fpsr, and decide whether - * to send a signal or not. - */ - - if (frame->tf_fpsr & old_fpcr) - rc = SIGFPE; - else - rc = 0; - frame->tf_fpsr |= old_fpsr; - - /* - * Restore fpcr as well. - */ - frame->tf_fpcr = old_fpcr; - - return (rc); -} diff --git a/sys/arch/m88k/include/ieeefp.h b/sys/arch/m88k/include/ieeefp.h index 4241324d009..39e6b13515e 100644 --- a/sys/arch/m88k/include/ieeefp.h +++ b/sys/arch/m88k/include/ieeefp.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ieeefp.h,v 1.1 2004/04/26 12:34:05 miod Exp $ */ +/* $OpenBSD: ieeefp.h,v 1.2 2007/12/29 17:41:33 miod Exp $ */ /* * Copyright (c) 1996 Nivas Madhur * All rights reserved. @@ -53,4 +53,26 @@ typedef enum { FP_RP=3 /* round toward positive infinity */ } fp_rnd; +#ifdef _KERNEL + +/* + * Defines for the 88110 floating-point completion code. + */ + +#include <sys/param.h> +#include <sys/proc.h> + +#define float_raise(bits) curproc->p_md.md_tf->tf_fpsr |= bits +#define float_set_inexact() float_raise(FP_X_IMP) +#define float_set_invalid() float_raise(FP_X_INV) + +/* rounding mode bits position in FPCR */ +#define FPCR_RD_SHIFT 14 +#define FPCR_RD_MASK 0x03 + +#define float_get_round(fpcr) (((fpcr) >> FPCR_RD_SHIFT) & FPCR_RD_MASK) +#define fpgetround() float_get_round(curproc->p_md.md_tf->tf_fpcr) + +#endif + #endif /* _M88K_IEEEFP_H_ */ diff --git a/sys/arch/m88k/m88k/m88110_fp.c b/sys/arch/m88k/m88k/m88110_fp.c new file mode 100644 index 00000000000..ae9c87815c6 --- /dev/null +++ b/sys/arch/m88k/m88k/m88110_fp.c @@ -0,0 +1,745 @@ +/* $OpenBSD: m88110_fp.c,v 1.1 2007/12/29 17:41:34 miod Exp $ */ + +/* + * Copyright (c) 2007, Miodrag Vallat. 
+ * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice, this permission notice, and the disclaimer below + * appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/signalvar.h> +#include <sys/user.h> +#include <sys/systm.h> + +#include <machine/fpu.h> +#include <machine/frame.h> +#include <machine/ieeefp.h> +#include <machine/trap.h> +#include <machine/m88110.h> + +#include <lib/libkern/softfloat.h> + +/* + * Values for individual bits in fcmp results. 
+ */ +#define CC_UN 0x00000001 /* unordered */ +#define CC_LEG 0x00000002 /* less than, equal or greater than */ +#define CC_EQ 0x00000004 /* equal */ +#define CC_NE 0x00000008 /* not equal */ +#define CC_GT 0x00000010 /* greater than */ +#define CC_LE 0x00000020 /* less than or equal */ +#define CC_LT 0x00000040 /* less than */ +#define CC_GE 0x00000080 /* greater than or equal */ +#define CC_OU 0x00000100 /* out of range */ +#define CC_IB 0x00000200 /* in range or on boundary */ +#define CC_IN 0x00000400 /* in range */ +#define CC_OB 0x00000800 /* out of range or on boundary */ +#define CC_UE 0x00001000 /* unordered or equal */ +#define CC_LG 0x00002000 /* less than or greater than */ +#define CC_UG 0x00004000 /* unordered or greater than */ +#define CC_ULE 0x00008000 /* unordered or less than or equal */ +#define CC_UL 0x00010000 /* unordered or less than */ +#define CC_UGE 0x00020000 /* unordered or greater than or equal */ + +/* + * Data width (matching the TD field of the instructions) + */ +#define FTYPE_SNG 0 +#define FTYPE_DBL 1 +#define FTYPE_EXT 2 +#define FTYPE_INT 3 /* not a real T value */ + +#define IGNORE_PRECISION FTYPE_SNG + +/* floating point value */ +typedef union { + float32 sng; + float64 dbl; +} fparg; + +void fpu_compare(struct trapframe *, fparg *, fparg *, u_int, u_int, u_int); +int fpu_emulate(struct trapframe *, u_int32_t); +void fpu_fetch(struct trapframe *, u_int, u_int, u_int, fparg *); +u_int fpu_precision(u_int, u_int, u_int); +void fpu_store(struct trapframe *, u_int, u_int, u_int, fparg *); + +/* + * Inlines from softfloat-specialize.h which are not made public, needed + * for fpu_compare. + */ +#define float32_is_nan(a) \ + (0xff000000 < (a << 1)) +#define float32_is_signaling_nan(a) \ + ((((a >> 22) & 0x1ff) == 0x1fe) && (a & 0x003fffff)) + +/* + * All 88110 floating-point exceptions are handled there. 
+ * + * We can unfortunately not trust the floating-point exception cause + * register, as the 88110 will conveniently only set the ``unimplemented + * instruction'' bit, more often than not. + * + * So we ignore it completely, and try to emulate the faulting instruction. + * The instruction can be: + * + * - an invalid SFU1 opcode, in which case we'll send SIGILL to the process. + * + * - a genuinely unimplemented feature: fsqrt. + * + * - an opcode involving an odd-numbered register pair (as a double precision + * operand). Rather than issuing a correctly formed flavour in kernel mode, + * and having to handle a possible nested exception, we emulate it. This + * will of course be slower, but we have to draw the line somewhere. + * Gcc will however never produce such code, so we don't have to worry + * too much about this under OpenBSD. + * + * Note that, currently, opcodes involving the extended register file (XRF) + * are handled as invalid opcodes. This will eventually change once the + * toolchain can correctly assemble XRF instructions, and the XRF is saved + * across context switches (or not... lazy switching for XRF makes more + * sense). + */ + +void +m88110_fpu_exception(struct trapframe *frame) +{ + struct proc *p = curproc; + int fault_type; + vaddr_t fault_addr; + union sigval sv; + u_int32_t insn; + int sig; + + fault_addr = frame->tf_exip & XIP_ADDR; + + /* + * Skip the instruction now. Signals will blame the correct + * address, and this has to be done before trapsignal() is + * invoked, or we won't run the first instruction of the signal + * handler... + */ + m88110_skip_insn(frame); + + /* + * The low-level exception code did not save the floating point + * exception registers. Do it now, and reset the exception + * cause register. 
+ */ + __asm__ __volatile__ ("fldcr %0, fcr0" : "=r"(frame->tf_fpecr)); + __asm__ __volatile__ ("fldcr %0, fcr62" : "=r"(frame->tf_fpsr)); + __asm__ __volatile__ ("fldcr %0, fcr63" : "=r"(frame->tf_fpcr)); + __asm__ __volatile__ ("fstcr r0, fcr0"); + + /* + * Fetch the faulting instruction. This should not fail, if it + * does, it's probably not your lucky day. + */ + if (copyin((void *)fault_addr, &insn, sizeof insn) != 0) { + sig = SIGBUS; + fault_type = BUS_OBJERR; + goto deliver; + } + + switch (insn >> 26) { + case 0x20: + /* + * f{ld,st,x}cr instruction. If it caused a fault in + * user mode, this is a privilege violation. + */ + sig = SIGILL; + fault_type = ILL_PRVREG; + goto deliver; + case 0x21: + /* + * ``real'' FPU instruction. We'll try to emulate it. + */ + sig = fpu_emulate(frame, insn); + fault_type = SI_NOINFO; + /* + * Update the floating point status register regardless of + * whether we'll deliver a signal or not. + */ + __asm__ __volatile__ ("fstcr %0, fcr62" :: "r"(frame->tf_fpsr)); + break; + default: + /* + * Not a FPU instruction. Should not have raised this + * exception, so bail out. 
+ */ + sig = SIGILL; + fault_type = ILL_ILLOPC; + goto deliver; + } + + if (frame->tf_epsr & PSR_SFD1) { /* don't bother */ + sig = SIGFPE; + fault_type = FPE_FLTINV; + goto deliver; + } + + if (sig != 0) { + if (sig == SIGILL) + fault_type = ILL_ILLOPC; + else { + if (frame->tf_fpecr & FPECR_FIOV) + fault_type = FPE_FLTSUB; + else if (frame->tf_fpecr & FPECR_FROP) + fault_type = FPE_FLTINV; + else if (frame->tf_fpecr & FPECR_FDVZ) + fault_type = FPE_INTDIV; + else if (frame->tf_fpecr & FPECR_FUNF) { + if (frame->tf_fpsr & FPSR_EFUNF) + fault_type = FPE_FLTUND; + else if (frame->tf_fpsr & FPSR_EFINX) + fault_type = FPE_FLTRES; + } else if (frame->tf_fpecr & FPECR_FOVF) { + if (frame->tf_fpsr & FPSR_EFOVF) + fault_type = FPE_FLTOVF; + else if (frame->tf_fpsr & FPSR_EFINX) + fault_type = FPE_FLTRES; + } else if (frame->tf_fpecr & FPECR_FINX) + fault_type = FPE_FLTRES; + } + +deliver: + sv.sival_ptr = (void *)fault_addr; + KERNEL_PROC_LOCK(p); + trapsignal(p, sig, 0, fault_type, sv); + KERNEL_PROC_UNLOCK(p); + } +} + +/* + * Load a floating-point argument into a fparg union, then convert it to + * the required format if it is of larger precision. + * + * This assumes the final format (width) is not FTYPE_INT, and the original + * format (orig_width) <= width. + */ +void +fpu_fetch(struct trapframe *frame, u_int regno, u_int orig_width, u_int width, + fparg *dest) +{ + u_int32_t tmp; + + switch (orig_width) { + case FTYPE_INT: + tmp = regno == 0 ? 0 : frame->tf_r[regno]; + switch (width) { + case FTYPE_SNG: + dest->sng = int32_to_float32(tmp); + break; + case FTYPE_DBL: + dest->dbl = int32_to_float64(tmp); + break; + } + break; + case FTYPE_SNG: + tmp = regno == 0 ? 0 : frame->tf_r[regno]; + switch (width) { + case FTYPE_SNG: + dest->sng = tmp; + break; + case FTYPE_DBL: + dest->dbl = float32_to_float64(tmp); + break; + } + break; + case FTYPE_DBL: + tmp = regno == 0 ? 0 : frame->tf_r[regno]; + dest->dbl = ((float64)tmp) << 32; + tmp = regno == 31 ? 
0 : frame->tf_r[regno + 1]; + dest->dbl |= (float64)tmp; + break; + } +} + +/* + * Store a floating-point result, converting it to the required format if it + * is of smaller precision. + * + * This assumes the original format (orig_width) is not FTYPE_INT, and the + * final format (width) <= orig_width. + */ +void +fpu_store(struct trapframe *frame, u_int regno, u_int orig_width, u_int width, + fparg *src) +{ + u_int32_t tmp; + u_int rd; + + switch (width) { + case FTYPE_INT: + rd = float_get_round(frame->tf_fpcr); + switch (orig_width) { + case FTYPE_SNG: + if (rd == FP_RZ) + tmp = float32_to_int32_round_to_zero(src->sng); + else + tmp = float32_to_int32(src->sng); + break; + case FTYPE_DBL: + if (rd == FP_RZ) + tmp = float64_to_int32_round_to_zero(src->dbl); + else + tmp = float64_to_int32(src->dbl); + break; + } + if (regno != 0) + frame->tf_r[regno] = tmp; + break; + case FTYPE_SNG: + switch (orig_width) { + case FTYPE_SNG: + tmp = src->sng; + break; + case FTYPE_DBL: + tmp = float64_to_float32(src->dbl); + break; + } + if (regno != 0) + frame->tf_r[regno] = tmp; + break; + case FTYPE_DBL: + switch (orig_width) { + case FTYPE_DBL: + tmp = (u_int32_t)(src->dbl >> 32); + if (regno != 0) + frame->tf_r[regno] = tmp; + tmp = (u_int32_t)src->dbl; + if (regno != 31) + frame->tf_r[regno + 1] = tmp; + break; + } + break; + } +} + +/* + * Return the largest precision of all precision inputs. + * + * This assumes none of the inputs is FTYPE_INT. + */ +u_int +fpu_precision(u_int ts1, u_int ts2, u_int td) +{ + return max(td, max(ts1, ts2)); +} + +/* + * Emulate an FPU instruction. On return, the trapframe registers + * will be modified to reflect the settings the hardware would have left. + */ +int +fpu_emulate(struct trapframe *frame, u_int32_t insn) +{ + u_int rf, rd, rs1, rs2, t1, t2, td, tmax, opcode; + u_int32_t old_fpsr, old_fpcr; + int rc; + + fparg arg1, arg2, dest; + + /* + * Crack the instruction. 
+ */ + rd = (insn >> 21) & 0x1f; + rs1 = (insn >> 16) & 0x1f; + rs2 = insn & 0x1f; + rf = (insn >> 15) & 0x01; + opcode = (insn >> 11) & 0x0f; + t1 = (insn >> 9) & 0x03; + t2 = (insn >> 7) & 0x03; + td = (insn >> 5) & 0x03; + + /* + * Discard invalid opcodes, as well as instructions involving XRF, + * since we do not support them yet. + */ + if (rf != 0) + return (SIGILL); + + switch (opcode) { + case 0x00: /* fmul */ + case 0x05: /* fadd */ + case 0x06: /* fsub */ + case 0x0e: /* fdiv */ + if ((t1 != FTYPE_SNG && t1 != FTYPE_DBL) || + (t2 != FTYPE_SNG && t2 != FTYPE_DBL) || + (td != FTYPE_SNG && td != FTYPE_DBL)) + return (SIGILL); + break; + case 0x04: /* flt */ + if (t1 != 0x00) /* flt on XRF */ + return (SIGILL); + if ((td != FTYPE_SNG && td != FTYPE_DBL) || + t2 != 0x00 || rs1 != 0) + return (SIGILL); + break; + case 0x07: /* fcmp, fcmpu */ + if ((t1 != FTYPE_SNG && t1 != FTYPE_DBL) || + (t2 != FTYPE_SNG && t2 != FTYPE_DBL)) + return (SIGILL); + if (td != 0x00 /* fcmp */ && td != 0x01 /* fcmpu */) + return (SIGILL); + break; + case 0x09: /* int */ + case 0x0a: /* nint */ + case 0x0b: /* trnc */ + if ((t2 != FTYPE_SNG && t2 != FTYPE_DBL) || + t1 != 0x00 || td != 0x00 || rs1 != 0) + return (SIGILL); + break; + case 0x01: /* fcvt */ + if (t2 == td) + return (SIGILL); + /* FALLTHROUGH */ + case 0x0f: /* fsqrt */ + if ((t2 != FTYPE_SNG && t2 != FTYPE_DBL) || + (td != FTYPE_SNG && td != FTYPE_DBL) || + t1 != 0x00 || rs1 != 0) + return (SIGILL); + break; + default: + case 0x08: /* mov */ + return (SIGILL); + } + + /* + * Temporarily reset the status register, so that we can tell + * which exceptions are new after processing the opcode. + */ + old_fpsr = frame->tf_fpsr; + frame->tf_fpsr = 0; + + /* + * Save fpcr as well, since we might need to change rounding mode + * temporarily. + */ + old_fpcr = frame->tf_fpcr; + + /* + * The logic for instruction emulation is: + * + * - the computation precision is the largest one of all the operands. 
+ * - all source operands are converted to this precision if needed. + * - computation is performed. + * - the result is stored into the destination operand, converting it + * to the destination precision if lower. + */ + + switch (opcode) { + case 0x00: /* fmul */ + tmax = fpu_precision(t1, t2, td); + fpu_fetch(frame, rs1, t1, tmax, &arg1); + fpu_fetch(frame, rs2, t2, tmax, &arg2); + switch (tmax) { + case FTYPE_SNG: + dest.sng = float32_mul(arg1.sng, arg2.sng); + break; + case FTYPE_DBL: + dest.dbl = float64_mul(arg1.dbl, arg2.dbl); + break; + } + fpu_store(frame, rd, tmax, td, &dest); + break; + + case 0x01: /* fcvt */ + tmax = fpu_precision(IGNORE_PRECISION, t2, td); + fpu_fetch(frame, rs2, t2, tmax, &dest); + fpu_store(frame, rd, tmax, td, &dest); + break; + + case 0x04: /* flt */ + fpu_fetch(frame, rs2, FTYPE_INT, td, &dest); + fpu_store(frame, rd, td, td, &dest); + break; + + case 0x05: /* fadd */ + tmax = fpu_precision(t1, t2, td); + fpu_fetch(frame, rs1, t1, tmax, &arg1); + fpu_fetch(frame, rs2, t2, tmax, &arg2); + switch (tmax) { + case FTYPE_SNG: + dest.sng = float32_add(arg1.sng, arg2.sng); + break; + case FTYPE_DBL: + dest.dbl = float64_add(arg1.dbl, arg2.dbl); + break; + } + fpu_store(frame, rd, tmax, td, &dest); + break; + + case 0x06: /* fsub */ + tmax = fpu_precision(t1, t2, td); + fpu_fetch(frame, rs1, t1, tmax, &arg1); + fpu_fetch(frame, rs2, t2, tmax, &arg2); + switch (tmax) { + case FTYPE_SNG: + dest.sng = float32_sub(arg1.sng, arg2.sng); + break; + case FTYPE_DBL: + dest.dbl = float64_sub(arg1.dbl, arg2.dbl); + break; + } + fpu_store(frame, rd, tmax, td, &dest); + break; + + case 0x07: /* fcmp, fcmpu */ + tmax = fpu_precision(t1, t2, IGNORE_PRECISION); + fpu_fetch(frame, rs1, t1, tmax, &arg1); + fpu_fetch(frame, rs2, t2, tmax, &arg2); + fpu_compare(frame, &arg1, &arg2, tmax, rd, td /* fcmpu */); + break; + + case 0x09: /* int */ +do_int: + fpu_fetch(frame, rs2, t2, t2, &dest); + fpu_store(frame, rd, t2, FTYPE_INT, &dest); + break; + case 0x0a: 
/* nint */ + /* round to nearest */ + frame->tf_fpcr = (old_fpcr & ~(FPCR_RD_MASK << FPCR_RD_SHIFT)) | + (FP_RN << FPCR_RD_SHIFT); + goto do_int; + + case 0x0b: /* trnc */ + /* round towards zero */ + frame->tf_fpcr = (old_fpcr & ~(FPCR_RD_MASK << FPCR_RD_SHIFT)) | + (FP_RZ << FPCR_RD_SHIFT); + goto do_int; + + case 0x0e: /* fdiv */ + tmax = fpu_precision(t1, t2, td); + fpu_fetch(frame, rs1, t1, tmax, &arg1); + fpu_fetch(frame, rs2, t2, tmax, &arg2); + switch (tmax) { + case FTYPE_SNG: + dest.sng = float32_div(arg1.sng, arg2.sng); + break; + case FTYPE_DBL: + dest.dbl = float64_div(arg1.dbl, arg2.dbl); + break; + } + fpu_store(frame, rd, tmax, td, &dest); + break; + + case 0x0f: /* sqrt */ + tmax = fpu_precision(IGNORE_PRECISION, t2, td); + fpu_fetch(frame, rs2, t2, tmax, &arg1); + switch (tmax) { + case FTYPE_SNG: + dest.sng = float32_sqrt(arg1.sng); + break; + case FTYPE_DBL: + dest.dbl = float64_sqrt(arg1.dbl); + break; + } + fpu_store(frame, rd, tmax, td, &dest); + break; + } + + /* + * Mark new exceptions, if any, in the fpsr, and decide whether + * to send a signal or not. + */ + + if (frame->tf_fpsr & old_fpcr) + rc = SIGFPE; + else + rc = 0; + frame->tf_fpsr |= old_fpsr; + + /* + * Restore fpcr as well. + */ + frame->tf_fpcr = old_fpcr; + + return (rc); +} + +/* + * Perform a compare instruction (fcmp, fcmpu). + * + * If either operand is NaN, the result is unordered. This causes a + * reserved operand exception (except for nonsignalling NaNs for fcmpu). + * + * Everything else is ordered: + * |Inf| > |numbers| > |0|. + * The softfloat comparison routines (float32_eq/float32_lt and their + * float64 counterparts) give us this ordering directly. Note that two + * zeros compare equal regardless of sign, while everything else depends + * on sign. + * + * Incidentally, two Infs of the same sign compare equal. Since the 88110 + * does infinity arithmetic on hardware, this codepath should never be + * entered. 
+ */ +void +fpu_compare(struct trapframe *frame, fparg *s1, fparg *s2, u_int width, + u_int rd, u_int fcmpu) +{ + u_int32_t cc; + int zero, s1positive, s2positive; + + /* + * Handle NaNs first, and raise invalid if fcmp or signaling NaN. + */ + switch (width) { + case FTYPE_SNG: + if (float32_is_nan(s1->sng)) { + if (!fcmpu || float32_is_signaling_nan(s1->sng)) + float_set_invalid(); + cc = CC_UN; + goto done; + } + if (float32_is_nan(s2->sng)) { + if (!fcmpu || float32_is_signaling_nan(s2->sng)) + float_set_invalid(); + cc = CC_UN; + goto done; + } + break; + case FTYPE_DBL: + if (float64_is_nan(s1->dbl)) { + if (!fcmpu || float64_is_signaling_nan(s1->dbl)) + float_set_invalid(); + cc = CC_UN; + goto done; + } + if (float64_is_nan(s2->dbl)) { + if (!fcmpu || float64_is_signaling_nan(s2->dbl)) + float_set_invalid(); + cc = CC_UN; + goto done; + } + break; + } + + /* + * Now order the two numbers. + */ + switch (width) { + case FTYPE_SNG: + if (float32_eq(s1->sng, s2->sng)) + cc = CC_EQ; + else if (float32_lt(s1->sng, s2->sng)) + cc = CC_LT; + else + cc = CC_GT; + break; + case FTYPE_DBL: + if (float64_eq(s1->dbl, s2->dbl)) + cc = CC_EQ; + else if (float64_lt(s1->dbl, s2->dbl)) + cc = CC_LT; + else + cc = CC_GT; + break; + } + +done: + + /* + * Complete condition code mask. + */ + + if (cc & CC_UN) + cc |= CC_UE | CC_UG | CC_ULE | CC_UL | CC_UGE; + if (cc & CC_EQ) + cc |= CC_LE | CC_GE | CC_UE; + if (cc & CC_GT) + cc |= CC_GE; + if (cc & CC_LT) + cc |= CC_LE; + if (cc & (CC_LT | CC_GT)) + cc |= CC_LG; + if (cc & (CC_LT | CC_GT | CC_EQ)) + cc |= CC_LEG; + if (cc & CC_GT) + cc |= CC_UG; + if (cc & CC_LE) + cc |= CC_ULE; + if (cc & CC_LT) + cc |= CC_UL; + if (cc & CC_GE) + cc |= CC_UGE; + + /* + * Fill the interval bits. + * s1 is compared to the interval [0, s2]. 
+ */ + if (!(cc & CC_UN)) { + if (cc & CC_EQ) { + /* if s1 and s2 are equal, s1 is on boundary */ + cc |= CC_IB | CC_OB; + goto completed; + } + + /* s1 and s2 are either Zero, numbers or Inf */ + switch (width) { + case FTYPE_SNG: + zero = float32_eq(s1->sng, 0); + break; + case FTYPE_DBL: + zero = float64_eq(s1->dbl, 0LL); + break; + } + if (zero) { + /* if s1 is zero, it is on boundary */ + cc |= CC_IB | CC_OB; + goto completed; + } + + switch (width) { + case FTYPE_SNG: + s1positive = s1->sng >> 31 == 0; + s2positive = s2->sng >> 31 == 0; + break; + case FTYPE_DBL: + s1positive = s1->dbl >> 63 == 0; + s2positive = s2->dbl >> 63 == 0; + break; + } + if (s2positive) { + /* s2 is positive, the interval is [0, s2] */ + if (cc & CC_GT) { + /* 0 <= s2 < s1 -> out of interval */ + cc |= CC_OU | CC_OB; + } else if (s1positive) { + /* 0 < s1 < s2 -> in interval */ + cc |= CC_IB | CC_IN; + } else { + /* s1 < 0 <= s2 */ + cc |= CC_OU | CC_OB; + } + } else { + /* s2 is negative, the interval is [s2, 0] */ + if (cc & CC_LT) { + /* s1 < s2 <= 0 */ + cc |= CC_OU | CC_OB; + } else if (!s1positive) { + /* s2 < s1 < 0 */ + cc |= CC_IB | CC_IN; + } else { + /* s2 < 0 < s1 */ + cc |= CC_OU | CC_OB; + } + } + } + +completed: + if (rd != 0) + frame->tf_r[rd] = cc; +} |