summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorJason Wright <jason@cvs.openbsd.org>2003-07-21 18:41:31 +0000
committerJason Wright <jason@cvs.openbsd.org>2003-07-21 18:41:31 +0000
commit0a048fe16e70e5eed83e7679e7a2ad690dd89261 (patch)
tree96aa32511fa2d46c2d3516aba8e7453bb29c9265 /lib
parenta597a81e08feaf5b79b3d5ff2844a63ce31aaddb (diff)
quad float emulation stuff. Mostly based on freebsd (which is based on
NetBSD's kernel emulation stuff). Not enabled yet.
Diffstat (limited to 'lib')
-rw-r--r--lib/libc/arch/sparc64/fpu/Makefile.inc4
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_add.c220
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_arith.h98
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_compare.c184
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_div.c275
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_emu.h177
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_explode.c310
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_extern.h95
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_implode.c543
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_mul.c233
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_q.c192
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_q.h72
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_qp.c188
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_reg.c639
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_reg.h37
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_sqrt.c404
-rw-r--r--lib/libc/arch/sparc64/fpu/fpu_subr.c227
17 files changed, 3898 insertions, 0 deletions
diff --git a/lib/libc/arch/sparc64/fpu/Makefile.inc b/lib/libc/arch/sparc64/fpu/Makefile.inc
new file mode 100644
index 00000000000..79b3c6329f6
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/Makefile.inc
@@ -0,0 +1,4 @@
+# $OpenBSD: Makefile.inc,v 1.1 2003/07/21 18:41:30 jason Exp $
+SRCS += fpu_add.c fpu_compare.c fpu_div.c fpu_explode.c fpu_implode.c \
+ fpu_mul.c fpu_qp.c fpu_q.c fpu_sqrt.c fpu_subr.c fpu_reg.c
+.PATH: ${.CURDIR}/arch/sparc64/fpu
diff --git a/lib/libc/arch/sparc64/fpu/fpu_add.c b/lib/libc/arch/sparc64/fpu/fpu_add.c
new file mode 100644
index 00000000000..1eeb734594c
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_add.c
@@ -0,0 +1,220 @@
+/* $OpenBSD: fpu_add.c,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fpu_add.c 8.1 (Berkeley) 6/11/93
+ * $NetBSD: fpu_add.c,v 1.3 1996/03/14 19:41:52 christos Exp $
+ */
+
+#include <sys/cdefs.h>
+#if 0
+__FBSDID("$FreeBSD: src/lib/libc/sparc64/fpu/fpu_add.c,v 1.4 2002/04/27 21:56:28 jake Exp $");
+#endif
+
+/*
+ * Perform an FPU add (return x + y).
+ *
+ * To subtract, negate y and call add.
+ */
+
+#include <sys/param.h>
+
+#include <machine/frame.h>
+#include <machine/fsr.h>
+#include <machine/instr.h>
+
+#include "fpu_arith.h"
+#include "fpu_emu.h"
+#include "fpu_extern.h"
+
+struct fpn *
+__fpu_add(fe)
+ struct fpemu *fe;
+{
+ struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2, *r;
+ u_int r0, r1, r2, r3;
+ int rd;
+
+ /*
+ * Put the `heavier' operand on the right (see fpu_emu.h).
+ * Then we will have one of the following cases, taken in the
+ * following order:
+ *
+ * - y = NaN. Implied: if only one is a signalling NaN, y is.
+ * The result is y.
+ * - y = Inf. Implied: x != NaN (is 0, number, or Inf: the NaN
+ * case was taken care of earlier).
+ * If x = -y, the result is NaN. Otherwise the result
+ * is y (an Inf of whichever sign).
+ * - y is 0. Implied: x = 0.
+ * If x and y differ in sign (one positive, one negative),
+ * the result is +0 except when rounding to -Inf. If same:
+ * +0 + +0 = +0; -0 + -0 = -0.
+ * - x is 0. Implied: y != 0.
+ * Result is y.
+ * - other. Implied: both x and y are numbers.
+ * Do addition a la Hennessey & Patterson.
+ */
+ ORDER(x, y);
+ if (ISNAN(y))
+ return (y);
+ if (ISINF(y)) {
+ if (ISINF(x) && x->fp_sign != y->fp_sign)
+ return (__fpu_newnan(fe));
+ return (y);
+ }
+ rd = FSR_GET_RD(fe->fe_fsr);
+ if (ISZERO(y)) {
+ if (rd != FSR_RD_RM) /* only -0 + -0 gives -0 */
+ y->fp_sign &= x->fp_sign;
+ else /* any -0 operand gives -0 */
+ y->fp_sign |= x->fp_sign;
+ return (y);
+ }
+ if (ISZERO(x))
+ return (y);
+ /*
+ * We really have two numbers to add, although their signs may
+ * differ. Make the exponents match, by shifting the smaller
+ * number right (e.g., 1.011 => 0.1011) and increasing its
+ * exponent (2^3 => 2^4). Note that we do not alter the exponents
+ * of x and y here.
+ */
+ r = &fe->fe_f3;
+ r->fp_class = FPC_NUM;
+ if (x->fp_exp == y->fp_exp) {
+ r->fp_exp = x->fp_exp;
+ r->fp_sticky = 0;
+ } else {
+ if (x->fp_exp < y->fp_exp) {
+ /*
+ * Try to avoid subtract case iii (see below).
+ * This also guarantees that x->fp_sticky = 0.
+ */
+ SWAP(x, y);
+ }
+ /* now x->fp_exp > y->fp_exp */
+ r->fp_exp = x->fp_exp;
+ r->fp_sticky = __fpu_shr(y, x->fp_exp - y->fp_exp);
+ }
+ r->fp_sign = x->fp_sign;
+ if (x->fp_sign == y->fp_sign) {
+ FPU_DECL_CARRY
+
+ /*
+ * The signs match, so we simply add the numbers. The result
+ * may be `supernormal' (as big as 1.111...1 + 1.111...1, or
+ * 11.111...0). If so, a single bit shift-right will fix it
+ * (but remember to adjust the exponent).
+ */
+ /* r->fp_mant = x->fp_mant + y->fp_mant */
+ FPU_ADDS(r->fp_mant[3], x->fp_mant[3], y->fp_mant[3]);
+ FPU_ADDCS(r->fp_mant[2], x->fp_mant[2], y->fp_mant[2]);
+ FPU_ADDCS(r->fp_mant[1], x->fp_mant[1], y->fp_mant[1]);
+ FPU_ADDC(r0, x->fp_mant[0], y->fp_mant[0]);
+ if ((r->fp_mant[0] = r0) >= FP_2) {
+ (void) __fpu_shr(r, 1);
+ r->fp_exp++;
+ }
+ } else {
+ FPU_DECL_CARRY
+
+ /*
+ * The signs differ, so things are rather more difficult.
+ * H&P would have us negate the negative operand and add;
+ * this is the same as subtracting the negative operand.
+ * This is quite a headache. Instead, we will subtract
+ * y from x, regardless of whether y itself is the negative
+ * operand. When this is done one of three conditions will
+ * hold, depending on the magnitudes of x and y:
+ * case i) |x| > |y|. The result is just x - y,
+ * with x's sign, but it may need to be normalized.
+ * case ii) |x| = |y|. The result is 0 (maybe -0)
+ * so must be fixed up.
+ * case iii) |x| < |y|. We goofed; the result should
+ * be (y - x), with the same sign as y.
+ * We could compare |x| and |y| here and avoid case iii,
+ * but that would take just as much work as the subtract.
+ * We can tell case iii has occurred by an overflow.
+ *
+ * N.B.: since x->fp_exp >= y->fp_exp, x->fp_sticky = 0.
+ */
+ /* r->fp_mant = x->fp_mant - y->fp_mant */
+ FPU_SET_CARRY(y->fp_sticky);
+ FPU_SUBCS(r3, x->fp_mant[3], y->fp_mant[3]);
+ FPU_SUBCS(r2, x->fp_mant[2], y->fp_mant[2]);
+ FPU_SUBCS(r1, x->fp_mant[1], y->fp_mant[1]);
+ FPU_SUBC(r0, x->fp_mant[0], y->fp_mant[0]);
+ if (r0 < FP_2) {
+ /* cases i and ii */
+ if ((r0 | r1 | r2 | r3) == 0) {
+ /* case ii */
+ r->fp_class = FPC_ZERO;
+ r->fp_sign = rd == FSR_RD_RM;
+ return (r);
+ }
+ } else {
+ /*
+ * Oops, case iii. This can only occur when the
+ * exponents were equal, in which case neither
+ * x nor y have sticky bits set. Flip the sign
+ * (to y's sign) and negate the result to get y - x.
+ */
+#ifdef DIAGNOSTIC
+ if (x->fp_exp != y->fp_exp || r->fp_sticky)
+ __utrap_panic("fpu_add");
+#endif
+ r->fp_sign = y->fp_sign;
+ FPU_SUBS(r3, 0, r3);
+ FPU_SUBCS(r2, 0, r2);
+ FPU_SUBCS(r1, 0, r1);
+ FPU_SUBC(r0, 0, r0);
+ }
+ r->fp_mant[3] = r3;
+ r->fp_mant[2] = r2;
+ r->fp_mant[1] = r1;
+ r->fp_mant[0] = r0;
+ if (r0 < FP_1)
+ __fpu_norm(r);
+ }
+ return (r);
+}
diff --git a/lib/libc/arch/sparc64/fpu/fpu_arith.h b/lib/libc/arch/sparc64/fpu/fpu_arith.h
new file mode 100644
index 00000000000..062a181c09e
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_arith.h
@@ -0,0 +1,98 @@
+/* $OpenBSD: fpu_arith.h,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fpu_arith.h 8.1 (Berkeley) 6/11/93
+ * $NetBSD: fpu_arith.h,v 1.3 2000/07/24 04:11:03 mycroft Exp $
+ * $FreeBSD: src/lib/libc/sparc64/fpu/fpu_arith.h,v 1.2 2002/03/22 23:41:59 obrien Exp $
+ */
+
+/*
+ * Extended-precision arithmetic.
+ *
+ * We hold the notion of a `carry register', which may or may not be a
+ * machine carry bit or register. On the SPARC, it is just the machine's
+ * carry bit.
+ *
+ * In the worst case, you can compute the carry from x+y as
+ * (unsigned)(x + y) < (unsigned)x
+ * and from x+y+c as
+ * ((unsigned)(x + y + c) <= (unsigned)x && (y|c) != 0)
+ * for example.
+ */
+
+/* set up for extended-precision arithemtic */
+#define FPU_DECL_CARRY
+
+/*
+ * We have three kinds of add:
+ * add with carry: r = x + y + c
+ * add (ignoring current carry) and set carry: c'r = x + y + 0
+ * add with carry and set carry: c'r = x + y + c
+ * The macros use `C' for `use carry' and `S' for `set carry'.
+ * Note that the state of the carry is undefined after ADDC and SUBC,
+ * so if all you have for these is `add with carry and set carry',
+ * that is OK.
+ *
+ * The same goes for subtract, except that we compute x - y - c.
+ *
+ * Finally, we have a way to get the carry into a `regular' variable,
+ * or set it from a value. SET_CARRY turns 0 into no-carry, nonzero
+ * into carry; GET_CARRY sets its argument to 0 or 1.
+ */
+#define FPU_ADDC(r, x, y) \
+ __asm __volatile("addx %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
+#define FPU_ADDS(r, x, y) \
+ __asm __volatile("addcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
+#define FPU_ADDCS(r, x, y) \
+ __asm __volatile("addxcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
+#define FPU_SUBC(r, x, y) \
+ __asm __volatile("subx %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
+#define FPU_SUBS(r, x, y) \
+ __asm __volatile("subcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
+#define FPU_SUBCS(r, x, y) \
+ __asm __volatile("subxcc %1,%2,%0" : "=r"(r) : "r"(x), "r"(y))
+
+#define FPU_GET_CARRY(r) __asm __volatile("addx %%g0,%%g0,%0" : "=r"(r))
+#define FPU_SET_CARRY(v) __asm __volatile("addcc %0,-1,%%g0" : : "r"(v))
+
+#define FPU_SHL1_BY_ADD /* shift left 1 faster by ADDC than (a<<1)|(b>>31) */
diff --git a/lib/libc/arch/sparc64/fpu/fpu_compare.c b/lib/libc/arch/sparc64/fpu/fpu_compare.c
new file mode 100644
index 00000000000..7d2e7c84a94
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_compare.c
@@ -0,0 +1,184 @@
+/* $OpenBSD: fpu_compare.c,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fpu_compare.c 8.1 (Berkeley) 6/11/93
+ * $NetBSD: fpu_compare.c,v 1.3 2001/08/26 05:46:31 eeh Exp $
+ */
+
+#include <sys/cdefs.h>
+#if 0
+__FBSDID("$FreeBSD: src/lib/libc/sparc64/fpu/fpu_compare.c,v 1.4 2002/03/22 21:52:58 obrien Exp $");
+#endif
+
+/*
+ * CMP and CMPE instructions.
+ *
+ * These rely on the fact that our internal wide format is achieved by
+ * adding zero bits to the end of narrower mantissas.
+ */
+
+#include <sys/types.h>
+
+#include <machine/frame.h>
+#include <machine/fsr.h>
+
+#include "fpu_arith.h"
+#include "fpu_emu.h"
+#include "fpu_extern.h"
+
+static u_long fcc_nmask[] = {
+ ~FSR_FCC,
+ ~FSR_FCC1,
+ ~FSR_FCC2,
+ ~FSR_FCC3
+};
+
+/* XXX: we don't use the FSR_FCCx macros here; it's much easier this way. */
+static int fcc_shift[] = {
+ FSR_FCC_SHIFT,
+ FSR_FCC1_SHIFT,
+ FSR_FCC2_SHIFT,
+ FSR_FCC3_SHIFT
+};
+
+/*
+ * Perform a compare instruction (with or without unordered exception).
+ * This updates the fcc field in the fsr.
+ *
+ * If either operand is NaN, the result is unordered. For cmpe, this
+ * causes an NV exception. Everything else is ordered:
+ * |Inf| > |numbers| > |0|.
+ * We already arranged for fp_class(Inf) > fp_class(numbers) > fp_class(0),
+ * so we get this directly. Note, however, that two zeros compare equal
+ * regardless of sign, while everything else depends on sign.
+ *
+ * Incidentally, two Infs of the same sign compare equal (per the 80387
+ * manual---it would be nice if the SPARC documentation were more
+ * complete).
+ */
+void
+__fpu_compare(struct fpemu *fe, int cmpe, int fcc)
+{
+ struct fpn *a, *b;
+ int cc;
+ FPU_DECL_CARRY
+
+ a = &fe->fe_f1;
+ b = &fe->fe_f2;
+
+ if (ISNAN(a) || ISNAN(b)) {
+ /*
+ * In any case, we already got an exception for signalling
+ * NaNs; here we may replace that one with an identical
+ * exception, but so what?.
+ */
+ if (cmpe)
+ fe->fe_cx = FSR_NV;
+ cc = FSR_CC_UO;
+ goto done;
+ }
+
+ /*
+ * Must handle both-zero early to avoid sign goofs. Otherwise,
+ * at most one is 0, and if the signs differ we are done.
+ */
+ if (ISZERO(a) && ISZERO(b)) {
+ cc = FSR_CC_EQ;
+ goto done;
+ }
+ if (a->fp_sign) { /* a < 0 (or -0) */
+ if (!b->fp_sign) { /* b >= 0 (or if a = -0, b > 0) */
+ cc = FSR_CC_LT;
+ goto done;
+ }
+ } else { /* a > 0 (or +0) */
+ if (b->fp_sign) { /* b <= -0 (or if a = +0, b < 0) */
+ cc = FSR_CC_GT;
+ goto done;
+ }
+ }
+
+ /*
+ * Now the signs are the same (but may both be negative). All
+ * we have left are these cases:
+ *
+ * |a| < |b| [classes or values differ]
+ * |a| > |b| [classes or values differ]
+ * |a| == |b| [classes and values identical]
+ *
+ * We define `diff' here to expand these as:
+ *
+ * |a| < |b|, a,b >= 0: a < b => FSR_CC_LT
+ * |a| < |b|, a,b < 0: a > b => FSR_CC_GT
+ * |a| > |b|, a,b >= 0: a > b => FSR_CC_GT
+ * |a| > |b|, a,b < 0: a < b => FSR_CC_LT
+ */
+#define opposite_cc(cc) ((cc) == FSR_CC_LT ? FSR_CC_GT : FSR_CC_LT)
+#define diff(magnitude) (a->fp_sign ? opposite_cc(magnitude) : (magnitude))
+ if (a->fp_class < b->fp_class) { /* |a| < |b| */
+ cc = diff(FSR_CC_LT);
+ goto done;
+ }
+ if (a->fp_class > b->fp_class) { /* |a| > |b| */
+ cc = diff(FSR_CC_GT);
+ goto done;
+ }
+ /* now none can be 0: only Inf and numbers remain */
+ if (ISINF(a)) { /* |Inf| = |Inf| */
+ cc = FSR_CC_EQ;
+ goto done;
+ }
+ /*
+ * Only numbers remain. To compare two numbers in magnitude, we
+ * simply subtract them.
+ */
+ a = __fpu_sub(fe);
+ if (a->fp_class == FPC_ZERO)
+ cc = FSR_CC_EQ;
+ else
+ cc = diff(FSR_CC_GT);
+
+done:
+ fe->fe_fsr = (fe->fe_fsr & fcc_nmask[fcc]) |
+ ((u_long)cc << fcc_shift[fcc]);
+}
diff --git a/lib/libc/arch/sparc64/fpu/fpu_div.c b/lib/libc/arch/sparc64/fpu/fpu_div.c
new file mode 100644
index 00000000000..b615e46c21f
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_div.c
@@ -0,0 +1,275 @@
+/* $OpenBSD: fpu_div.c,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fpu_div.c 8.1 (Berkeley) 6/11/93
+ * $NetBSD: fpu_div.c,v 1.2 1994/11/20 20:52:38 deraadt Exp $
+ */
+
+#include <sys/cdefs.h>
+#if 0
+__FBSDID("$FreeBSD: src/lib/libc/sparc64/fpu/fpu_div.c,v 1.3 2002/03/22 21:52:58 obrien Exp $");
+#endif
+
+/*
+ * Perform an FPU divide (return x / y).
+ */
+
+#include <sys/types.h>
+
+#include <machine/frame.h>
+#include <machine/fsr.h>
+
+#include "fpu_arith.h"
+#include "fpu_emu.h"
+#include "fpu_extern.h"
+
+/*
+ * Division of normal numbers is done as follows:
+ *
+ * x and y are floating point numbers, i.e., in the form 1.bbbb * 2^e.
+ * If X and Y are the mantissas (1.bbbb's), the quotient is then:
+ *
+ * q = (X / Y) * 2^((x exponent) - (y exponent))
+ *
+ * Since X and Y are both in [1.0,2.0), the quotient's mantissa (X / Y)
+ * will be in [0.5,2.0). Moreover, it will be less than 1.0 if and only
+ * if X < Y. In that case, it will have to be shifted left one bit to
+ * become a normal number, and the exponent decremented. Thus, the
+ * desired exponent is:
+ *
+ * left_shift = x->fp_mant < y->fp_mant;
+ * result_exp = x->fp_exp - y->fp_exp - left_shift;
+ *
+ * The quotient mantissa X/Y can then be computed one bit at a time
+ * using the following algorithm:
+ *
+ * Q = 0; -- Initial quotient.
+ * R = X; -- Initial remainder,
+ * if (left_shift) -- but fixed up in advance.
+ * R *= 2;
+ * for (bit = FP_NMANT; --bit >= 0; R *= 2) {
+ * if (R >= Y) {
+ * Q |= 1 << bit;
+ * R -= Y;
+ * }
+ * }
+ *
+ * The subtraction R -= Y always removes the uppermost bit from R (and
+ * can sometimes remove additional lower-order 1 bits); this proof is
+ * left to the reader.
+ *
+ * This loop correctly calculates the guard and round bits since they are
+ * included in the expanded internal representation. The sticky bit
+ * is to be set if and only if any other bits beyond guard and round
+ * would be set. From the above it is obvious that this is true if and
+ * only if the remainder R is nonzero when the loop terminates.
+ *
+ * Examining the loop above, we can see that the quotient Q is built
+ * one bit at a time ``from the top down''. This means that we can
+ * dispense with the multi-word arithmetic and just build it one word
+ * at a time, writing each result word when it is done.
+ *
+ * Furthermore, since X and Y are both in [1.0,2.0), we know that,
+ * initially, R >= Y. (Recall that, if X < Y, R is set to X * 2 and
+ * is therefore at in [2.0,4.0).) Thus Q is sure to have bit FP_NMANT-1
+ * set, and R can be set initially to either X - Y (when X >= Y) or
+ * 2X - Y (when X < Y). In addition, comparing R and Y is difficult,
+ * so we will simply calculate R - Y and see if that underflows.
+ * This leads to the following revised version of the algorithm:
+ *
+ * R = X;
+ * bit = FP_1;
+ * D = R - Y;
+ * if (D >= 0) {
+ * result_exp = x->fp_exp - y->fp_exp;
+ * R = D;
+ * q = bit;
+ * bit >>= 1;
+ * } else {
+ * result_exp = x->fp_exp - y->fp_exp - 1;
+ * q = 0;
+ * }
+ * R <<= 1;
+ * do {
+ * D = R - Y;
+ * if (D >= 0) {
+ * q |= bit;
+ * R = D;
+ * }
+ * R <<= 1;
+ * } while ((bit >>= 1) != 0);
+ * Q[0] = q;
+ * for (i = 1; i < 4; i++) {
+ * q = 0, bit = 1 << 31;
+ * do {
+ * D = R - Y;
+ * if (D >= 0) {
+ * q |= bit;
+ * R = D;
+ * }
+ * R <<= 1;
+ * } while ((bit >>= 1) != 0);
+ * Q[i] = q;
+ * }
+ *
+ * This can be refined just a bit further by moving the `R <<= 1'
+ * calculations to the front of the do-loops and eliding the first one.
+ * The process can be terminated immediately whenever R becomes 0, but
+ * this is relatively rare, and we do not bother.
+ */
+
+struct fpn *
+__fpu_div(fe)
+ struct fpemu *fe;
+{
+ struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
+ u_int q, bit;
+ u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3;
+ FPU_DECL_CARRY
+
+ /*
+ * Since divide is not commutative, we cannot just use ORDER.
+ * Check either operand for NaN first; if there is at least one,
+ * order the signalling one (if only one) onto the right, then
+ * return it. Otherwise we have the following cases:
+ *
+ * Inf / Inf = NaN, plus NV exception
+ * Inf / num = Inf [i.e., return x]
+ * Inf / 0 = Inf [i.e., return x]
+ * 0 / Inf = 0 [i.e., return x]
+ * 0 / num = 0 [i.e., return x]
+ * 0 / 0 = NaN, plus NV exception
+ * num / Inf = 0
+ * num / num = num (do the divide)
+ * num / 0 = Inf, plus DZ exception
+ */
+ if (ISNAN(x) || ISNAN(y)) {
+ ORDER(x, y);
+ return (y);
+ }
+ if (ISINF(x) || ISZERO(x)) {
+ if (x->fp_class == y->fp_class)
+ return (__fpu_newnan(fe));
+ return (x);
+ }
+
+ /* all results at this point use XOR of operand signs */
+ x->fp_sign ^= y->fp_sign;
+ if (ISINF(y)) {
+ x->fp_class = FPC_ZERO;
+ return (x);
+ }
+ if (ISZERO(y)) {
+ fe->fe_cx = FSR_DZ;
+ x->fp_class = FPC_INF;
+ return (x);
+ }
+
+ /*
+ * Macros for the divide. See comments at top for algorithm.
+ * Note that we expand R, D, and Y here.
+ */
+
+#define SUBTRACT /* D = R - Y */ \
+ FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \
+ FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0)
+
+#define NONNEGATIVE /* D >= 0 */ \
+ ((int)d0 >= 0)
+
+#ifdef FPU_SHL1_BY_ADD
+#define SHL1 /* R <<= 1 */ \
+ FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \
+ FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0)
+#else
+#define SHL1 \
+ r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \
+ r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1
+#endif
+
+#define LOOP /* do ... while (bit >>= 1) */ \
+ do { \
+ SHL1; \
+ SUBTRACT; \
+ if (NONNEGATIVE) { \
+ q |= bit; \
+ r0 = d0, r1 = d1, r2 = d2, r3 = d3; \
+ } \
+ } while ((bit >>= 1) != 0)
+
+#define WORD(r, i) /* calculate r->fp_mant[i] */ \
+ q = 0; \
+ bit = 1 << 31; \
+ LOOP; \
+ (x)->fp_mant[i] = q
+
+ /* Setup. Note that we put our result in x. */
+ r0 = x->fp_mant[0];
+ r1 = x->fp_mant[1];
+ r2 = x->fp_mant[2];
+ r3 = x->fp_mant[3];
+ y0 = y->fp_mant[0];
+ y1 = y->fp_mant[1];
+ y2 = y->fp_mant[2];
+ y3 = y->fp_mant[3];
+
+ bit = FP_1;
+ SUBTRACT;
+ if (NONNEGATIVE) {
+ x->fp_exp -= y->fp_exp;
+ r0 = d0, r1 = d1, r2 = d2, r3 = d3;
+ q = bit;
+ bit >>= 1;
+ } else {
+ x->fp_exp -= y->fp_exp + 1;
+ q = 0;
+ }
+ LOOP;
+ x->fp_mant[0] = q;
+ WORD(x, 1);
+ WORD(x, 2);
+ WORD(x, 3);
+ x->fp_sticky = r0 | r1 | r2 | r3;
+
+ return (x);
+}
diff --git a/lib/libc/arch/sparc64/fpu/fpu_emu.h b/lib/libc/arch/sparc64/fpu/fpu_emu.h
new file mode 100644
index 00000000000..12f563068a4
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_emu.h
@@ -0,0 +1,177 @@
+/* $OpenBSD: fpu_emu.h,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fpu_emu.h 8.1 (Berkeley) 6/11/93
+ * $NetBSD: fpu_emu.h,v 1.4 2000/08/03 18:32:07 eeh Exp $
+ * $FreeBSD: src/lib/libc/sparc64/fpu/fpu_emu.h,v 1.4 2002/03/22 23:41:59 obrien Exp $
+ */
+
+/*
+ * Floating point emulator (tailored for SPARC, but structurally
+ * machine-independent).
+ *
+ * Floating point numbers are carried around internally in an `expanded'
+ * or `unpacked' form consisting of:
+ * - sign
+ * - unbiased exponent
+ * - mantissa (`1.' + 112-bit fraction + guard + round)
+ * - sticky bit
+ * Any implied `1' bit is inserted, giving a 113-bit mantissa that is
+ * always nonzero. Additional low-order `guard' and `round' bits are
+ * scrunched in, making the entire mantissa 115 bits long. This is divided
+ * into four 32-bit words, with `spare' bits left over in the upper part
+ * of the top word (the high bits of fp_mant[0]). An internal `exploded'
+ * number is thus kept within the half-open interval [1.0,2.0) (but see
+ * the `number classes' below). This holds even for denormalized numbers:
+ * when we explode an external denorm, we normalize it, introducing low-order
+ * zero bits, so that the rest of the code always sees normalized values.
+ *
+ * Note that a number of our algorithms use the `spare' bits at the top.
+ * The most demanding algorithm---the one for sqrt---depends on two such
+ * bits, so that it can represent values up to (but not including) 8.0,
+ * and then it needs a carry on top of that, so that we need three `spares'.
+ *
+ * The sticky-word is 32 bits so that we can use `OR' operators to goosh
+ * whole words from the mantissa into it.
+ *
+ * All operations are done in this internal extended precision. According
+ * to Hennesey & Patterson, Appendix A, rounding can be repeated---that is,
+ * it is OK to do a+b in extended precision and then round the result to
+ * single precision---provided single, double, and extended precisions are
+ * `far enough apart' (they always are), but we will try to avoid any such
+ * extra work where possible.
+ */
+
+#ifndef _SPARC64_FPU_FPU_EMU_H_
+#define _SPARC64_FPU_FPU_EMU_H_
+
+#include "fpu_reg.h"
+
+struct fpn {
+ int fp_class; /* see below */
+ int fp_sign; /* 0 => positive, 1 => negative */
+ int fp_exp; /* exponent (unbiased) */
+ int fp_sticky; /* nonzero bits lost at right end */
+ u_int fp_mant[4]; /* 115-bit mantissa */
+};
+
+#define FP_NMANT 115 /* total bits in mantissa (incl g,r) */
+#define FP_NG 2 /* number of low-order guard bits */
+#define FP_LG ((FP_NMANT - 1) & 31) /* log2(1.0) for fp_mant[0] */
+#define FP_LG2 ((FP_NMANT - 1) & 63) /* log2(1.0) for fp_mant[0] and fp_mant[1] */
+#define FP_QUIETBIT (1 << (FP_LG - 1)) /* Quiet bit in NaNs (0.5) */
+#define FP_1 (1 << FP_LG) /* 1.0 in fp_mant[0] */
+#define FP_2 (1 << (FP_LG + 1)) /* 2.0 in fp_mant[0] */
+
+/*
+ * Number classes. Since zero, Inf, and NaN cannot be represented using
+ * the above layout, we distinguish these from other numbers via a class.
+ * In addition, to make computation easier and to follow Appendix N of
+ * the SPARC Version 8 standard, we give each kind of NaN a separate class.
+ */
+#define FPC_SNAN -2 /* signalling NaN (sign irrelevant) */
+#define FPC_QNAN -1 /* quiet NaN (sign irrelevant) */
+#define FPC_ZERO 0 /* zero (sign matters) */
+#define FPC_NUM 1 /* number (sign matters) */
+#define FPC_INF 2 /* infinity (sign matters) */
+
+#define ISNAN(fp) ((fp)->fp_class < 0)
+#define ISZERO(fp) ((fp)->fp_class == 0)
+#define ISINF(fp) ((fp)->fp_class == FPC_INF)
+
+/*
+ * ORDER(x,y) `sorts' a pair of `fpn *'s so that the right operand (y) points
+ * to the `more significant' operand for our purposes. Appendix N says that
+ * the result of a computation involving two numbers are:
+ *
+ * If both are SNaN: operand 2, converted to Quiet
+ * If only one is SNaN: the SNaN operand, converted to Quiet
+ * If both are QNaN: operand 2
+ * If only one is QNaN: the QNaN operand
+ *
+ * In addition, in operations with an Inf operand, the result is usually
+ * Inf. The class numbers are carefully arranged so that if
+ * (unsigned)class(op1) > (unsigned)class(op2)
+ * then op1 is the one we want; otherwise op2 is the one we want.
+ */
+#define ORDER(x, y) { \
+ if ((u_int)(x)->fp_class > (u_int)(y)->fp_class) \
+ SWAP(x, y); \
+}
+#define SWAP(x, y) { \
+ register struct fpn *swap; \
+ swap = (x), (x) = (y), (y) = swap; \
+}
+
+/*
+ * Emulator state.
+ */
+struct fpemu {
+ u_long fe_fsr; /* fsr copy (modified during op) */
+ int fe_cx; /* exceptions */
+ struct fpn fe_f1; /* operand 1 */
+ struct fpn fe_f2; /* operand 2, if required */
+ struct fpn fe_f3; /* available storage for result */
+};
+
+/*
+ * Arithmetic functions.
+ * Each of these may modify its inputs (f1,f2) and/or the temporary.
+ * Each returns a pointer to the result and/or sets exceptions.
+ */
+#define __fpu_sub(fe) ((fe)->fe_f2.fp_sign ^= 1, __fpu_add(fe))
+
+#ifdef FPU_DEBUG
+#define FPE_INSN 0x1
+#define FPE_REG 0x2
+extern int __fpe_debug;
+void __fpu_dumpfpn(struct fpn *);
+#define DPRINTF(x, y) if (__fpe_debug & (x)) printf y
+#define DUMPFPN(x, f) if (__fpe_debug & (x)) __fpu_dumpfpn((f))
+#else
+#define DPRINTF(x, y)
+#define DUMPFPN(x, f)
+#endif
+
+#define FSR_GET_RD(fsr) (((fsr) >> FSR_RD_SHIFT) & FSR_RD_MASK)
+#endif /* !_SPARC64_FPU_FPU_EXTERN_H_ */
diff --git a/lib/libc/arch/sparc64/fpu/fpu_explode.c b/lib/libc/arch/sparc64/fpu/fpu_explode.c
new file mode 100644
index 00000000000..176d1dd3aa2
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_explode.c
@@ -0,0 +1,310 @@
+/* $OpenBSD: fpu_explode.c,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fpu_explode.c 8.1 (Berkeley) 6/11/93
+ * $NetBSD: fpu_explode.c,v 1.5 2000/08/03 18:32:08 eeh Exp $
+ */
+
+#include <sys/cdefs.h>
+#if 0
+__FBSDID("$FreeBSD: src/lib/libc/sparc64/fpu/fpu_explode.c,v 1.5 2002/05/11 21:20:04 jake Exp $");
+#endif
+
+/*
+ * FPU subroutines: `explode' the machine's `packed binary' format numbers
+ * into our internal format.
+ */
+
+#include <sys/param.h>
+
+#include <machine/frame.h>
+#include <machine/fsr.h>
+#include <machine/ieee.h>
+#include <machine/instr.h>
+
+#include "fpu_arith.h"
+#include "fpu_emu.h"
+#include "fpu_extern.h"
+#include "fpu_reg.h"
+
+/*
+ * N.B.: in all of the following, we assume the FP format is
+ *
+ * ---------------------------
+ * | s | exponent | fraction |
+ * ---------------------------
+ *
+ * (which represents -1**s * 1.fraction * 2**exponent), so that the
+ * sign bit is way at the top (bit 31), the exponent is next, and
+ * then the remaining bits mark the fraction. A zero exponent means
+ * zero or denormalized (0.fraction rather than 1.fraction), and the
+ * maximum possible exponent, 2bias+1, signals inf (fraction==0) or NaN.
+ *
+ * Since the sign bit is always the topmost bit---this holds even for
+ * integers---we set that outside all the *tof functions. Each function
+ * returns the class code for the new number (but note that we use
+ * FPC_QNAN for all NaNs; fpu_explode will fix this if appropriate).
+ */
+
+/*
+ * int -> fpn.
+ */
+int
+__fpu_itof(fp, i)
+ struct fpn *fp;
+ u_int i;
+{
+
+ if (i == 0)
+ return (FPC_ZERO);
+ /*
+ * The value FP_1 represents 2^FP_LG, so set the exponent
+ * there and let normalization fix it up. Convert negative
+ * numbers to sign-and-magnitude. Note that this relies on
+ * fpu_norm()'s handling of `supernormals'; see fpu_subr.c.
+ */
+ fp->fp_exp = FP_LG;
+ fp->fp_mant[0] = (int)i < 0 ? -i : i;
+ fp->fp_mant[1] = 0;
+ fp->fp_mant[2] = 0;
+ fp->fp_mant[3] = 0;
+ __fpu_norm(fp);
+ return (FPC_NUM);
+}
+
+/*
+ * 64-bit int -> fpn.
+ */
+int
+__fpu_xtof(fp, i)
+ struct fpn *fp;
+ u_int64_t i;
+{
+
+ if (i == 0)
+ return (FPC_ZERO);
+ /*
+ * The value FP_1 represents 2^FP_LG, so set the exponent
+ * there and let normalization fix it up. Convert negative
+ * numbers to sign-and-magnitude. Note that this relies on
+ * fpu_norm()'s handling of `supernormals'; see fpu_subr.c.
+ */
+ fp->fp_exp = FP_LG2;
+ *((int64_t*)fp->fp_mant) = (int64_t)i < 0 ? -i : i;
+ fp->fp_mant[2] = 0;
+ fp->fp_mant[3] = 0;
+ __fpu_norm(fp);
+ return (FPC_NUM);
+}
+
+#define mask(nbits) ((1L << (nbits)) - 1)
+
+/*
+ * All external floating formats convert to internal in the same manner,
+ * as defined here. Note that only normals get an implied 1.0 inserted.
+ */
+#define FP_TOF(exp, expbias, allfrac, f0, f1, f2, f3) \
+ if (exp == 0) { \
+ if (allfrac == 0) \
+ return (FPC_ZERO); \
+ fp->fp_exp = 1 - expbias; \
+ fp->fp_mant[0] = f0; \
+ fp->fp_mant[1] = f1; \
+ fp->fp_mant[2] = f2; \
+ fp->fp_mant[3] = f3; \
+ __fpu_norm(fp); \
+ return (FPC_NUM); \
+ } \
+ if (exp == (2 * expbias + 1)) { \
+ if (allfrac == 0) \
+ return (FPC_INF); \
+ fp->fp_mant[0] = f0; \
+ fp->fp_mant[1] = f1; \
+ fp->fp_mant[2] = f2; \
+ fp->fp_mant[3] = f3; \
+ return (FPC_QNAN); \
+ } \
+ fp->fp_exp = exp - expbias; \
+ fp->fp_mant[0] = FP_1 | f0; \
+ fp->fp_mant[1] = f1; \
+ fp->fp_mant[2] = f2; \
+ fp->fp_mant[3] = f3; \
+ return (FPC_NUM)
+
+/*
+ * 32-bit single precision -> fpn.
+ * We assume a single occupies at most (64-FP_LG) bits in the internal
+ * format: i.e., needs at most fp_mant[0] and fp_mant[1].
+ */
+int
+__fpu_stof(fp, i)
+ struct fpn *fp;
+ u_int i;
+{
+ int exp;
+ u_int frac, f0, f1;
+#define SNG_SHIFT (SNG_FRACBITS - FP_LG)
+
+ exp = (i >> (32 - 1 - SNG_EXPBITS)) & mask(SNG_EXPBITS);
+ frac = i & mask(SNG_FRACBITS);
+ f0 = frac >> SNG_SHIFT;
+ f1 = frac << (32 - SNG_SHIFT);
+ FP_TOF(exp, SNG_EXP_BIAS, frac, f0, f1, 0, 0);
+}
+
+/*
+ * 64-bit double -> fpn.
+ * We assume this uses at most (96-FP_LG) bits.
+ */
+int
+__fpu_dtof(fp, i, j)
+ struct fpn *fp;
+ u_int i, j;
+{
+ int exp;
+ u_int frac, f0, f1, f2;
+#define DBL_SHIFT (DBL_FRACBITS - 32 - FP_LG)
+
+ exp = (i >> (32 - 1 - DBL_EXPBITS)) & mask(DBL_EXPBITS);
+ frac = i & mask(DBL_FRACBITS - 32);
+ f0 = frac >> DBL_SHIFT;
+ f1 = (frac << (32 - DBL_SHIFT)) | (j >> DBL_SHIFT);
+ f2 = j << (32 - DBL_SHIFT);
+ frac |= j;
+ FP_TOF(exp, DBL_EXP_BIAS, frac, f0, f1, f2, 0);
+}
+
+/*
+ * 128-bit extended -> fpn.
+ */
+int
+__fpu_qtof(fp, i, j, k, l)
+ struct fpn *fp;
+ u_int i, j, k, l;
+{
+ int exp;
+ u_int frac, f0, f1, f2, f3;
+#define EXT_SHIFT (-(EXT_FRACBITS - 3 * 32 - FP_LG)) /* left shift! */
+
+ /*
+ * Note that ext and fpn `line up', hence no shifting needed.
+ */
+ exp = (i >> (32 - 1 - EXT_EXPBITS)) & mask(EXT_EXPBITS);
+ frac = i & mask(EXT_FRACBITS - 3 * 32);
+ f0 = (frac << EXT_SHIFT) | (j >> (32 - EXT_SHIFT));
+ f1 = (j << EXT_SHIFT) | (k >> (32 - EXT_SHIFT));
+ f2 = (k << EXT_SHIFT) | (l >> (32 - EXT_SHIFT));
+ f3 = l << EXT_SHIFT;
+ frac |= j | k | l;
+ FP_TOF(exp, EXT_EXP_BIAS, frac, f0, f1, f2, f3);
+}
+
+/*
+ * Explode the contents of a / regpair / regquad.
+ * If the input is a signalling NaN, an NV (invalid) exception
+ * will be set. (Note that nothing but NV can occur until ALU
+ * operations are performed.)
+ */
+void
+__fpu_explode(fe, fp, type, reg)
+ struct fpemu *fe;
+ struct fpn *fp;
+ int type, reg;
+{
+ u_int32_t s = 0/* XXX gcc */, *sp;
+ u_int64_t l[2];
+
+ if (type == FTYPE_LNG || type == FTYPE_DBL || type == FTYPE_EXT) {
+ l[0] = __fpu_getreg64(reg & ~1);
+ sp = (u_int32_t *)l;
+ fp->fp_sign = sp[0] >> 31;
+ fp->fp_sticky = 0;
+ switch (type) {
+ case FTYPE_LNG:
+ s = __fpu_xtof(fp, l[0]);
+ break;
+ case FTYPE_DBL:
+ s = __fpu_dtof(fp, sp[0], sp[1]);
+ break;
+ case FTYPE_EXT:
+ l[1] = __fpu_getreg64((reg & ~1) + 2);
+ s = __fpu_qtof(fp, sp[0], sp[1], sp[2], sp[3]);
+ break;
+ default:
+#ifdef DIAGNOSTIC
+ __utrap_panic("fpu_explode");
+#endif
+ }
+ } else {
+#ifdef DIAGNOSTIC
+ if (type != FTYPE_SNG)
+ __utrap_panic("fpu_explode");
+#endif
+ s = __fpu_getreg32(reg);
+ fp->fp_sign = s >> 31;
+ fp->fp_sticky = 0;
+ s = __fpu_stof(fp, s);
+ }
+
+ if (s == FPC_QNAN && (fp->fp_mant[0] & FP_QUIETBIT) == 0) {
+ /*
+ * Input is a signalling NaN. All operations that return
+ * an input NaN operand put it through a ``NaN conversion'',
+ * which basically just means ``turn on the quiet bit''.
+ * We do this here so that all NaNs internally look quiet
+ * (we can tell signalling ones by their class).
+ */
+ fp->fp_mant[0] |= FP_QUIETBIT;
+ fe->fe_cx = FSR_NV; /* assert invalid operand */
+ s = FPC_SNAN;
+ }
+ fp->fp_class = s;
+ DPRINTF(FPE_REG, ("fpu_explode: %%%c%d => ", (type == FTYPE_LNG) ? 'x' :
+ ((type == FTYPE_INT) ? 'i' :
+ ((type == FTYPE_SNG) ? 's' :
+ ((type == FTYPE_DBL) ? 'd' :
+ ((type == FTYPE_EXT) ? 'q' : '?')))),
+ reg));
+ DUMPFPN(FPE_REG, fp);
+ DPRINTF(FPE_REG, ("\n"));
+}
diff --git a/lib/libc/arch/sparc64/fpu/fpu_extern.h b/lib/libc/arch/sparc64/fpu/fpu_extern.h
new file mode 100644
index 00000000000..5c687b4eb22
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_extern.h
@@ -0,0 +1,95 @@
+/* $OpenBSD: fpu_extern.h,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*-
+ * Copyright (c) 1995 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $NetBSD: fpu_extern.h,v 1.4 2000/08/03 18:32:08 eeh Exp $
+ * $FreeBSD: src/lib/libc/sparc64/fpu/fpu_extern.h,v 1.6 2002/04/27 21:56:28 jake Exp $
+ */
+
+#ifndef _SPARC64_FPU_FPU_EXTERN_H_
+#define _SPARC64_FPU_FPU_EXTERN_H_
+
+struct utrapframe;
+union instr;
+struct fpemu;
+struct fpn;
+
+/* fpu.c */
+int __fpu_exception(struct utrapframe *tf);
+
+/* fpu_add.c */
+struct fpn *__fpu_add(struct fpemu *);
+
+/* fpu_compare.c */
+void __fpu_compare(struct fpemu *, int, int);
+
+/* fpu_div.c */
+struct fpn *__fpu_div(struct fpemu *);
+
+/* fpu_explode.c */
+int __fpu_itof(struct fpn *, u_int);
+int __fpu_xtof(struct fpn *, u_int64_t);
+int __fpu_stof(struct fpn *, u_int);
+int __fpu_dtof(struct fpn *, u_int, u_int );
+int __fpu_qtof(struct fpn *, u_int, u_int , u_int , u_int );
+void __fpu_explode(struct fpemu *, struct fpn *, int, int );
+
+/* fpu_implode.c */
+u_int __fpu_ftoi(struct fpemu *, struct fpn *);
+u_int __fpu_ftox(struct fpemu *, struct fpn *, u_int *);
+u_int __fpu_ftos(struct fpemu *, struct fpn *);
+u_int __fpu_ftod(struct fpemu *, struct fpn *, u_int *);
+u_int __fpu_ftoq(struct fpemu *, struct fpn *, u_int *);
+void __fpu_implode(struct fpemu *, struct fpn *, int, u_int *);
+
+/* fpu_mul.c */
+struct fpn *__fpu_mul(struct fpemu *);
+
+/* fpu_sqrt.c */
+struct fpn *__fpu_sqrt(struct fpemu *);
+
+/* fpu_subr.c */
+/*
+ * Shift a number right some number of bits, taking care of round/sticky.
+ * Note that the result is probably not a well-formed number (it will lack
+ * the normal 1-bit mant[0]&FP_1).
+ */
+int __fpu_shr(register struct fpn *, register int);
+void __fpu_norm(register struct fpn *);
+/* Build a new Quiet NaN (sign=0, frac=all 1's). */
+struct fpn *__fpu_newnan(register struct fpemu *);
+
+#endif /* !_SPARC64_FPU_FPU_EXTERN_H_ */
diff --git a/lib/libc/arch/sparc64/fpu/fpu_implode.c b/lib/libc/arch/sparc64/fpu/fpu_implode.c
new file mode 100644
index 00000000000..e835e9aa07e
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_implode.c
@@ -0,0 +1,543 @@
+/* $OpenBSD: fpu_implode.c,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fpu_implode.c 8.1 (Berkeley) 6/11/93
+ * $NetBSD: fpu_implode.c,v 1.8 2001/08/26 05:44:46 eeh Exp $
+ */
+
+#include <sys/cdefs.h>
+#if 0
+__FBSDID("$FreeBSD: src/lib/libc/sparc64/fpu/fpu_implode.c,v 1.5 2002/04/27 21:56:28 jake Exp $");
+#endif
+
+/*
+ * FPU subroutines: `implode' internal format numbers into the machine's
+ * `packed binary' format.
+ */
+
+#include <sys/param.h>
+
+#include <machine/frame.h>
+#include <machine/fsr.h>
+#include <machine/ieee.h>
+#include <machine/instr.h>
+
+#include "fpu_arith.h"
+#include "fpu_emu.h"
+#include "fpu_extern.h"
+
+static int round(struct fpemu *, struct fpn *);
+static int toinf(struct fpemu *, int);
+
+#define FSR_GET_RD(fsr) (((fsr) >> FSR_RD_SHIFT) & FSR_RD_MASK)
+
+/*
+ * Round a number (algorithm from Motorola MC68882 manual, modified for
+ * our internal format). Set inexact exception if rounding is required.
+ * Return true iff we rounded up.
+ *
+ * After rounding, we discard the guard and round bits by shifting right
+ * 2 bits (a la fpu_shr(), but we do not bother with fp->fp_sticky).
+ * This saves effort later.
+ *
+ * Note that we may leave the value 2.0 in fp->fp_mant; it is the caller's
+ * responsibility to fix this if necessary.
+ */
+static int
+round(struct fpemu *fe, struct fpn *fp)
+{
+ u_int m0, m1, m2, m3;
+ int gr, s;
+
+ m0 = fp->fp_mant[0];
+ m1 = fp->fp_mant[1];
+ m2 = fp->fp_mant[2];
+ m3 = fp->fp_mant[3];
+ gr = m3 & 3;
+ s = fp->fp_sticky;
+
+ /* mant >>= FP_NG */
+ m3 = (m3 >> FP_NG) | (m2 << (32 - FP_NG));
+ m2 = (m2 >> FP_NG) | (m1 << (32 - FP_NG));
+ m1 = (m1 >> FP_NG) | (m0 << (32 - FP_NG));
+ m0 >>= FP_NG;
+
+ if ((gr | s) == 0) /* result is exact: no rounding needed */
+ goto rounddown;
+
+ fe->fe_cx |= FSR_NX; /* inexact */
+
+ /* Go to rounddown to round down; break to round up. */
+ switch (FSR_GET_RD(fe->fe_fsr)) {
+ case FSR_RD_RN:
+ default:
+ /*
+ * Round only if guard is set (gr & 2). If guard is set,
+ * but round & sticky both clear, then we want to round
+ * but have a tie, so round to even, i.e., add 1 iff odd.
+ */
+ if ((gr & 2) == 0)
+ goto rounddown;
+ if ((gr & 1) || fp->fp_sticky || (m3 & 1))
+ break;
+ goto rounddown;
+
+ case FSR_RD_RZ:
+ /* Round towards zero, i.e., down. */
+ goto rounddown;
+
+ case FSR_RD_RM:
+ /* Round towards -Inf: up if negative, down if positive. */
+ if (fp->fp_sign)
+ break;
+ goto rounddown;
+
+ case FSR_RD_RP:
+ /* Round towards +Inf: up if positive, down otherwise. */
+ if (!fp->fp_sign)
+ break;
+ goto rounddown;
+ }
+
+ /* Bump low bit of mantissa, with carry. */
+ FPU_ADDS(m3, m3, 1);
+ FPU_ADDCS(m2, m2, 0);
+ FPU_ADDCS(m1, m1, 0);
+ FPU_ADDC(m0, m0, 0);
+ fp->fp_mant[0] = m0;
+ fp->fp_mant[1] = m1;
+ fp->fp_mant[2] = m2;
+ fp->fp_mant[3] = m3;
+ return (1);
+
+rounddown:
+ fp->fp_mant[0] = m0;
+ fp->fp_mant[1] = m1;
+ fp->fp_mant[2] = m2;
+ fp->fp_mant[3] = m3;
+ return (0);
+}
+
+/*
+ * For overflow: return true if overflow is to go to +/-Inf, according
+ * to the sign of the overflowing result. If false, overflow is to go
+ * to the largest magnitude value instead.
+ */
+static int
+toinf(struct fpemu *fe, int sign)
+{
+ int inf;
+
+ /* look at rounding direction */
+ switch (FSR_GET_RD(fe->fe_fsr)) {
+ default:
+ case FSR_RD_RN: /* the nearest value is always Inf */
+ inf = 1;
+ break;
+
+ case FSR_RD_RZ: /* toward 0 => never towards Inf */
+ inf = 0;
+ break;
+
+ case FSR_RD_RP: /* toward +Inf iff positive */
+ inf = sign == 0;
+ break;
+
+ case FSR_RD_RM: /* toward -Inf iff negative */
+ inf = sign;
+ break;
+ }
+ return (inf);
+}
+
+/*
+ * fpn -> int (int value returned as return value).
+ *
+ * N.B.: this conversion always rounds towards zero (this is a peculiarity
+ * of the SPARC instruction set).
+ */
+u_int
+__fpu_ftoi(fe, fp)
+ struct fpemu *fe;
+ struct fpn *fp;
+{
+ u_int i;
+ int sign, exp;
+
+ sign = fp->fp_sign;
+ switch (fp->fp_class) {
+
+ case FPC_ZERO:
+ return (0);
+
+ case FPC_NUM:
+ /*
+ * If exp >= 2^32, overflow. Otherwise shift value right
+ * into last mantissa word (this will not exceed 0xffffffff),
+ * shifting any guard and round bits out into the sticky
+ * bit. Then ``round'' towards zero, i.e., just set an
+ * inexact exception if sticky is set (see round()).
+ * If the result is > 0x80000000, or is positive and equals
+ * 0x80000000, overflow; otherwise the last fraction word
+ * is the result.
+ */
+ if ((exp = fp->fp_exp) >= 32)
+ break;
+ /* NB: the following includes exp < 0 cases */
+ if (__fpu_shr(fp, FP_NMANT - 1 - exp) != 0)
+ fe->fe_cx |= FSR_NX;
+ i = fp->fp_mant[3];
+ if (i >= ((u_int)0x80000000 + sign))
+ break;
+ return (sign ? -i : i);
+
+ default: /* Inf, qNaN, sNaN */
+ break;
+ }
+ /* overflow: replace any inexact exception with invalid */
+ fe->fe_cx = (fe->fe_cx & ~FSR_NX) | FSR_NV;
+ return (0x7fffffff + sign);
+}
+
+/*
+ * fpn -> extended int (high bits of int value returned as return value).
+ *
+ * N.B.: this conversion always rounds towards zero (this is a peculiarity
+ * of the SPARC instruction set).
+ */
+u_int
+__fpu_ftox(fe, fp, res)
+ struct fpemu *fe;
+ struct fpn *fp;
+ u_int *res;
+{
+ u_int64_t i;
+ int sign, exp;
+
+ sign = fp->fp_sign;
+ switch (fp->fp_class) {
+
+ case FPC_ZERO:
+ res[1] = 0;
+ return (0);
+
+ case FPC_NUM:
+ /*
+ * If exp >= 2^64, overflow. Otherwise shift value right
+ * into last mantissa word (this will not exceed 0xffffffffffffffff),
+ * shifting any guard and round bits out into the sticky
+ * bit. Then ``round'' towards zero, i.e., just set an
+ * inexact exception if sticky is set (see round()).
+ * If the result is > 0x8000000000000000, or is positive and equals
+ * 0x8000000000000000, overflow; otherwise the last fraction word
+ * is the result.
+ */
+ if ((exp = fp->fp_exp) >= 64)
+ break;
+ /* NB: the following includes exp < 0 cases */
+ if (__fpu_shr(fp, FP_NMANT - 1 - exp) != 0)
+ fe->fe_cx |= FSR_NX;
+ i = ((u_int64_t)fp->fp_mant[2]<<32)|fp->fp_mant[3];
+ if (i >= ((u_int64_t)0x8000000000000000LL + sign))
+ break;
+ if (sign)
+ i = -1;
+ res[1] = (int)i;
+ return (i >> 32);
+
+ default: /* Inf, qNaN, sNaN */
+ break;
+ }
+ /* overflow: replace any inexact exception with invalid */
+ fe->fe_cx = (fe->fe_cx & ~FSR_NX) | FSR_NV;
+ return (0x7fffffffffffffffLL + sign);
+}
+
+/*
+ * fpn -> single (32 bit single returned as return value).
+ * We assume <= 29 bits in a single-precision fraction (1.f part).
+ */
+u_int
+__fpu_ftos(fe, fp)
+ struct fpemu *fe;
+ struct fpn *fp;
+{
+ u_int sign = fp->fp_sign << 31;
+ int exp;
+
+#define SNG_EXP(e) ((e) << SNG_FRACBITS) /* makes e an exponent */
+#define SNG_MASK (SNG_EXP(1) - 1) /* mask for fraction */
+
+ /* Take care of non-numbers first. */
+ if (ISNAN(fp)) {
+ /*
+ * Preserve upper bits of NaN, per SPARC V8 appendix N.
+ * Note that fp->fp_mant[0] has the quiet bit set,
+ * even if it is classified as a signalling NaN.
+ */
+ (void) __fpu_shr(fp, FP_NMANT - 1 - SNG_FRACBITS);
+ exp = SNG_EXP_INFNAN;
+ goto done;
+ }
+ if (ISINF(fp))
+ return (sign | SNG_EXP(SNG_EXP_INFNAN));
+ if (ISZERO(fp))
+ return (sign);
+
+ /*
+ * Normals (including subnormals). Drop all the fraction bits
+ * (including the explicit ``implied'' 1 bit) down into the
+ * single-precision range. If the number is subnormal, move
+ * the ``implied'' 1 into the explicit range as well, and shift
+ * right to introduce leading zeroes. Rounding then acts
+ * differently for normals and subnormals: the largest subnormal
+ * may round to the smallest normal (1.0 x 2^minexp), or may
+ * remain subnormal. In the latter case, signal an underflow
+ * if the result was inexact or if underflow traps are enabled.
+ *
+ * Rounding a normal, on the other hand, always produces another
+ * normal (although either way the result might be too big for
+ * single precision, and cause an overflow). If rounding a
+ * normal produces 2.0 in the fraction, we need not adjust that
+ * fraction at all, since both 1.0 and 2.0 are zero under the
+ * fraction mask.
+ *
+ * Note that the guard and round bits vanish from the number after
+ * rounding.
+ */
+ if ((exp = fp->fp_exp + SNG_EXP_BIAS) <= 0) { /* subnormal */
+ /* -NG for g,r; -SNG_FRACBITS-exp for fraction */
+ (void) __fpu_shr(fp, FP_NMANT - FP_NG - SNG_FRACBITS - exp);
+ if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(1))
+ return (sign | SNG_EXP(1) | 0);
+ if ((fe->fe_cx & FSR_NX) ||
+ (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
+ fe->fe_cx |= FSR_UF;
+ return (sign | SNG_EXP(0) | fp->fp_mant[3]);
+ }
+ /* -FP_NG for g,r; -1 for implied 1; -SNG_FRACBITS for fraction */
+ (void) __fpu_shr(fp, FP_NMANT - FP_NG - 1 - SNG_FRACBITS);
+#ifdef DIAGNOSTIC
+ if ((fp->fp_mant[3] & SNG_EXP(1 << FP_NG)) == 0)
+ __utrap_panic("fpu_ftos");
+#endif
+ if (round(fe, fp) && fp->fp_mant[3] == SNG_EXP(2))
+ exp++;
+ if (exp >= SNG_EXP_INFNAN) {
+ /* overflow to inf or to max single */
+ fe->fe_cx |= FSR_OF | FSR_NX;
+ if (toinf(fe, sign))
+ return (sign | SNG_EXP(SNG_EXP_INFNAN));
+ return (sign | SNG_EXP(SNG_EXP_INFNAN - 1) | SNG_MASK);
+ }
+done:
+ /* phew, made it */
+ return (sign | SNG_EXP(exp) | (fp->fp_mant[3] & SNG_MASK));
+}
+
+/*
+ * fpn -> double (32 bit high-order result returned; 32-bit low order result
+ * left in res[1]). Assumes <= 61 bits in double precision fraction.
+ *
+ * This code mimics fpu_ftos; see it for comments.
+ */
+u_int
+__fpu_ftod(fe, fp, res)
+ struct fpemu *fe;
+ struct fpn *fp;
+ u_int *res;
+{
+ u_int sign = fp->fp_sign << 31;
+ int exp;
+
+#define DBL_EXP(e) ((e) << (DBL_FRACBITS & 31))
+#define DBL_MASK (DBL_EXP(1) - 1)
+
+ if (ISNAN(fp)) {
+ (void) __fpu_shr(fp, FP_NMANT - 1 - DBL_FRACBITS);
+ exp = DBL_EXP_INFNAN;
+ goto done;
+ }
+ if (ISINF(fp)) {
+ sign |= DBL_EXP(DBL_EXP_INFNAN);
+ goto zero;
+ }
+ if (ISZERO(fp)) {
+zero: res[1] = 0;
+ return (sign);
+ }
+
+ if ((exp = fp->fp_exp + DBL_EXP_BIAS) <= 0) {
+ (void) __fpu_shr(fp, FP_NMANT - FP_NG - DBL_FRACBITS - exp);
+ if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(1)) {
+ res[1] = 0;
+ return (sign | DBL_EXP(1) | 0);
+ }
+ if ((fe->fe_cx & FSR_NX) ||
+ (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
+ fe->fe_cx |= FSR_UF;
+ exp = 0;
+ goto done;
+ }
+ (void) __fpu_shr(fp, FP_NMANT - FP_NG - 1 - DBL_FRACBITS);
+ if (round(fe, fp) && fp->fp_mant[2] == DBL_EXP(2))
+ exp++;
+ if (exp >= DBL_EXP_INFNAN) {
+ fe->fe_cx |= FSR_OF | FSR_NX;
+ if (toinf(fe, sign)) {
+ res[1] = 0;
+ return (sign | DBL_EXP(DBL_EXP_INFNAN) | 0);
+ }
+ res[1] = ~0;
+ return (sign | DBL_EXP(DBL_EXP_INFNAN) | DBL_MASK);
+ }
+done:
+ res[1] = fp->fp_mant[3];
+ return (sign | DBL_EXP(exp) | (fp->fp_mant[2] & DBL_MASK));
+}
+
+/*
+ * fpn -> extended (32 bit high-order result returned; low-order fraction
+ * words left in res[1]..res[3]). Like ftod, which is like ftos ... but
+ * our internal format *is* extended precision, plus 2 bits for guard/round,
+ * so we can avoid a small bit of work.
+ */
+u_int
+__fpu_ftoq(fe, fp, res)
+ struct fpemu *fe;
+ struct fpn *fp;
+ u_int *res;
+{
+ u_int sign = fp->fp_sign << 31;
+ int exp;
+
+#define EXT_EXP(e) ((e) << (EXT_FRACBITS & 31))
+#define EXT_MASK (EXT_EXP(1) - 1)
+
+ if (ISNAN(fp)) {
+ (void) __fpu_shr(fp, 2); /* since we are not rounding */
+ exp = EXT_EXP_INFNAN;
+ goto done;
+ }
+ if (ISINF(fp)) {
+ sign |= EXT_EXP(EXT_EXP_INFNAN);
+ goto zero;
+ }
+ if (ISZERO(fp)) {
+zero: res[1] = res[2] = res[3] = 0;
+ return (sign);
+ }
+
+ if ((exp = fp->fp_exp + EXT_EXP_BIAS) <= 0) {
+ (void) __fpu_shr(fp, FP_NMANT - FP_NG - EXT_FRACBITS - exp);
+ if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(1)) {
+ res[1] = res[2] = res[3] = 0;
+ return (sign | EXT_EXP(1) | 0);
+ }
+ if ((fe->fe_cx & FSR_NX) ||
+ (fe->fe_fsr & (FSR_UF << FSR_TEM_SHIFT)))
+ fe->fe_cx |= FSR_UF;
+ exp = 0;
+ goto done;
+ }
+ /* Since internal == extended, no need to shift here. */
+ if (round(fe, fp) && fp->fp_mant[0] == EXT_EXP(2))
+ exp++;
+ if (exp >= EXT_EXP_INFNAN) {
+ fe->fe_cx |= FSR_OF | FSR_NX;
+ if (toinf(fe, sign)) {
+ res[1] = res[2] = res[3] = 0;
+ return (sign | EXT_EXP(EXT_EXP_INFNAN) | 0);
+ }
+ res[1] = res[2] = res[3] = ~0;
+ return (sign | EXT_EXP(EXT_EXP_INFNAN) | EXT_MASK);
+ }
+done:
+ res[1] = fp->fp_mant[1];
+ res[2] = fp->fp_mant[2];
+ res[3] = fp->fp_mant[3];
+ return (sign | EXT_EXP(exp) | (fp->fp_mant[0] & EXT_MASK));
+}
+
+/*
+ * Implode an fpn, writing the result into the given space.
+ */
+void
+__fpu_implode(fe, fp, type, space)
+ struct fpemu *fe;
+ struct fpn *fp;
+ int type;
+ u_int *space;
+{
+
+ switch (type) {
+
+ case FTYPE_LNG:
+ space[0] = __fpu_ftox(fe, fp, space);
+ break;
+
+ case FTYPE_INT:
+ space[0] = __fpu_ftoi(fe, fp);
+ break;
+
+ case FTYPE_SNG:
+ space[0] = __fpu_ftos(fe, fp);
+ break;
+
+ case FTYPE_DBL:
+ space[0] = __fpu_ftod(fe, fp, space);
+ break;
+
+ case FTYPE_EXT:
+ /* funky rounding precision options ?? */
+ space[0] = __fpu_ftoq(fe, fp, space);
+ break;
+
+#ifdef DIAGNOSTIC
+ default:
+ __utrap_panic("fpu_implode");
+#endif
+ }
+ DPRINTF(FPE_REG, ("fpu_implode: %x %x %x %x\n",
+ space[0], space[1], space[2], space[3]));
+}
diff --git a/lib/libc/arch/sparc64/fpu/fpu_mul.c b/lib/libc/arch/sparc64/fpu/fpu_mul.c
new file mode 100644
index 00000000000..5115c00a288
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_mul.c
@@ -0,0 +1,233 @@
+/* $OpenBSD: fpu_mul.c,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fpu_mul.c 8.1 (Berkeley) 6/11/93
+ * $NetBSD: fpu_mul.c,v 1.2 1994/11/20 20:52:44 deraadt Exp $
+ */
+
+#include <sys/cdefs.h>
+#if 0
+__FBSDID("$FreeBSD: src/lib/libc/sparc64/fpu/fpu_mul.c,v 1.3 2002/03/22 21:52:58 obrien Exp $");
+#endif
+
+/*
+ * Perform an FPU multiply (return x * y).
+ */
+
+#include <sys/types.h>
+
+#include <machine/frame.h>
+
+#include "fpu_arith.h"
+#include "fpu_emu.h"
+#include "fpu_extern.h"
+
+/*
+ * The multiplication algorithm for normal numbers is as follows:
+ *
+ * The fraction of the product is built in the usual stepwise fashion.
+ * Each step consists of shifting the accumulator right one bit
+ * (maintaining any guard bits) and, if the next bit in y is set,
+ * adding the multiplicand (x) to the accumulator. Then, in any case,
+ * we advance one bit leftward in y. Algorithmically:
+ *
+ * A = 0;
+ * for (bit = 0; bit < FP_NMANT; bit++) {
+ * sticky |= A & 1, A >>= 1;
+ * if (Y & (1 << bit))
+ * A += X;
+ * }
+ *
+ * (X and Y here represent the mantissas of x and y respectively.)
+ * The resultant accumulator (A) is the product's mantissa. It may
+ * be as large as 11.11111... in binary and hence may need to be
+ * shifted right, but at most one bit.
+ *
+ * Since we do not have efficient multiword arithmetic, we code the
+ * accumulator as four separate words, just like any other mantissa.
+ * We use local `register' variables in the hope that this is faster
+ * than memory. We keep x->fp_mant in locals for the same reason.
+ *
+ * In the algorithm above, the bits in y are inspected one at a time.
+ * We will pick them up 32 at a time and then deal with those 32, one
+ * at a time. Note, however, that we know several things about y:
+ *
+ * - the guard and round bits at the bottom are sure to be zero;
+ *
+ * - often many low bits are zero (y is often from a single or double
+ * precision source);
+ *
+ * - bit FP_NMANT-1 is set, and FP_1*2 fits in a word.
+ *
+ * We can also test for 32-zero-bits swiftly. In this case, the center
+ * part of the loop---setting sticky, shifting A, and not adding---will
+ * run 32 times without adding X to A. We can do a 32-bit shift faster
+ * by simply moving words. Since zeros are common, we optimize this case.
+ * Furthermore, since A is initially zero, we can omit the shift as well
+ * until we reach a nonzero word.
+ */
+struct fpn *
+__fpu_mul(fe)
+ struct fpemu *fe;
+{
+ struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
+ u_int a3, a2, a1, a0, x3, x2, x1, x0, bit, m;
+ int sticky;
+ FPU_DECL_CARRY
+
+ /*
+ * Put the `heavier' operand on the right (see fpu_emu.h).
+ * Then we will have one of the following cases, taken in the
+ * following order:
+ *
+ * - y = NaN. Implied: if only one is a signalling NaN, y is.
+ * The result is y.
+ * - y = Inf. Implied: x != NaN (is 0, number, or Inf: the NaN
+ * case was taken care of earlier).
+ * If x = 0, the result is NaN. Otherwise the result
+ * is y, with its sign reversed if x is negative.
+ * - x = 0. Implied: y is 0 or number.
+ * The result is 0 (with XORed sign as usual).
+ * - other. Implied: both x and y are numbers.
+ * The result is x * y (XOR sign, multiply bits, add exponents).
+ */
+ ORDER(x, y);
+ if (ISNAN(y)) {
+ y->fp_sign ^= x->fp_sign;
+ return (y);
+ }
+ if (ISINF(y)) {
+ if (ISZERO(x))
+ return (__fpu_newnan(fe));
+ y->fp_sign ^= x->fp_sign;
+ return (y);
+ }
+ if (ISZERO(x)) {
+ x->fp_sign ^= y->fp_sign;
+ return (x);
+ }
+
+ /*
+ * Setup. In the code below, the mask `m' will hold the current
+ * mantissa byte from y. The variable `bit' denotes the bit
+ * within m. We also define some macros to deal with everything.
+ */
+ x3 = x->fp_mant[3];
+ x2 = x->fp_mant[2];
+ x1 = x->fp_mant[1];
+ x0 = x->fp_mant[0];
+ sticky = a3 = a2 = a1 = a0 = 0;
+
+#define ADD /* A += X */ \
+ FPU_ADDS(a3, a3, x3); \
+ FPU_ADDCS(a2, a2, x2); \
+ FPU_ADDCS(a1, a1, x1); \
+ FPU_ADDC(a0, a0, x0)
+
+#define SHR1 /* A >>= 1, with sticky */ \
+ sticky |= a3 & 1, a3 = (a3 >> 1) | (a2 << 31), \
+ a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1
+
+#define SHR32 /* A >>= 32, with sticky */ \
+ sticky |= a3, a3 = a2, a2 = a1, a1 = a0, a0 = 0
+
+#define STEP /* each 1-bit step of the multiplication */ \
+ SHR1; if (bit & m) { ADD; }; bit <<= 1
+
+ /*
+ * We are ready to begin. The multiply loop runs once for each
+ * of the four 32-bit words. Some words, however, are special.
+ * As noted above, the low order bits of Y are often zero. Even
+ * if not, the first loop can certainly skip the guard bits.
+ * The last word of y has its highest 1-bit in position FP_NMANT-1,
+ * so we stop the loop when we move past that bit.
+ */
+ if ((m = y->fp_mant[3]) == 0) {
+ /* SHR32; */ /* unneeded since A==0 */
+ } else {
+ bit = 1 << FP_NG;
+ do {
+ STEP;
+ } while (bit != 0);
+ }
+ if ((m = y->fp_mant[2]) == 0) {
+ SHR32;
+ } else {
+ bit = 1;
+ do {
+ STEP;
+ } while (bit != 0);
+ }
+ if ((m = y->fp_mant[1]) == 0) {
+ SHR32;
+ } else {
+ bit = 1;
+ do {
+ STEP;
+ } while (bit != 0);
+ }
+ m = y->fp_mant[0]; /* definitely != 0 */
+ bit = 1;
+ do {
+ STEP;
+ } while (bit <= m);
+
+ /*
+ * Done with mantissa calculation. Get exponent and handle
+ * 11.111...1 case, then put result in place. We reuse x since
+ * it already has the right class (FP_NUM).
+ */
+ m = x->fp_exp + y->fp_exp;
+ if (a0 >= FP_2) {
+ SHR1;
+ m++;
+ }
+ x->fp_sign ^= y->fp_sign;
+ x->fp_exp = m;
+ x->fp_sticky = sticky;
+ x->fp_mant[3] = a3;
+ x->fp_mant[2] = a2;
+ x->fp_mant[1] = a1;
+ x->fp_mant[0] = a0;
+ return (x);
+}
diff --git a/lib/libc/arch/sparc64/fpu/fpu_q.c b/lib/libc/arch/sparc64/fpu/fpu_q.c
new file mode 100644
index 00000000000..fa6904c53e1
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_q.c
@@ -0,0 +1,192 @@
+/* $OpenBSD: fpu_q.c,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 2003 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include "fpu_q.h"
+
+long double
+_Q_mul(long double a, long double b)
+{
+ long double c;
+
+ _Qp_mul(&c, &a, &b);
+ return (c);
+}
+
+long double
+_Q_div(long double a, long double b)
+{
+ long double c;
+
+ _Qp_div(&c, &a, &b);
+ return (c);
+}
+
+long double
+_Q_sub(long double a, long double b)
+{
+ long double c;
+
+ _Qp_sub(&c, &a, &b);
+ return (c);
+}
+
+long double
+_Q_add(long double a, long double b)
+{
+ long double c;
+
+ _Qp_add(&c, &a, &b);
+ return (c);
+}
+
+long double
+_Q_neg(long double a)
+{
+ long double z = 0, r;
+
+ _Qp_sub(&r, &z, &a);
+ return (r);
+}
+
+long double
+_Q_sqrt(long double a)
+{
+ long double v;
+
+ _Qp_sqrt(&v, &a);
+ return (v);
+}
+
+long double
+_Q_dtoq(double a)
+{
+ long double v;
+
+ _Qp_dtoq(&v, a);
+ return (v);
+}
+
+long double
+_Q_stoq(float a)
+{
+ long double v;
+
+ _Qp_stoq(&v, a);
+ return (v);
+}
+
+long double
+_Q_itoq(int a)
+{
+ long double v;
+
+ _Qp_itoq(&v, a);
+ return (v);
+}
+
+long double
+_Q_utoq(unsigned int a)
+{
+ long double v;
+
+ _Qp_uitoq(&v, a);
+ return (v);
+}
+
+double
+_Q_qtod(long double a)
+{
+ double v;
+
+ v = _Qp_qtod(&a);
+ return (v);
+}
+
+int
+_Q_qtoi(long double a)
+{
+ int v;
+
+ v = _Qp_qtoi(&a);
+ return (v);
+}
+
+float
+_Q_qtos(long double a)
+{
+ float v;
+
+ v = _Qp_qtos(&a);
+ return (v);
+}
+
+unsigned int
+_Q_qtou(long double a)
+{
+ unsigned int v;
+
+ v = _Qp_qtoui(&a);
+ return (v);
+}
+
+long double
+_Q_feq(long double a, long double b)
+{
+ return (_Qp_feq(&a, &b));
+}
+
+long double
+_Q_fne(long double a, long double b)
+{
+ return (_Qp_fne(&a, &b));
+}
+
+long double
+_Q_fge(long double a, long double b)
+{
+ return (_Qp_fge(&a, &b));
+}
+
+long double
+_Q_fgt(long double a, long double b)
+{
+ return (_Qp_fgt(&a, &b));
+}
+
+long double
+_Q_flt(long double a, long double b)
+{
+ return (_Qp_flt(&a, &b));
+}
+
+long double
+_Q_fle(long double a, long double b)
+{
+ return (_Qp_fle(&a, &b));
+}
diff --git a/lib/libc/arch/sparc64/fpu/fpu_q.h b/lib/libc/arch/sparc64/fpu/fpu_q.h
new file mode 100644
index 00000000000..166bbfd7ed0
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_q.h
@@ -0,0 +1,72 @@
+/* $OpenBSD: fpu_q.h,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 2003 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+double _Q_qtod(long double);
+int _Q_qtoi(long double);
+float _Q_qtos(long double);
+unsigned int _Q_qtou(long double);
+long double _Q_neg(long double);
+long double _Q_sqrt(long double);
+long double _Q_dtoq(double);
+long double _Q_stoq(float);
+long double _Q_itoq(int);
+long double _Q_utoq(unsigned int);
+long double _Q_mul(long double, long double);
+long double _Q_div(long double, long double);
+long double _Q_sub(long double, long double);
+long double _Q_add(long double, long double);
+long double _Q_feq(long double, long double);
+long double _Q_fne(long double, long double);
+long double _Q_fge(long double, long double);
+long double _Q_fgt(long double, long double);
+long double _Q_flt(long double, long double);
+long double _Q_fle(long double, long double);
+
+void _Qp_add(long double *, long double *, long double *);
+void _Qp_div(long double *, long double *, long double *);
+void _Qp_mul(long double *, long double *, long double *);
+void _Qp_sub(long double *, long double *, void *);
+void _Qp_dtoq(long double *, double);
+void _Qp_itoq(long double *, int);
+void _Qp_stoq(long double *, float);
+void _Qp_xtoq(long double *, long);
+void _Qp_uitoq(long double *, u_int);
+void _Qp_uxtoq(long double *, u_long);
+double _Qp_qtod(long double *);
+long _Qp_qtox(long double *);
+u_long _Qp_qtoux(long double *);
+u_int _Qp_qtoui(long double *);
+int _Qp_qtoi(long double *);
+float _Qp_qtos(long double *);
+int _Qp_feq(long double *, long double *);
+int _Qp_fge(long double *, long double *);
+int _Qp_fle(long double *, long double *);
+int _Qp_fgt(long double *, long double *);
+int _Qp_flt(long double *, long double *);
+int _Qp_fne(long double *, long double *);
+void _Qp_sqrt(long double *, long double *);
diff --git a/lib/libc/arch/sparc64/fpu/fpu_qp.c b/lib/libc/arch/sparc64/fpu/fpu_qp.c
new file mode 100644
index 00000000000..a55cb51bcda
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_qp.c
@@ -0,0 +1,188 @@
+/* $OpenBSD: fpu_qp.c,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*-
+ * Copyright (c) 2002 Jake Burkholder.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if 0
+__FBSDID("$FreeBSD: src/lib/libc/sparc64/fpu/fpu_qp.c,v 1.3 2002/09/02 02:30:20 jake Exp $");
+#endif
+
+#include <sys/types.h>
+#include <machine/fsr.h>
+
+#include "fpu_emu.h"
+#include "fpu_extern.h"
+
+#define _QP_OP(op) \
+void _Qp_ ## op(u_int *c, u_int *a, u_int *b); \
+void \
+_Qp_ ## op(u_int *c, u_int *a, u_int *b) \
+{ \
+ struct fpemu fe; \
+ struct fpn *r; \
+ __asm __volatile("stx %%fsr, [%0]" : : "r" (&fe.fe_fsr)); \
+ fe.fe_f1.fp_sign = a[0] >> 31; \
+ fe.fe_f1.fp_sticky = 0; \
+ fe.fe_f1.fp_class = __fpu_qtof(&fe.fe_f1, a[0], a[1], a[2], a[3]); \
+ fe.fe_f2.fp_sign = b[0] >> 31; \
+ fe.fe_f2.fp_sticky = 0; \
+ fe.fe_f2.fp_class = __fpu_qtof(&fe.fe_f2, b[0], b[1], b[2], b[3]); \
+ r = __fpu_ ## op(&fe); \
+ c[0] = __fpu_ftoq(&fe, r, c); \
+}
+
+#define _QP_TTOQ(qname, fname, ntype, atype, ...) \
+void _Qp_ ## qname ## toq(u_int *c, ntype n); \
+void \
+_Qp_ ## qname ## toq(u_int *c, ntype n) \
+{ \
+ struct fpemu fe; \
+ atype *a; \
+ __asm __volatile("stx %%fsr, %0" : "=m" (fe.fe_fsr) :); \
+ a = (atype *)&n; \
+ fe.fe_f1.fp_sign = a[0] >> 31; \
+ fe.fe_f1.fp_sticky = 0; \
+ fe.fe_f1.fp_class = __fpu_ ## fname ## tof(&fe.fe_f1, __VA_ARGS__); \
+ c[0] = __fpu_ftoq(&fe, &fe.fe_f1, c); \
+}
+
+#define _QP_QTOT4(qname, fname, type, x) \
+type _Qp_qto ## qname(u_int *c); \
+type \
+_Qp_qto ## qname(u_int *c) \
+{ \
+ struct fpemu fe; \
+ u_int *a; \
+ type n; \
+ __asm __volatile("stx %%fsr, %0" : "=m" (fe.fe_fsr) :); \
+ a = (u_int *)&n; \
+ fe.fe_f1.fp_sign = c[0] >> 31; \
+ fe.fe_f1.fp_sticky = 0; \
+ fe.fe_f1.fp_class = __fpu_qtof(&fe.fe_f1, c[0], c[1], c[2], c[3]); \
+ a[0] = __fpu_fto ## fname(&fe, &fe.fe_f1, x); \
+ return (n); \
+}
+
+#define _QP_QTOT3(qname, fname, type) \
+type _Qp_qto ## qname(u_int *c); \
+type \
+_Qp_qto ## qname(u_int *c) \
+{ \
+ struct fpemu fe; \
+ u_int *a; \
+ type n; \
+ __asm __volatile("stx %%fsr, %0" : "=m" (fe.fe_fsr) :); \
+ a = (u_int *)&n; \
+ fe.fe_f1.fp_sign = c[0] >> 31; \
+ fe.fe_f1.fp_sticky = 0; \
+ fe.fe_f1.fp_class = __fpu_qtof(&fe.fe_f1, c[0], c[1], c[2], c[3]); \
+ a[0] = __fpu_fto ## fname(&fe, &fe.fe_f1); \
+ return (n); \
+}
+
+#define _QP_QTOT(qname, fname, type, ...) \
+type _Qp_qto ## qname(u_int *c); \
+type \
+_Qp_qto ## qname(u_int *c) \
+{ \
+ struct fpemu fe; \
+ u_int *a; \
+ type n; \
+ __asm __volatile("stx %%fsr, %0" : "=m" (fe.fe_fsr) :); \
+ a = (u_int *)&n; \
+ fe.fe_f1.fp_sign = c[0] >> 31; \
+ fe.fe_f1.fp_sticky = 0; \
+ fe.fe_f1.fp_class = __fpu_qtof(&fe.fe_f1, c[0], c[1], c[2], c[3]); \
+ a[0] = __fpu_fto ## fname(&fe, &fe.fe_f1, ## __VA_ARGS__); \
+ return (n); \
+}
+
+#define FCC_EQ(fcc) ((fcc) == FSR_CC_EQ)
+#define FCC_GE(fcc) ((fcc) == FSR_CC_EQ || (fcc) == FSR_CC_GT)
+#define FCC_GT(fcc) ((fcc) == FSR_CC_GT)
+#define FCC_LE(fcc) ((fcc) == FSR_CC_EQ || (fcc) == FSR_CC_LT)
+#define FCC_LT(fcc) ((fcc) == FSR_CC_LT)
+#define FCC_NE(fcc) ((fcc) != FSR_CC_EQ)
+
+#define FSR_GET_FCC0(fsr) (((fsr) >> FSR_FCC_SHIFT) & FSR_FCC_MASK)
+
+#define _QP_CMP(name, cmpe, test) \
+int _Qp_f ## name(u_int *a, u_int *b) ; \
+int \
+_Qp_f ## name(u_int *a, u_int *b) \
+{ \
+ struct fpemu fe; \
+ __asm __volatile("stx %%fsr, %0" : "=m" (fe.fe_fsr) :); \
+ fe.fe_f1.fp_sign = a[0] >> 31; \
+ fe.fe_f1.fp_sticky = 0; \
+ fe.fe_f1.fp_class = __fpu_qtof(&fe.fe_f1, a[0], a[1], a[2], a[3]); \
+ fe.fe_f2.fp_sign = b[0] >> 31; \
+ fe.fe_f2.fp_sticky = 0; \
+ fe.fe_f2.fp_class = __fpu_qtof(&fe.fe_f2, b[0], b[1], b[2], b[3]); \
+ __fpu_compare(&fe, cmpe, 0); \
+ return (test(FSR_GET_FCC0(fe.fe_fsr))); \
+}
+
+void _Qp_sqrt(u_int *c, u_int *a);
+void
+_Qp_sqrt(u_int *c, u_int *a)
+{
+ struct fpemu fe;
+ struct fpn *r;
+ __asm __volatile("stx %%fsr, %0" : "=m" (fe.fe_fsr) :);
+ fe.fe_f1.fp_sign = a[0] >> 31;
+ fe.fe_f1.fp_sticky = 0;
+ fe.fe_f1.fp_class = __fpu_qtof(&fe.fe_f1, a[0], a[1], a[2], a[3]);
+ r = __fpu_sqrt(&fe);
+ c[0] = __fpu_ftoq(&fe, r, c);
+}
+
+_QP_OP(add)
+_QP_OP(div)
+_QP_OP(mul)
+_QP_OP(sub)
+
+_QP_TTOQ(d, d, double, u_int, a[0], a[1])
+_QP_TTOQ(i, i, int, u_int, a[0])
+_QP_TTOQ(s, s, float, u_int, a[0])
+_QP_TTOQ(x, x, long, u_long, a[0])
+_QP_TTOQ(ui, i, u_int, u_int, a[0])
+_QP_TTOQ(ux, x, u_long, u_long, a[0])
+
+_QP_QTOT4(d, d, double, a)
+_QP_QTOT3(i, i, int)
+_QP_QTOT3(s, s, float)
+_QP_QTOT4(x, x, long, a)
+_QP_QTOT3(ui, i, u_int)
+_QP_QTOT4(ux, x, u_long, a)
+
+_QP_CMP(eq, 0, FCC_EQ)
+_QP_CMP(ge, 0, FCC_GE)
+_QP_CMP(gt, 0, FCC_GT)
+_QP_CMP(le, 0, FCC_LE)
+_QP_CMP(lt, 0, FCC_LT)
+_QP_CMP(ne, 0, FCC_NE)
diff --git a/lib/libc/arch/sparc64/fpu/fpu_reg.c b/lib/libc/arch/sparc64/fpu/fpu_reg.c
new file mode 100644
index 00000000000..778b4a82fa8
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_reg.c
@@ -0,0 +1,639 @@
+/* $OpenBSD: fpu_reg.c,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 2003 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/types.h>
+#include "fpu_reg.h"
+
+void
+__fpu_setreg32(int r, u_int32_t v)
+{
+ switch (r) {
+ case 0:
+ asm("ld [%0], %%f0" : : "r" (&v));
+ break;
+ case 1:
+ asm("ld [%0], %%f1" : : "r" (&v));
+ break;
+ case 2:
+ asm("ld [%0], %%f2" : : "r" (&v));
+ break;
+ case 3:
+ asm("ld [%0], %%f3" : : "r" (&v));
+ break;
+ case 4:
+ asm("ld [%0], %%f4" : : "r" (&v));
+ break;
+ case 5:
+ asm("ld [%0], %%f5" : : "r" (&v));
+ break;
+ case 6:
+ asm("ld [%0], %%f6" : : "r" (&v));
+ break;
+ case 7:
+ asm("ld [%0], %%f7" : : "r" (&v));
+ break;
+ case 8:
+ asm("ld [%0], %%f8" : : "r" (&v));
+ break;
+ case 9:
+ asm("ld [%0], %%f9" : : "r" (&v));
+ break;
+ case 10:
+ asm("ld [%0], %%f10" : : "r" (&v));
+ break;
+ case 11:
+ asm("ld [%0], %%f11" : : "r" (&v));
+ break;
+ case 12:
+ asm("ld [%0], %%f12" : : "r" (&v));
+ break;
+ case 13:
+ asm("ld [%0], %%f13" : : "r" (&v));
+ break;
+ case 14:
+ asm("ld [%0], %%f14" : : "r" (&v));
+ break;
+ case 15:
+ asm("ld [%0], %%f15" : : "r" (&v));
+ break;
+ case 16:
+ asm("ld [%0], %%f16" : : "r" (&v));
+ break;
+ case 17:
+ asm("ld [%0], %%f17" : : "r" (&v));
+ break;
+ case 18:
+ asm("ld [%0], %%f18" : : "r" (&v));
+ break;
+ case 19:
+ asm("ld [%0], %%f19" : : "r" (&v));
+ break;
+ case 20:
+ asm("ld [%0], %%f20" : : "r" (&v));
+ break;
+ case 21:
+ asm("ld [%0], %%f21" : : "r" (&v));
+ break;
+ case 22:
+ asm("ld [%0], %%f22" : : "r" (&v));
+ break;
+ case 23:
+ asm("ld [%0], %%f23" : : "r" (&v));
+ break;
+ case 24:
+ asm("ld [%0], %%f24" : : "r" (&v));
+ break;
+ case 25:
+ asm("ld [%0], %%f25" : : "r" (&v));
+ break;
+ case 26:
+ asm("ld [%0], %%f26" : : "r" (&v));
+ break;
+ case 27:
+ asm("ld [%0], %%f27" : : "r" (&v));
+ break;
+ case 28:
+ asm("ld [%0], %%f28" : : "r" (&v));
+ break;
+ case 29:
+ asm("ld [%0], %%f29" : : "r" (&v));
+ break;
+ case 30:
+ asm("ld [%0], %%f30" : : "r" (&v));
+ break;
+ case 31:
+ asm("ld [%0], %%f31" : : "r" (&v));
+ break;
+ case 32:
+ asm("ld [%0], %%f32" : : "r" (&v));
+ break;
+ case 33:
+ asm("ld [%0], %%f33" : : "r" (&v));
+ break;
+ case 34:
+ asm("ld [%0], %%f34" : : "r" (&v));
+ break;
+ case 35:
+ asm("ld [%0], %%f35" : : "r" (&v));
+ break;
+ case 36:
+ asm("ld [%0], %%f36" : : "r" (&v));
+ break;
+ case 37:
+ asm("ld [%0], %%f37" : : "r" (&v));
+ break;
+ case 38:
+ asm("ld [%0], %%f38" : : "r" (&v));
+ break;
+ case 39:
+ asm("ld [%0], %%f39" : : "r" (&v));
+ break;
+ case 40:
+ asm("ld [%0], %%f40" : : "r" (&v));
+ break;
+ case 41:
+ asm("ld [%0], %%f41" : : "r" (&v));
+ break;
+ case 42:
+ asm("ld [%0], %%f42" : : "r" (&v));
+ break;
+ case 43:
+ asm("ld [%0], %%f43" : : "r" (&v));
+ break;
+ case 44:
+ asm("ld [%0], %%f44" : : "r" (&v));
+ break;
+ case 45:
+ asm("ld [%0], %%f45" : : "r" (&v));
+ break;
+ case 46:
+ asm("ld [%0], %%f46" : : "r" (&v));
+ break;
+ case 47:
+ asm("ld [%0], %%f47" : : "r" (&v));
+ break;
+ case 48:
+ asm("ld [%0], %%f48" : : "r" (&v));
+ break;
+ case 49:
+ asm("ld [%0], %%f49" : : "r" (&v));
+ break;
+ case 50:
+ asm("ld [%0], %%f50" : : "r" (&v));
+ break;
+ case 51:
+ asm("ld [%0], %%f51" : : "r" (&v));
+ break;
+ case 52:
+ asm("ld [%0], %%f52" : : "r" (&v));
+ break;
+ case 53:
+ asm("ld [%0], %%f53" : : "r" (&v));
+ break;
+ case 54:
+ asm("ld [%0], %%f54" : : "r" (&v));
+ break;
+ case 55:
+ asm("ld [%0], %%f55" : : "r" (&v));
+ break;
+ case 56:
+ asm("ld [%0], %%f56" : : "r" (&v));
+ break;
+ case 57:
+ asm("ld [%0], %%f57" : : "r" (&v));
+ break;
+ case 58:
+ asm("ld [%0], %%f58" : : "r" (&v));
+ break;
+ case 59:
+ asm("ld [%0], %%f59" : : "r" (&v));
+ break;
+ case 60:
+ asm("ld [%0], %%f60" : : "r" (&v));
+ break;
+ case 61:
+ asm("ld [%0], %%f61" : : "r" (&v));
+ break;
+ case 62:
+ asm("ld [%0], %%f62" : : "r" (&v));
+ break;
+ case 63:
+ asm("ld [%0], %%f63" : : "r" (&v));
+ break;
+ }
+}
+
+void
+__fpu_setreg64(int r, u_int64_t v)
+{
+ switch (r) {
+ case 0:
+ asm("ldd [%0], %%f0" : : "r" (&v));
+ break;
+ case 2:
+ asm("ldd [%0], %%f2" : : "r" (&v));
+ break;
+ case 4:
+ asm("ldd [%0], %%f4" : : "r" (&v));
+ break;
+ case 6:
+ asm("ldd [%0], %%f6" : : "r" (&v));
+ break;
+ case 8:
+ asm("ldd [%0], %%f8" : : "r" (&v));
+ break;
+ case 10:
+ asm("ldd [%0], %%f10" : : "r" (&v));
+ break;
+ case 12:
+ asm("ldd [%0], %%f12" : : "r" (&v));
+ break;
+ case 14:
+ asm("ldd [%0], %%f14" : : "r" (&v));
+ break;
+ case 16:
+ asm("ldd [%0], %%f16" : : "r" (&v));
+ break;
+ case 18:
+ asm("ldd [%0], %%f18" : : "r" (&v));
+ break;
+ case 20:
+ asm("ldd [%0], %%f20" : : "r" (&v));
+ break;
+ case 22:
+ asm("ldd [%0], %%f22" : : "r" (&v));
+ break;
+ case 24:
+ asm("ldd [%0], %%f24" : : "r" (&v));
+ break;
+ case 26:
+ asm("ldd [%0], %%f26" : : "r" (&v));
+ break;
+ case 28:
+ asm("ldd [%0], %%f28" : : "r" (&v));
+ break;
+ case 30:
+ asm("ldd [%0], %%f30" : : "r" (&v));
+ break;
+ case 32:
+ asm("ldd [%0], %%f32" : : "r" (&v));
+ break;
+ case 34:
+ asm("ldd [%0], %%f34" : : "r" (&v));
+ break;
+ case 36:
+ asm("ldd [%0], %%f36" : : "r" (&v));
+ break;
+ case 38:
+ asm("ldd [%0], %%f38" : : "r" (&v));
+ break;
+ case 40:
+ asm("ldd [%0], %%f40" : : "r" (&v));
+ break;
+ case 42:
+ asm("ldd [%0], %%f42" : : "r" (&v));
+ break;
+ case 44:
+ asm("ldd [%0], %%f44" : : "r" (&v));
+ break;
+ case 46:
+ asm("ldd [%0], %%f46" : : "r" (&v));
+ break;
+ case 48:
+ asm("ldd [%0], %%f48" : : "r" (&v));
+ break;
+ case 50:
+ asm("ldd [%0], %%f50" : : "r" (&v));
+ break;
+ case 52:
+ asm("ldd [%0], %%f52" : : "r" (&v));
+ break;
+ case 54:
+ asm("ldd [%0], %%f54" : : "r" (&v));
+ break;
+ case 56:
+ asm("ldd [%0], %%f56" : : "r" (&v));
+ break;
+ case 58:
+ asm("ldd [%0], %%f58" : : "r" (&v));
+ break;
+ case 60:
+ asm("ldd [%0], %%f60" : : "r" (&v));
+ break;
+ case 62:
+ asm("ldd [%0], %%f62" : : "r" (&v));
+ break;
+ }
+}
+
+u_int32_t
+__fpu_getreg32(int r)
+{
+ u_int32_t v;
+
+ switch (r) {
+ case 0:
+ asm("st %%f0, [%0]" : : "r" (&v));
+ break;
+ case 1:
+ asm("st %%f1, [%0]" : : "r" (&v));
+ break;
+ case 2:
+ asm("st %%f2, [%0]" : : "r" (&v));
+ break;
+ case 3:
+ asm("st %%f3, [%0]" : : "r" (&v));
+ break;
+ case 4:
+ asm("st %%f4, [%0]" : : "r" (&v));
+ break;
+ case 5:
+ asm("st %%f5, [%0]" : : "r" (&v));
+ break;
+ case 6:
+ asm("st %%f6, [%0]" : : "r" (&v));
+ break;
+ case 7:
+ asm("st %%f7, [%0]" : : "r" (&v));
+ break;
+ case 8:
+ asm("st %%f8, [%0]" : : "r" (&v));
+ break;
+ case 9:
+ asm("st %%f9, [%0]" : : "r" (&v));
+ break;
+ case 10:
+ asm("st %%f10, [%0]" : : "r" (&v));
+ break;
+ case 11:
+ asm("st %%f11, [%0]" : : "r" (&v));
+ break;
+ case 12:
+ asm("st %%f12, [%0]" : : "r" (&v));
+ break;
+ case 13:
+ asm("st %%f13, [%0]" : : "r" (&v));
+ break;
+ case 14:
+ asm("st %%f14, [%0]" : : "r" (&v));
+ break;
+ case 15:
+ asm("st %%f15, [%0]" : : "r" (&v));
+ break;
+ case 16:
+ asm("st %%f16, [%0]" : : "r" (&v));
+ break;
+ case 17:
+ asm("st %%f17, [%0]" : : "r" (&v));
+ break;
+ case 18:
+ asm("st %%f18, [%0]" : : "r" (&v));
+ break;
+ case 19:
+ asm("st %%f19, [%0]" : : "r" (&v));
+ break;
+ case 20:
+ asm("st %%f20, [%0]" : : "r" (&v));
+ break;
+ case 21:
+ asm("st %%f21, [%0]" : : "r" (&v));
+ break;
+ case 22:
+ asm("st %%f22, [%0]" : : "r" (&v));
+ break;
+ case 23:
+ asm("st %%f23, [%0]" : : "r" (&v));
+ break;
+ case 24:
+ asm("st %%f24, [%0]" : : "r" (&v));
+ break;
+ case 25:
+ asm("st %%f25, [%0]" : : "r" (&v));
+ break;
+ case 26:
+ asm("st %%f26, [%0]" : : "r" (&v));
+ break;
+ case 27:
+ asm("st %%f27, [%0]" : : "r" (&v));
+ break;
+ case 28:
+ asm("st %%f28, [%0]" : : "r" (&v));
+ break;
+ case 29:
+ asm("st %%f29, [%0]" : : "r" (&v));
+ break;
+ case 30:
+ asm("st %%f30, [%0]" : : "r" (&v));
+ break;
+ case 31:
+ asm("st %%f31, [%0]" : : "r" (&v));
+ break;
+ case 32:
+ asm("st %%f32, [%0]" : : "r" (&v));
+ break;
+ case 33:
+ asm("st %%f33, [%0]" : : "r" (&v));
+ break;
+ case 34:
+ asm("st %%f34, [%0]" : : "r" (&v));
+ break;
+ case 35:
+ asm("st %%f35, [%0]" : : "r" (&v));
+ break;
+ case 36:
+ asm("st %%f36, [%0]" : : "r" (&v));
+ break;
+ case 37:
+ asm("st %%f37, [%0]" : : "r" (&v));
+ break;
+ case 38:
+ asm("st %%f38, [%0]" : : "r" (&v));
+ break;
+ case 39:
+ asm("st %%f39, [%0]" : : "r" (&v));
+ break;
+ case 40:
+ asm("st %%f40, [%0]" : : "r" (&v));
+ break;
+ case 41:
+ asm("st %%f41, [%0]" : : "r" (&v));
+ break;
+ case 42:
+ asm("st %%f42, [%0]" : : "r" (&v));
+ break;
+ case 43:
+ asm("st %%f43, [%0]" : : "r" (&v));
+ break;
+ case 44:
+ asm("st %%f44, [%0]" : : "r" (&v));
+ break;
+ case 45:
+ asm("st %%f45, [%0]" : : "r" (&v));
+ break;
+ case 46:
+ asm("st %%f46, [%0]" : : "r" (&v));
+ break;
+ case 47:
+ asm("st %%f47, [%0]" : : "r" (&v));
+ break;
+ case 48:
+ asm("st %%f48, [%0]" : : "r" (&v));
+ break;
+ case 49:
+ asm("st %%f49, [%0]" : : "r" (&v));
+ break;
+ case 50:
+ asm("st %%f50, [%0]" : : "r" (&v));
+ break;
+ case 51:
+ asm("st %%f51, [%0]" : : "r" (&v));
+ break;
+ case 52:
+ asm("st %%f52, [%0]" : : "r" (&v));
+ break;
+ case 53:
+ asm("st %%f53, [%0]" : : "r" (&v));
+ break;
+ case 54:
+ asm("st %%f54, [%0]" : : "r" (&v));
+ break;
+ case 55:
+ asm("st %%f55, [%0]" : : "r" (&v));
+ break;
+ case 56:
+ asm("st %%f56, [%0]" : : "r" (&v));
+ break;
+ case 57:
+ asm("st %%f57, [%0]" : : "r" (&v));
+ break;
+ case 58:
+ asm("st %%f58, [%0]" : : "r" (&v));
+ break;
+ case 59:
+ asm("st %%f59, [%0]" : : "r" (&v));
+ break;
+ case 60:
+ asm("st %%f60, [%0]" : : "r" (&v));
+ break;
+ case 61:
+ asm("st %%f61, [%0]" : : "r" (&v));
+ break;
+ case 62:
+ asm("st %%f62, [%0]" : : "r" (&v));
+ break;
+ case 63:
+ asm("st %%f63, [%0]" : : "r" (&v));
+ break;
+ }
+ return (v);
+}
+
+u_int64_t
+__fpu_getreg64(int r)
+{
+ u_int64_t v;
+
+ switch (r) {
+ case 0:
+ asm("std %%f0, [%0]" : : "r" (&v));
+ break;
+ case 2:
+ asm("std %%f2, [%0]" : : "r" (&v));
+ break;
+ case 4:
+ asm("std %%f4, [%0]" : : "r" (&v));
+ break;
+ case 6:
+ asm("std %%f6, [%0]" : : "r" (&v));
+ break;
+ case 8:
+ asm("std %%f8, [%0]" : : "r" (&v));
+ break;
+ case 10:
+ asm("std %%f10, [%0]" : : "r" (&v));
+ break;
+ case 12:
+ asm("std %%f12, [%0]" : : "r" (&v));
+ break;
+ case 14:
+ asm("std %%f14, [%0]" : : "r" (&v));
+ break;
+ case 16:
+ asm("std %%f16, [%0]" : : "r" (&v));
+ break;
+ case 18:
+ asm("std %%f18, [%0]" : : "r" (&v));
+ break;
+ case 20:
+ asm("std %%f20, [%0]" : : "r" (&v));
+ break;
+ case 22:
+ asm("std %%f22, [%0]" : : "r" (&v));
+ break;
+ case 24:
+ asm("std %%f24, [%0]" : : "r" (&v));
+ break;
+ case 26:
+ asm("std %%f26, [%0]" : : "r" (&v));
+ break;
+ case 28:
+ asm("std %%f28, [%0]" : : "r" (&v));
+ break;
+ case 30:
+ asm("std %%f30, [%0]" : : "r" (&v));
+ break;
+ case 32:
+ asm("std %%f32, [%0]" : : "r" (&v));
+ break;
+ case 34:
+ asm("std %%f34, [%0]" : : "r" (&v));
+ break;
+ case 36:
+ asm("std %%f36, [%0]" : : "r" (&v));
+ break;
+ case 38:
+ asm("std %%f38, [%0]" : : "r" (&v));
+ break;
+ case 40:
+ asm("std %%f40, [%0]" : : "r" (&v));
+ break;
+ case 42:
+ asm("std %%f42, [%0]" : : "r" (&v));
+ break;
+ case 44:
+ asm("std %%f44, [%0]" : : "r" (&v));
+ break;
+ case 46:
+ asm("std %%f46, [%0]" : : "r" (&v));
+ break;
+ case 48:
+ asm("std %%f48, [%0]" : : "r" (&v));
+ break;
+ case 50:
+ asm("std %%f50, [%0]" : : "r" (&v));
+ break;
+ case 52:
+ asm("std %%f52, [%0]" : : "r" (&v));
+ break;
+ case 54:
+ asm("std %%f54, [%0]" : : "r" (&v));
+ break;
+ case 56:
+ asm("std %%f56, [%0]" : : "r" (&v));
+ break;
+ case 58:
+ asm("std %%f58, [%0]" : : "r" (&v));
+ break;
+ case 60:
+ asm("std %%f60, [%0]" : : "r" (&v));
+ break;
+ case 62:
+ asm("std %%f62, [%0]" : : "r" (&v));
+ break;
+ }
+ return (v);
+}
diff --git a/lib/libc/arch/sparc64/fpu/fpu_reg.h b/lib/libc/arch/sparc64/fpu/fpu_reg.h
new file mode 100644
index 00000000000..86766cc4567
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_reg.h
@@ -0,0 +1,37 @@
+/* $OpenBSD: fpu_reg.h,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 2003 Jason L. Wright (jason@thought.net)
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _LIBC_ARCH_SPARC64_FPU_FPU_REG_H_
+#define _LIBC_ARCH_SPARC64_FPU_FPU_REG_H_
+
+u_int32_t __fpu_getreg32(int);
+u_int64_t __fpu_getreg64(int);
+void __fpu_setreg32(int, u_int32_t);
+void __fpu_setreg64(int, u_int64_t);
+
+#endif /* _LIBC_ARCH_SPARC64_FPU_FPU_REG_H_ */
diff --git a/lib/libc/arch/sparc64/fpu/fpu_sqrt.c b/lib/libc/arch/sparc64/fpu/fpu_sqrt.c
new file mode 100644
index 00000000000..dc89f6ee1d8
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_sqrt.c
@@ -0,0 +1,404 @@
+/* $OpenBSD: fpu_sqrt.c,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fpu_sqrt.c 8.1 (Berkeley) 6/11/93
+ * $NetBSD: fpu_sqrt.c,v 1.2 1994/11/20 20:52:46 deraadt Exp $
+ */
+
+#include <sys/cdefs.h>
+#if 0
+__FBSDID("$FreeBSD: src/lib/libc/sparc64/fpu/fpu_sqrt.c,v 1.3 2002/03/22 21:52:58 obrien Exp $");
+#endif
+
+/*
+ * Perform an FPU square root (return sqrt(x)).
+ */
+
+#include <sys/types.h>
+
+#include <machine/frame.h>
+
+#include "fpu_arith.h"
+#include "fpu_emu.h"
+#include "fpu_extern.h"
+
+/*
+ * Our task is to calculate the square root of a floating point number x0.
+ * This number x normally has the form:
+ *
+ * exp
+ * x = mant * 2 (where 1 <= mant < 2 and exp is an integer)
+ *
+ * This can be left as it stands, or the mantissa can be doubled and the
+ * exponent decremented:
+ *
+ * exp-1
+ * x = (2 * mant) * 2 (where 2 <= 2 * mant < 4)
+ *
+ * If the exponent `exp' is even, the square root of the number is best
+ * handled using the first form, and is by definition equal to:
+ *
+ * exp/2
+ * sqrt(x) = sqrt(mant) * 2
+ *
+ * If exp is odd, on the other hand, it is convenient to use the second
+ * form, giving:
+ *
+ * (exp-1)/2
+ * sqrt(x) = sqrt(2 * mant) * 2
+ *
+ * In the first case, we have
+ *
+ * 1 <= mant < 2
+ *
+ * and therefore
+ *
+ * sqrt(1) <= sqrt(mant) < sqrt(2)
+ *
+ * while in the second case we have
+ *
+ * 2 <= 2*mant < 4
+ *
+ * and therefore
+ *
+ * sqrt(2) <= sqrt(2*mant) < sqrt(4)
+ *
+ * so that in any case, we are sure that
+ *
+ * sqrt(1) <= sqrt(n * mant) < sqrt(4), n = 1 or 2
+ *
+ * or
+ *
+ * 1 <= sqrt(n * mant) < 2, n = 1 or 2.
+ *
+ * This root is therefore a properly formed mantissa for a floating
+ * point number. The exponent of sqrt(x) is either exp/2 or (exp-1)/2
+ * as above. This leaves us with the problem of finding the square root
+ * of a fixed-point number in the range [1..4).
+ *
+ * Though it may not be instantly obvious, the following square root
+ * algorithm works for any integer x of an even number of bits, provided
+ * that no overflows occur:
+ *
+ * let q = 0
+ * for k = NBITS-1 to 0 step -1 do -- for each digit in the answer...
+ * x *= 2 -- multiply by radix, for next digit
+ * if x >= 2q + 2^k then -- if adding 2^k does not
+ * x -= 2q + 2^k -- exceed the correct root,
+ * q += 2^k -- add 2^k and adjust x
+ * fi
+ * done
+ * sqrt = q / 2^(NBITS/2) -- (and any remainder is in x)
+ *
+ * If NBITS is odd (so that k is initially even), we can just add another
+ * zero bit at the top of x. Doing so means that q is not going to acquire
+ * a 1 bit in the first trip around the loop (since x0 < 2^NBITS). If the
+ * final value in x is not needed, or can be off by a factor of 2, this is
+ * equivalant to moving the `x *= 2' step to the bottom of the loop:
+ *
+ * for k = NBITS-1 to 0 step -1 do if ... fi; x *= 2; done
+ *
+ * and the result q will then be sqrt(x0) * 2^floor(NBITS / 2).
+ * (Since the algorithm is destructive on x, we will call x's initial
+ * value, for which q is some power of two times its square root, x0.)
+ *
+ * If we insert a loop invariant y = 2q, we can then rewrite this using
+ * C notation as:
+ *
+ * q = y = 0; x = x0;
+ * for (k = NBITS; --k >= 0;) {
+ * #if (NBITS is even)
+ * x *= 2;
+ * #endif
+ * t = y + (1 << k);
+ * if (x >= t) {
+ * x -= t;
+ * q += 1 << k;
+ * y += 1 << (k + 1);
+ * }
+ * #if (NBITS is odd)
+ * x *= 2;
+ * #endif
+ * }
+ *
+ * If x0 is fixed point, rather than an integer, we can simply alter the
+ * scale factor between q and sqrt(x0). As it happens, we can easily arrange
+ * for the scale factor to be 2**0 or 1, so that sqrt(x0) == q.
+ *
+ * In our case, however, x0 (and therefore x, y, q, and t) are multiword
+ * integers, which adds some complication. But note that q is built one
+ * bit at a time, from the top down, and is not used itself in the loop
+ * (we use 2q as held in y instead). This means we can build our answer
+ * in an integer, one word at a time, which saves a bit of work. Also,
+ * since 1 << k is always a `new' bit in q, 1 << k and 1 << (k+1) are
+ * `new' bits in y and we can set them with an `or' operation rather than
+ * a full-blown multiword add.
+ *
+ * We are almost done, except for one snag. We must prove that none of our
+ * intermediate calculations can overflow. We know that x0 is in [1..4)
+ * and therefore the square root in q will be in [1..2), but what about x,
+ * y, and t?
+ *
+ * We know that y = 2q at the beginning of each loop. (The relation only
+ * fails temporarily while y and q are being updated.) Since q < 2, y < 4.
+ * The sum in t can, in our case, be as much as y+(1<<1) = y+2 < 6, and.
+ * Furthermore, we can prove with a bit of work that x never exceeds y by
+ * more than 2, so that even after doubling, 0 <= x < 8. (This is left as
+ * an exercise to the reader, mostly because I have become tired of working
+ * on this comment.)
+ *
+ * If our floating point mantissas (which are of the form 1.frac) occupy
+ * B+1 bits, our largest intermediary needs at most B+3 bits, or two extra.
+ * In fact, we want even one more bit (for a carry, to avoid compares), or
+ * three extra. There is a comment in fpu_emu.h reminding maintainers of
+ * this, so we have some justification in assuming it.
+ */
+struct fpn *
+__fpu_sqrt(fe)
+ struct fpemu *fe;
+{
+ struct fpn *x = &fe->fe_f1;
+ u_int bit, q, tt;
+ u_int x0, x1, x2, x3;
+ u_int y0, y1, y2, y3;
+ u_int d0, d1, d2, d3;
+ int e;
+
+ /*
+ * Take care of special cases first. In order:
+ *
+ * sqrt(NaN) = NaN
+ * sqrt(+0) = +0
+ * sqrt(-0) = -0
+ * sqrt(x < 0) = NaN (including sqrt(-Inf))
+ * sqrt(+Inf) = +Inf
+ *
+ * Then all that remains are numbers with mantissas in [1..2).
+ */
+ if (ISNAN(x) || ISZERO(x))
+ return (x);
+ if (x->fp_sign)
+ return (__fpu_newnan(fe));
+ if (ISINF(x))
+ return (x);
+
+ /*
+ * Calculate result exponent. As noted above, this may involve
+ * doubling the mantissa. We will also need to double x each
+ * time around the loop, so we define a macro for this here, and
+ * we break out the multiword mantissa.
+ */
+#ifdef FPU_SHL1_BY_ADD
+#define DOUBLE_X { \
+ FPU_ADDS(x3, x3, x3); FPU_ADDCS(x2, x2, x2); \
+ FPU_ADDCS(x1, x1, x1); FPU_ADDC(x0, x0, x0); \
+}
+#else
+#define DOUBLE_X { \
+ x0 = (x0 << 1) | (x1 >> 31); x1 = (x1 << 1) | (x2 >> 31); \
+ x2 = (x2 << 1) | (x3 >> 31); x3 <<= 1; \
+}
+#endif
+#if (FP_NMANT & 1) != 0
+# define ODD_DOUBLE DOUBLE_X
+# define EVEN_DOUBLE /* nothing */
+#else
+# define ODD_DOUBLE /* nothing */
+# define EVEN_DOUBLE DOUBLE_X
+#endif
+ x0 = x->fp_mant[0];
+ x1 = x->fp_mant[1];
+ x2 = x->fp_mant[2];
+ x3 = x->fp_mant[3];
+ e = x->fp_exp;
+ if (e & 1) /* exponent is odd; use sqrt(2mant) */
+ DOUBLE_X;
+ /* THE FOLLOWING ASSUMES THAT RIGHT SHIFT DOES SIGN EXTENSION */
+ x->fp_exp = e >> 1; /* calculates (e&1 ? (e-1)/2 : e/2 */
+
+ /*
+ * Now calculate the mantissa root. Since x is now in [1..4),
+ * we know that the first trip around the loop will definitely
+ * set the top bit in q, so we can do that manually and start
+ * the loop at the next bit down instead. We must be sure to
+ * double x correctly while doing the `known q=1.0'.
+ *
+ * We do this one mantissa-word at a time, as noted above, to
+ * save work. To avoid `(1 << 31) << 1', we also do the top bit
+ * outside of each per-word loop.
+ *
+ * The calculation `t = y + bit' breaks down into `t0 = y0, ...,
+ * t3 = y3, t? |= bit' for the appropriate word. Since the bit
+ * is always a `new' one, this means that three of the `t?'s are
+ * just the corresponding `y?'; we use `#define's here for this.
+ * The variable `tt' holds the actual `t?' variable.
+ */
+
+ /* calculate q0 */
+#define t0 tt
+ bit = FP_1;
+ EVEN_DOUBLE;
+ /* if (x >= (t0 = y0 | bit)) { */ /* always true */
+ q = bit;
+ x0 -= bit;
+ y0 = bit << 1;
+ /* } */
+ ODD_DOUBLE;
+ while ((bit >>= 1) != 0) { /* for remaining bits in q0 */
+ EVEN_DOUBLE;
+ t0 = y0 | bit; /* t = y + bit */
+ if (x0 >= t0) { /* if x >= t then */
+ x0 -= t0; /* x -= t */
+ q |= bit; /* q += bit */
+ y0 |= bit << 1; /* y += bit << 1 */
+ }
+ ODD_DOUBLE;
+ }
+ x->fp_mant[0] = q;
+#undef t0
+
+ /* calculate q1. note (y0&1)==0. */
+#define t0 y0
+#define t1 tt
+ q = 0;
+ y1 = 0;
+ bit = 1 << 31;
+ EVEN_DOUBLE;
+ t1 = bit;
+ FPU_SUBS(d1, x1, t1);
+ FPU_SUBC(d0, x0, t0); /* d = x - t */
+ if ((int)d0 >= 0) { /* if d >= 0 (i.e., x >= t) then */
+ x0 = d0, x1 = d1; /* x -= t */
+ q = bit; /* q += bit */
+ y0 |= 1; /* y += bit << 1 */
+ }
+ ODD_DOUBLE;
+ while ((bit >>= 1) != 0) { /* for remaining bits in q1 */
+ EVEN_DOUBLE; /* as before */
+ t1 = y1 | bit;
+ FPU_SUBS(d1, x1, t1);
+ FPU_SUBC(d0, x0, t0);
+ if ((int)d0 >= 0) {
+ x0 = d0, x1 = d1;
+ q |= bit;
+ y1 |= bit << 1;
+ }
+ ODD_DOUBLE;
+ }
+ x->fp_mant[1] = q;
+#undef t1
+
+ /* calculate q2. note (y1&1)==0; y0 (aka t0) is fixed. */
+#define t1 y1
+#define t2 tt
+ q = 0;
+ y2 = 0;
+ bit = 1 << 31;
+ EVEN_DOUBLE;
+ t2 = bit;
+ FPU_SUBS(d2, x2, t2);
+ FPU_SUBCS(d1, x1, t1);
+ FPU_SUBC(d0, x0, t0);
+ if ((int)d0 >= 0) {
+ x0 = d0, x1 = d1, x2 = d2;
+ q |= bit;
+ y1 |= 1; /* now t1, y1 are set in concrete */
+ }
+ ODD_DOUBLE;
+ while ((bit >>= 1) != 0) {
+ EVEN_DOUBLE;
+ t2 = y2 | bit;
+ FPU_SUBS(d2, x2, t2);
+ FPU_SUBCS(d1, x1, t1);
+ FPU_SUBC(d0, x0, t0);
+ if ((int)d0 >= 0) {
+ x0 = d0, x1 = d1, x2 = d2;
+ q |= bit;
+ y2 |= bit << 1;
+ }
+ ODD_DOUBLE;
+ }
+ x->fp_mant[2] = q;
+#undef t2
+
+ /* calculate q3. y0, t0, y1, t1 all fixed; y2, t2, almost done. */
+#define t2 y2
+#define t3 tt
+ q = 0;
+ y3 = 0;
+ bit = 1 << 31;
+ EVEN_DOUBLE;
+ t3 = bit;
+ FPU_SUBS(d3, x3, t3);
+ FPU_SUBCS(d2, x2, t2);
+ FPU_SUBCS(d1, x1, t1);
+ FPU_SUBC(d0, x0, t0);
+ ODD_DOUBLE;
+ if ((int)d0 >= 0) {
+ x0 = d0, x1 = d1, x2 = d2;
+ q |= bit;
+ y2 |= 1;
+ }
+ while ((bit >>= 1) != 0) {
+ EVEN_DOUBLE;
+ t3 = y3 | bit;
+ FPU_SUBS(d3, x3, t3);
+ FPU_SUBCS(d2, x2, t2);
+ FPU_SUBCS(d1, x1, t1);
+ FPU_SUBC(d0, x0, t0);
+ if ((int)d0 >= 0) {
+ x0 = d0, x1 = d1, x2 = d2;
+ q |= bit;
+ y3 |= bit << 1;
+ }
+ ODD_DOUBLE;
+ }
+ x->fp_mant[3] = q;
+
+ /*
+ * The result, which includes guard and round bits, is exact iff
+ * x is now zero; any nonzero bits in x represent sticky bits.
+ */
+ x->fp_sticky = x0 | x1 | x2 | x3;
+ return (x);
+}
diff --git a/lib/libc/arch/sparc64/fpu/fpu_subr.c b/lib/libc/arch/sparc64/fpu/fpu_subr.c
new file mode 100644
index 00000000000..6dd7a8121d4
--- /dev/null
+++ b/lib/libc/arch/sparc64/fpu/fpu_subr.c
@@ -0,0 +1,227 @@
+/* $OpenBSD: fpu_subr.c,v 1.1 2003/07/21 18:41:30 jason Exp $ */
+
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fpu_subr.c 8.1 (Berkeley) 6/11/93
+ * $NetBSD: fpu_subr.c,v 1.3 1996/03/14 19:42:01 christos Exp $
+ */
+
+#include <sys/cdefs.h>
+#if 0
+__FBSDID("$FreeBSD: src/lib/libc/sparc64/fpu/fpu_subr.c,v 1.4 2002/04/27 21:56:28 jake Exp $");
+#endif
+
+/*
+ * FPU subroutines.
+ */
+
+#include <sys/param.h>
+
+#include <machine/frame.h>
+#include <machine/fsr.h>
+#include <machine/instr.h>
+
+#include "fpu_arith.h"
+#include "fpu_emu.h"
+#include "fpu_extern.h"
+
+/*
+ * Shift the given number right rsh bits. Any bits that `fall off' will get
+ * shoved into the sticky field; we return the resulting sticky. Note that
+ * shifting NaNs is legal (this will never shift all bits out); a NaN's
+ * sticky field is ignored anyway.
+ */
+int
+__fpu_shr(struct fpn *fp, int rsh)
+{
+ u_int m0, m1, m2, m3, s;
+ int lsh;
+
+#ifdef DIAGNOSTIC
+ if (rsh <= 0 || (fp->fp_class != FPC_NUM && !ISNAN(fp)))
+ __utrap_panic("fpu_rightshift 1");
+#endif
+
+ m0 = fp->fp_mant[0];
+ m1 = fp->fp_mant[1];
+ m2 = fp->fp_mant[2];
+ m3 = fp->fp_mant[3];
+
+ /* If shifting all the bits out, take a shortcut. */
+ if (rsh >= FP_NMANT) {
+#ifdef DIAGNOSTIC
+ if ((m0 | m1 | m2 | m3) == 0)
+ __utrap_panic("fpu_rightshift 2");
+#endif
+ fp->fp_mant[0] = 0;
+ fp->fp_mant[1] = 0;
+ fp->fp_mant[2] = 0;
+ fp->fp_mant[3] = 0;
+#ifdef notdef
+ if ((m0 | m1 | m2 | m3) == 0)
+ fp->fp_class = FPC_ZERO;
+ else
+#endif
+ fp->fp_sticky = 1;
+ return (1);
+ }
+
+ /* Squish out full words. */
+ s = fp->fp_sticky;
+ if (rsh >= 32 * 3) {
+ s |= m3 | m2 | m1;
+ m3 = m0, m2 = 0, m1 = 0, m0 = 0;
+ } else if (rsh >= 32 * 2) {
+ s |= m3 | m2;
+ m3 = m1, m2 = m0, m1 = 0, m0 = 0;
+ } else if (rsh >= 32) {
+ s |= m3;
+ m3 = m2, m2 = m1, m1 = m0, m0 = 0;
+ }
+
+ /* Handle any remaining partial word. */
+ if ((rsh &= 31) != 0) {
+ lsh = 32 - rsh;
+ s |= m3 << lsh;
+ m3 = (m3 >> rsh) | (m2 << lsh);
+ m2 = (m2 >> rsh) | (m1 << lsh);
+ m1 = (m1 >> rsh) | (m0 << lsh);
+ m0 >>= rsh;
+ }
+ fp->fp_mant[0] = m0;
+ fp->fp_mant[1] = m1;
+ fp->fp_mant[2] = m2;
+ fp->fp_mant[3] = m3;
+ fp->fp_sticky = s;
+ return (s);
+}
+
+/*
+ * Force a number to be normal, i.e., make its fraction have all zero
+ * bits before FP_1, then FP_1, then all 1 bits. This is used for denorms
+ * and (sometimes) for intermediate results.
+ *
+ * Internally, this may use a `supernormal' -- a number whose fp_mant
+ * is greater than or equal to 2.0 -- so as a side effect you can hand it
+ * a supernormal and it will fix it (provided fp->fp_mant[3] == 0).
+ */
+void
+__fpu_norm(struct fpn *fp)
+{
+ u_int m0, m1, m2, m3, top, sup, nrm;
+ int lsh, rsh, exp;
+
+ exp = fp->fp_exp;
+ m0 = fp->fp_mant[0];
+ m1 = fp->fp_mant[1];
+ m2 = fp->fp_mant[2];
+ m3 = fp->fp_mant[3];
+
+ /* Handle severe subnormals with 32-bit moves. */
+ if (m0 == 0) {
+ if (m1)
+ m0 = m1, m1 = m2, m2 = m3, m3 = 0, exp -= 32;
+ else if (m2)
+ m0 = m2, m1 = m3, m2 = 0, m3 = 0, exp -= 2 * 32;
+ else if (m3)
+ m0 = m3, m1 = 0, m2 = 0, m3 = 0, exp -= 3 * 32;
+ else {
+ fp->fp_class = FPC_ZERO;
+ return;
+ }
+ }
+
+ /* Now fix any supernormal or remaining subnormal. */
+ nrm = FP_1;
+ sup = nrm << 1;
+ if (m0 >= sup) {
+ /*
+ * We have a supernormal number. We need to shift it right.
+ * We may assume m3==0.
+ */
+ for (rsh = 1, top = m0 >> 1; top >= sup; rsh++) /* XXX slow */
+ top >>= 1;
+ exp += rsh;
+ lsh = 32 - rsh;
+ m3 = m2 << lsh;
+ m2 = (m2 >> rsh) | (m1 << lsh);
+ m1 = (m1 >> rsh) | (m0 << lsh);
+ m0 = top;
+ } else if (m0 < nrm) {
+ /*
+ * We have a regular denorm (a subnormal number), and need
+ * to shift it left.
+ */
+ for (lsh = 1, top = m0 << 1; top < nrm; lsh++) /* XXX slow */
+ top <<= 1;
+ exp -= lsh;
+ rsh = 32 - lsh;
+ m0 = top | (m1 >> rsh);
+ m1 = (m1 << lsh) | (m2 >> rsh);
+ m2 = (m2 << lsh) | (m3 >> rsh);
+ m3 <<= lsh;
+ }
+
+ fp->fp_exp = exp;
+ fp->fp_mant[0] = m0;
+ fp->fp_mant[1] = m1;
+ fp->fp_mant[2] = m2;
+ fp->fp_mant[3] = m3;
+}
+
+/*
+ * Concoct a `fresh' Quiet NaN per Appendix N.
+ * As a side effect, we set NV (invalid) for the current exceptions.
+ */
+struct fpn *
+__fpu_newnan(struct fpemu *fe)
+{
+ struct fpn *fp;
+
+ fe->fe_cx = FSR_NV;
+ fp = &fe->fe_f3;
+ fp->fp_class = FPC_QNAN;
+ fp->fp_sign = 0;
+ fp->fp_mant[0] = FP_1 - 1;
+ fp->fp_mant[1] = fp->fp_mant[2] = fp->fp_mant[3] = ~0;
+ return (fp);
+}