summaryrefslogtreecommitdiff
path: root/sys/gnu/arch/i386/fpemul/wm_sqrt.s
diff options
context:
space:
mode:
authorJason Downs <downsj@cvs.openbsd.org>1996-08-27 10:33:05 +0000
committerJason Downs <downsj@cvs.openbsd.org>1996-08-27 10:33:05 +0000
commitd8270b44b1f22944ea7aaaf8e11b8ddc7fbf94aa (patch)
tree1c6d66ec2f8bcf690f8a1dd1eb265b677ec778ff /sys/gnu/arch/i386/fpemul/wm_sqrt.s
parentd21d48db18c4d62ff0fd6630fcb728ff04939183 (diff)
Everyone, say hello to a real floating point emulator!
The fpemul written for Linux by W. Metzenthen: ported to NetBSD and then to FreeBSD, and now back to OpenBSD.
Diffstat (limited to 'sys/gnu/arch/i386/fpemul/wm_sqrt.s')
-rw-r--r--sys/gnu/arch/i386/fpemul/wm_sqrt.s499
1 files changed, 499 insertions, 0 deletions
diff --git a/sys/gnu/arch/i386/fpemul/wm_sqrt.s b/sys/gnu/arch/i386/fpemul/wm_sqrt.s
new file mode 100644
index 00000000000..849a11ff465
--- /dev/null
+++ b/sys/gnu/arch/i386/fpemul/wm_sqrt.s
@@ -0,0 +1,499 @@
+ .file "wm_sqrt.S"
+/* $OpenBSD: wm_sqrt.s,v 1.1 1996/08/27 10:33:04 downsj Exp $ */
+/*
+ * wm_sqrt.S
+ *
+ * Fixed point arithmetic square root evaluation.
+ *
+ * Call from C as:
+ * void wm_sqrt(FPU_REG *n, unsigned int control_word)
+ *
+ *
+ * Copyright (C) 1992,1993,1994
+ * W. Metzenthen, 22 Parker St, Ormond, Vic 3163,
+ * Australia. E-mail billm@vaxc.cc.monash.edu.au
+ * All rights reserved.
+ *
+ * This copyright notice covers the redistribution and use of the
+ * FPU emulator developed by W. Metzenthen. It covers only its use
+ * in the 386BSD, FreeBSD and NetBSD operating systems. Any other
+ * use is not permitted under this copyright.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must include information specifying
+ * that source code for the emulator is freely available and include
+ * either:
+ * a) an offer to provide the source code for a nominal distribution
+ * fee, or
+ * b) list at least two alternative methods whereby the source
+ * can be obtained, e.g. a publically accessible bulletin board
+ * and an anonymous ftp site from which the software can be
+ * downloaded.
+ * 3. All advertising materials specifically mentioning features or use of
+ * this emulator must acknowledge that it was developed by W. Metzenthen.
+ * 4. The name of W. Metzenthen may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * W. METZENTHEN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * The purpose of this copyright, based upon the Berkeley copyright, is to
+ * ensure that the covered software remains freely available to everyone.
+ *
+ * The software (with necessary differences) is also available, but under
+ * the terms of the GNU copyleft, for the Linux operating system and for
+ * the djgpp ms-dos extender.
+ *
+ * W. Metzenthen June 1994.
+ *
+ *
+ * $FreeBSD: wm_sqrt.s,v 1.3 1994/06/10 07:45:04 rich Exp $
+ *
+ */
+
+
+/*---------------------------------------------------------------------------+
+ | wm_sqrt(FPU_REG *n, unsigned int control_word) |
+ | returns the square root of n in n. |
+ | |
+ | Use Newton's method to compute the square root of a number, which must |
+ | be in the range [1.0 .. 4.0), to 64 bits accuracy. |
+ | Does not check the sign or tag of the argument. |
+ | Sets the exponent, but not the sign or tag of the result. |
+ | |
+ | The guess is kept in %esi:%edi |
+ +---------------------------------------------------------------------------*/
+
+#include <machine/asm.h>
+
+#include <gnu/arch/i386/fpemul/exception.h>
+#include <gnu/arch/i386/fpemul/fpu_asm.h>
+
+
+.data
+/*
+ Local storage:
+ */
+ .align 4,0
+accum_3:
+ .long 0 /* ms word */
+accum_2:
+ .long 0
+accum_1:
+ .long 0
+accum_0:
+ .long 0
+
+/* The de-normalised argument:
+// sq_2 sq_1 sq_0
+// b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
+// ^ binary point here */
+fsqrt_arg_2:
+ .long 0 /* ms word */
+fsqrt_arg_1:
+ .long 0
+fsqrt_arg_0:
+ .long 0 /* ls word, at most the ms bit is set */
+
+.text
+ .align 2,144
+
+.globl _wm_sqrt
+
+_wm_sqrt:
+ pushl %ebp
+ movl %esp,%ebp
+ pushl %esi
+ pushl %edi
+ pushl %ebx
+
+ movl PARAM1,%esi
+
+ movl SIGH(%esi),%eax
+ movl SIGL(%esi),%ecx
+ xorl %edx,%edx
+
+/* We use a rough linear estimate for the first guess.. */
+
+ cmpl EXP_BIAS,EXP(%esi)
+ jnz sqrt_arg_ge_2
+
+ shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
+ rcrl $1,%ecx
+ rcrl $1,%edx
+
+sqrt_arg_ge_2:
+/* From here on, n is never accessed directly again until it is
+// replaced by the answer. */
+
+ movl %eax,fsqrt_arg_2 /* ms word of n */
+ movl %ecx,fsqrt_arg_1
+ movl %edx,fsqrt_arg_0
+
+/* Make a linear first estimate */
+ shrl $1,%eax
+ addl $0x40000000,%eax
+ movl $0xaaaaaaaa,%ecx
+ mull %ecx
+ shll %edx /* max result was 7fff... */
+ testl $0x80000000,%edx /* but min was 3fff... */
+ jnz sqrt_prelim_no_adjust
+
+ movl $0x80000000,%edx /* round up */
+
+sqrt_prelim_no_adjust:
+ movl %edx,%esi /* Our first guess */
+
+/* We have now computed (approx) (2 + x) / 3, which forms the basis
+ for a few iterations of Newton's method */
+
+ movl fsqrt_arg_2,%ecx /* ms word */
+
+/* From our initial estimate, three iterations are enough to get us
+// to 30 bits or so. This will then allow two iterations at better
+// precision to complete the process.
+
+// Compute (g + n/g)/2 at each iteration (g is the guess). */
+ shrl %ecx /* Doing this first will prevent a divide */
+ /* overflow later. */
+
+ movl %ecx,%edx /* msw of the arg / 2 */
+ divl %esi /* current estimate */
+ shrl %esi /* divide by 2 */
+ addl %eax,%esi /* the new estimate */
+
+ movl %ecx,%edx
+ divl %esi
+ shrl %esi
+ addl %eax,%esi
+
+ movl %ecx,%edx
+ divl %esi
+ shrl %esi
+ addl %eax,%esi
+
+/* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
+// we improve it to 60 bits or so.
+
+// The strategy from now on is to compute new estimates from
+// guess := guess + (n - guess^2) / (2 * guess) */
+
+/* First, find the square of the guess */
+ movl %esi,%eax
+ mull %esi
+/* guess^2 now in %edx:%eax */
+
+ movl fsqrt_arg_1,%ecx
+ subl %ecx,%eax
+ movl fsqrt_arg_2,%ecx /* ms word of normalized n */
+ sbbl %ecx,%edx
+ jnc sqrt_stage_2_positive
+/* subtraction gives a negative result
+// negate the result before division */
+ notl %edx
+ notl %eax
+ addl $1,%eax
+ adcl $0,%edx
+
+ divl %esi
+ movl %eax,%ecx
+
+ movl %edx,%eax
+ divl %esi
+ jmp sqrt_stage_2_finish
+
+sqrt_stage_2_positive:
+ divl %esi
+ movl %eax,%ecx
+
+ movl %edx,%eax
+ divl %esi
+
+ notl %ecx
+ notl %eax
+ addl $1,%eax
+ adcl $0,%ecx
+
+sqrt_stage_2_finish:
+ sarl $1,%ecx /* divide by 2 */
+ rcrl $1,%eax
+
+ /* Form the new estimate in %esi:%edi */
+ movl %eax,%edi
+ addl %ecx,%esi
+
+ jnz sqrt_stage_2_done /* result should be [1..2) */
+
+#ifdef PARANOID
+/* It should be possible to get here only if the arg is ffff....ffff*/
+ cmp $0xffffffff,fsqrt_arg_1
+ jnz sqrt_stage_2_error
+#endif PARANOID
+
+/* The best rounded result.*/
+ xorl %eax,%eax
+ decl %eax
+ movl %eax,%edi
+ movl %eax,%esi
+ movl $0x7fffffff,%eax
+ jmp sqrt_round_result
+
+#ifdef PARANOID
+sqrt_stage_2_error:
+ pushl EX_INTERNAL|0x213
+ call EXCEPTION
+#endif PARANOID
+
+sqrt_stage_2_done:
+
+/* Now the square root has been computed to better than 60 bits */
+
+/* Find the square of the guess*/
+ movl %edi,%eax /* ls word of guess*/
+ mull %edi
+ movl %edx,accum_1
+
+ movl %esi,%eax
+ mull %esi
+ movl %edx,accum_3
+ movl %eax,accum_2
+
+ movl %edi,%eax
+ mull %esi
+ addl %eax,accum_1
+ adcl %edx,accum_2
+ adcl $0,accum_3
+
+/* movl %esi,%eax*/
+/* mull %edi*/
+ addl %eax,accum_1
+ adcl %edx,accum_2
+ adcl $0,accum_3
+
+/* guess^2 now in accum_3:accum_2:accum_1*/
+
+ movl fsqrt_arg_0,%eax /* get normalized n*/
+ subl %eax,accum_1
+ movl fsqrt_arg_1,%eax
+ sbbl %eax,accum_2
+ movl fsqrt_arg_2,%eax /* ms word of normalized n*/
+ sbbl %eax,accum_3
+ jnc sqrt_stage_3_positive
+
+/* subtraction gives a negative result*/
+/* negate the result before division */
+ notl accum_1
+ notl accum_2
+ notl accum_3
+ addl $1,accum_1
+ adcl $0,accum_2
+
+#ifdef PARANOID
+ adcl $0,accum_3 /* This must be zero */
+ jz sqrt_stage_3_no_error
+
+sqrt_stage_3_error:
+ pushl EX_INTERNAL|0x207
+ call EXCEPTION
+
+sqrt_stage_3_no_error:
+#endif PARANOID
+
+ movl accum_2,%edx
+ movl accum_1,%eax
+ divl %esi
+ movl %eax,%ecx
+
+ movl %edx,%eax
+ divl %esi
+
+ sarl $1,%ecx / divide by 2*/
+ rcrl $1,%eax
+
+ /* prepare to round the result*/
+
+ addl %ecx,%edi
+ adcl $0,%esi
+
+ jmp sqrt_stage_3_finished
+
+sqrt_stage_3_positive:
+ movl accum_2,%edx
+ movl accum_1,%eax
+ divl %esi
+ movl %eax,%ecx
+
+ movl %edx,%eax
+ divl %esi
+
+ sarl $1,%ecx /* divide by 2*/
+ rcrl $1,%eax
+
+ /* prepare to round the result*/
+
+ notl %eax /* Negate the correction term*/
+ notl %ecx
+ addl $1,%eax
+ adcl $0,%ecx /* carry here ==> correction == 0*/
+ adcl $0xffffffff,%esi
+
+ addl %ecx,%edi
+ adcl $0,%esi
+
+sqrt_stage_3_finished:
+
+/* The result in %esi:%edi:%esi should be good to about 90 bits here,
+// and the rounding information here does not have sufficient accuracy
+// in a few rare cases. */
+ cmpl $0xffffffe0,%eax
+ ja sqrt_near_exact_x
+
+ cmpl $0x00000020,%eax
+ jb sqrt_near_exact
+
+ cmpl $0x7fffffe0,%eax
+ jb sqrt_round_result
+
+ cmpl $0x80000020,%eax
+ jb sqrt_get_more_precision
+
+sqrt_round_result:
+/* Set up for rounding operations*/
+ movl %eax,%edx
+ movl %esi,%eax
+ movl %edi,%ebx
+ movl PARAM1,%edi
+ movl EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0)*/
+ movl PARAM2,%ecx
+ jmp FPU_round_sqrt
+
+
+sqrt_near_exact_x:
+/* First, the estimate must be rounded up.*/
+ addl $1,%edi
+ adcl $0,%esi
+
+sqrt_near_exact:
+/* This is an easy case because x^1/2 is monotonic.
+// We need just find the square of our estimate, compare it
+// with the argument, and deduce whether our estimate is
+// above, below, or exact. We use the fact that the estimate
+// is known to be accurate to about 90 bits. */
+ movl %edi,%eax /* ls word of guess*/
+ mull %edi
+ movl %edx,%ebx /* 2nd ls word of square*/
+ movl %eax,%ecx /* ls word of square*/
+
+ movl %edi,%eax
+ mull %esi
+ addl %eax,%ebx
+ addl %eax,%ebx
+
+#ifdef PARANOID
+ cmp $0xffffffb0,%ebx
+ jb sqrt_near_exact_ok
+
+ cmp $0x00000050,%ebx
+ ja sqrt_near_exact_ok
+
+ pushl EX_INTERNAL|0x214
+ call EXCEPTION
+
+sqrt_near_exact_ok:
+#endif PARANOID
+
+ or %ebx,%ebx
+ js sqrt_near_exact_small
+
+ jnz sqrt_near_exact_large
+
+ or %ebx,%edx
+ jnz sqrt_near_exact_large
+
+/* Our estimate is exactly the right answer*/
+ xorl %eax,%eax
+ jmp sqrt_round_result
+
+sqrt_near_exact_small:
+/* Our estimate is too small*/
+ movl $0x000000ff,%eax
+ jmp sqrt_round_result
+
+sqrt_near_exact_large:
+/* Our estimate is too large, we need to decrement it*/
+ subl $1,%edi
+ sbbl $0,%esi
+ movl $0xffffff00,%eax
+ jmp sqrt_round_result
+
+
+sqrt_get_more_precision:
+/* This case is almost the same as the above, except we start*/
+/* with an extra bit of precision in the estimate.*/
+ stc /* The extra bit.*/
+ rcll $1,%edi /* Shift the estimate left one bit*/
+ rcll $1,%esi
+
+ movl %edi,%eax /* ls word of guess*/
+ mull %edi
+ movl %edx,%ebx /* 2nd ls word of square*/
+ movl %eax,%ecx /* ls word of square*/
+
+ movl %edi,%eax
+ mull %esi
+ addl %eax,%ebx
+ addl %eax,%ebx
+
+/* Put our estimate back to its original value*/
+ stc /* The ms bit.*/
+ rcrl $1,%esi /* Shift the estimate left one bit*/
+ rcrl $1,%edi
+
+#ifdef PARANOID
+ cmp $0xffffff60,%ebx
+ jb sqrt_more_prec_ok
+
+ cmp $0x000000a0,%ebx
+ ja sqrt_more_prec_ok
+
+ pushl EX_INTERNAL|0x215
+ call EXCEPTION
+
+sqrt_more_prec_ok:
+#endif PARANOID
+
+ or %ebx,%ebx
+ js sqrt_more_prec_small
+
+ jnz sqrt_more_prec_large
+
+ or %ebx,%ecx
+ jnz sqrt_more_prec_large
+
+/* Our estimate is exactly the right answer*/
+ movl $0x80000000,%eax
+ jmp sqrt_round_result
+
+sqrt_more_prec_small:
+/* Our estimate is too small*/
+ movl $0x800000ff,%eax
+ jmp sqrt_round_result
+
+sqrt_more_prec_large:
+/* Our estimate is too large*/
+ movl $0x7fffff00,%eax
+ jmp sqrt_round_result