From e855438d7a54e3ea87e99d0a4dabf617a91bf1ff Mon Sep 17 00:00:00 2001
From: Can Erkin Acar <canacar@cvs.openbsd.org>
Date: Mon, 7 Mar 2005 18:51:00 +0000
Subject: Improve the accuracy of log1p function on i387 for small values of x.
 From NetBSD, ok miod@

---
 lib/libm/arch/i387/s_log1p.S | 65 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 56 insertions(+), 9 deletions(-)

(limited to 'lib')

diff --git a/lib/libm/arch/i387/s_log1p.S b/lib/libm/arch/i387/s_log1p.S
index 561841fc1b0..08280b47ae0 100644
--- a/lib/libm/arch/i387/s_log1p.S
+++ b/lib/libm/arch/i387/s_log1p.S
@@ -1,22 +1,69 @@
 /*
- * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Written by J.T. Conklin <jtc@NetBSD.org>.
  * Public domain.
  */
 
+/*
+ * Modified by Lex Wennmacher <wennmach@NetBSD.org>
+ * Still public domain.
+ */
+
 #include <machine/asm.h>
 
-RCSID("$NetBSD: s_log1p.S,v 1.7 1995/05/09 00:10:58 jtc Exp $")
+RCSID("$NetBSD: s_log1p.S,v 1.13 2003/09/16 18:17:11 wennmach Exp $")
 
 /*
- * Since the fyl2xp1 instruction has such a limited range:
- *	-(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
- * it's not worth trying to use it.  
+ * The log1p() function is provided to compute an accurate value of
+ * log(1 + x), even for tiny values of x. The i387 FPU provides the
+ * fyl2xp1 instruction for this purpose. However, the range of this
+ * instruction is limited to:
+ * 		-(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
+ *                         -0.292893 <= x <= 0.414214
+ * at least on older processor versions.
+ *
+ * log1p() is implemented by testing the range of the argument.
+ * If it is appropriate for fyl2xp1, this instruction is used.
+ * Else, we compute log1p(x) = ln(2)*ld(1 + x) the traditional way
+ * (using fyl2x).
+ *
+ * The range testing costs speed, but as the rationale for the very
+ * existence of this function is accuracy, we accept that.
+ *
+ * In order to reduce the cost for testing the range, we check if
+ * the argument is in the range
+ *                             -0.25 <= x <= 0.25
+ * which can be done with just one conditional branch. If x is
+ * inside this range, we use fyl2xp1. Outside of this range,
+ * the use of fyl2x is accurate enough.
+ * 
  */
 
 ENTRY(log1p)
+	fldl	4(%esp)
+	fabs
+	fld1				/* ... x 1 */
+	fadd	%st(0)			/* ... x 2 */
+	fadd	%st(0)			/* ... x 4 */
+	fld1				/* ... 4 1 */
+	fdivp				/* ... x 0.25 */
+	fcompp
+	fnstsw	%ax
+	andb	$69,%ah
+	jne	use_fyl2x
+	jmp	use_fyl2xp1
+
+	.align	4
+use_fyl2x:
+	fldln2
+        fldl	4(%esp)
+        fld1
+        faddp
+        fyl2x
+        ret
+
+	.align	4
+use_fyl2xp1:
 	fldln2
-	fldl 4(%esp)
-	fld1
-	faddp
-	fyl2x
+	fldl	4(%esp)
+	fyl2xp1
 	ret
-- 
cgit v1.2.3