summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/libm/arch/i387/s_log1p.S65
1 files changed, 56 insertions, 9 deletions
diff --git a/lib/libm/arch/i387/s_log1p.S b/lib/libm/arch/i387/s_log1p.S
index 561841fc1b0..08280b47ae0 100644
--- a/lib/libm/arch/i387/s_log1p.S
+++ b/lib/libm/arch/i387/s_log1p.S
@@ -1,22 +1,69 @@
/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Written by J.T. Conklin <jtc@NetBSD.org>.
* Public domain.
*/
+/*
+ * Modified by Lex Wennmacher <wennmach@NetBSD.org>
+ * Still public domain.
+ */
+
#include <machine/asm.h>
-RCSID("$NetBSD: s_log1p.S,v 1.7 1995/05/09 00:10:58 jtc Exp $")
+RCSID("$NetBSD: s_log1p.S,v 1.13 2003/09/16 18:17:11 wennmach Exp $")
/*
- * Since the fyl2xp1 instruction has such a limited range:
- * -(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
- * it's not worth trying to use it.
+ * The log1p() function is provided to compute an accurate value of
+ * log(1 + x), even for tiny values of x. The i387 FPU provides the
+ * fyl2xp1 instruction for this purpose. However, the range of this
+ * instruction is limited to:
+ * -(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
+ * -0.292893 <= x <= 0.414214
+ * at least on older processor versions.
+ *
+ * log1p() is implemented by testing the range of the argument.
+ * If it is appropriate for fyl2xp1, this instruction is used.
+ * Else, we compute log1p(x) = ln(2)*ld(1 + x) the traditional way
+ * (using fyl2x).
+ *
+ * The range testing costs speed, but as the rationale for the very
+ * existence of this function is accuracy, we accept that.
+ *
+ * In order to reduce the cost for testing the range, we check if
+ * the argument is in the range
+ * -0.25 <= x <= 0.25
+ * which can be done with just one conditional branch. If x is
+ * inside this range, we use fyl2xp1. Outside of this range,
+ * the use of fyl2x is accurate enough.
+ *
*/
ENTRY(log1p)
+ fldl 4(%esp)
+ fabs
+ fld1 /* ... x 1 */
+ fadd %st(0) /* ... x 2 */
+ fadd %st(0) /* ... x 4 */
+ fld1 /* ... 4 1 */
+ fdivp /* ... x 0.25 */
+ fcompp
+ fnstsw %ax
+ andb $69,%ah
+ jne use_fyl2x
+ jmp use_fyl2xp1
+
+ .align 4
+use_fyl2x:
+ fldln2
+ fldl 4(%esp)
+ fld1
+ faddp
+ fyl2x
+ ret
+
+ .align 4
+use_fyl2xp1:
fldln2
- fldl 4(%esp)
- fld1
- faddp
- fyl2x
+ fldl 4(%esp)
+ fyl2xp1
ret