diff options
-rw-r--r-- | lib/libm/arch/i387/s_log1p.S | 65 |
1 files changed, 56 insertions, 9 deletions
diff --git a/lib/libm/arch/i387/s_log1p.S b/lib/libm/arch/i387/s_log1p.S index 561841fc1b0..08280b47ae0 100644 --- a/lib/libm/arch/i387/s_log1p.S +++ b/lib/libm/arch/i387/s_log1p.S @@ -1,22 +1,69 @@ /* - * Written by J.T. Conklin <jtc@netbsd.org>. + * Written by J.T. Conklin <jtc@NetBSD.org>. * Public domain. */ +/* + * Modified by Lex Wennmacher <wennmach@NetBSD.org> + * Still public domain. + */ + #include <machine/asm.h> -RCSID("$NetBSD: s_log1p.S,v 1.7 1995/05/09 00:10:58 jtc Exp $") +RCSID("$NetBSD: s_log1p.S,v 1.13 2003/09/16 18:17:11 wennmach Exp $") /* - * Since the fyl2xp1 instruction has such a limited range: - * -(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1 - * it's not worth trying to use it. + * The log1p() function is provided to compute an accurate value of + * log(1 + x), even for tiny values of x. The i387 FPU provides the + * fyl2xp1 instruction for this purpose. However, the range of this + * instruction is limited to: + * -(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1 + * -0.292893 <= x <= 0.414214 + * at least on older processor versions. + * + * log1p() is implemented by testing the range of the argument. + * If it is appropriate for fyl2xp1, this instruction is used. + * Else, we compute log1p(x) = ln(2)*ld(1 + x) the traditional way + * (using fyl2x). + * + * The range testing costs speed, but as the rationale for the very + * existence of this function is accuracy, we accept that. + * + * In order to reduce the cost for testing the range, we check if + * the argument is in the range + * -0.25 <= x <= 0.25 + * which can be done with just one conditional branch. If x is + * inside this range, we use fyl2xp1. Outside of this range, + * the use of fyl2x is accurate enough. + * */ ENTRY(log1p) + fldl 4(%esp) + fabs + fld1 /* ... x 1 */ + fadd %st(0) /* ... x 2 */ + fadd %st(0) /* ... x 4 */ + fld1 /* ... 4 1 */ + fdivp /* ... x 0.25 */ + fcompp + fnstsw %ax + andb $69,%ah + jne use_fyl2x + jmp use_fyl2xp1 + + .align 4 +use_fyl2x: + fldln2 + fldl 4(%esp) + fld1 + faddp + fyl2x + ret + + .align 4 +use_fyl2xp1: fldln2 - fldl 4(%esp) - fld1 - faddp - fyl2x + fldl 4(%esp) + fyl2xp1 ret |