blob: 93ab11436e26badbbee534ff2445a38812456a70 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
/*
* Written by J.T. Conklin <jtc@NetBSD.org>.
* Public domain.
*/
/*
* Modified by Lex Wennmacher <wennmach@NetBSD.org>
* Still public domain.
*/
#include <machine/asm.h>
#include "abi.h"
RCSID("$NetBSD: s_log1p.S,v 1.13 2003/09/16 18:17:11 wennmach Exp $")
/*
* The log1p() function is provided to compute an accurate value of
* log(1 + x), even for tiny values of x. The i387 FPU provides the
* fyl2xp1 instruction for this purpose. However, the range of this
* instruction is limited to:
* -(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
* -0.292893 <= x <= 0.414214
* at least on older processor versions.
*
* log1p() is implemented by testing the range of the argument.
* If it is appropriate for fyl2xp1, this instruction is used.
* Else, we compute log1p(x) = ln(2)*ld(1 + x) the traditional way
* (using fyl2x).
*
* The range testing costs speed, but as the rationale for the very
* existence of this function is accuracy, we accept that.
*
* In order to reduce the cost for testing the range, we check if
* the argument is in the range
* -0.25 <= x <= 0.25
* which can be done with just one conditional branch. If x is
* inside this range, we use fyl2xp1. Outside of this range,
* the use of fyl2x is accurate enough.
*
*/
.text
.align 4
ENTRY(log1p)
XMM_ONE_ARG_DOUBLE_PROLOGUE
fldl ARG_DOUBLE_ONE
fabs
fld1 /* ... x 1 */
fadd %st(0) /* ... x 2 */
fadd %st(0) /* ... x 4 */
fld1 /* ... 4 1 */
fdivp /* ... x 0.25 */
fcompp
fnstsw %ax
andb $69,%ah
jne use_fyl2x
jmp use_fyl2xp1
.align 4
use_fyl2x:
fldln2
fldl ARG_DOUBLE_ONE
fld1
faddp
fyl2x
XMM_DOUBLE_EPILOGUE
ret
.align 4
use_fyl2xp1:
fldln2
fldl ARG_DOUBLE_ONE
fyl2xp1
XMM_DOUBLE_EPILOGUE
ret
|