blob: 51805172625d57671ee4fae3fdb072339764cf30 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
/* $OpenBSD: s_log1pf.S,v 1.3 2009/04/08 23:31:34 martynas Exp $ */
/*
* Written by J.T. Conklin <jtc@NetBSD.org>.
* Public domain.
*/
/*
* Modified by Lex Wennmacher <wennmach@NetBSD.org>
* Still public domain.
*/
#include <machine/asm.h>
#include "abi.h"
/*
* The log1pf() function is provided to compute an accurate value of
* log(1 + x), even for tiny values of x. The i387 FPU provides the
* fyl2xp1 instruction for this purpose. However, the range of this
* instruction is limited to:
* -(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
* -0.292893 <= x <= 0.414214
* at least on older processor versions.
*
* log1pf() is implemented by testing the range of the argument.
* If it is appropriate for fyl2xp1, this instruction is used.
* Else, we compute log1pf(x) = ln(2)*ld(1 + x) the traditional way
* (using fyl2x).
*
* The range testing costs speed, but as the rationale for the very
* existence of this function is accuracy, we accept that.
*
* In order to reduce the cost of testing the range, we check whether
* the argument is in the range
* -0.25 < x < 0.25
* (i.e. |x| < 0.25), which can be done with just one conditional
* branch. If x is inside this range, we use fyl2xp1. Everything else,
* including |x| == 0.25 and NaN (both rejected by the comparison),
* goes through fyl2x, which is accurate enough there.
*
*/
/*
 * log1pf: single-precision log(1 + x) on the x87 unit.
 *
 * Path selection, based on |x|:
 *	0.25 > |x|			ln(2) * fyl2xp1(x)
 *	anything else (incl. NaN)	ln(2) * fyl2x(1 + x)
 *
 * The argument is read through ARG_FLOAT_ONE and the result is handed
 * back by XMM_FLOAT_EPILOGUE; those macros (and the prologue) are not
 * defined in this file -- presumably they come from "abi.h".
 *
 * Stack pictures in the comments list the x87 stack with the top
 * element on the right.
 */
	.text
	.align 4
ENTRY(log1pf)
	XMM_ONE_ARG_FLOAT_PROLOGUE

	/* Put |x| and the 0.25 threshold on the x87 stack. */
	flds	ARG_FLOAT_ONE		/* x */
	fabs				/* |x| */
	fld1				/* |x| 1 */
	fadd	%st(0)			/* |x| 2 */
	fadd	%st(0)			/* |x| 4 */
	fld1				/* |x| 4 1 */
	/*
	 * Keep the operand-less spelling: the code depends on the way the
	 * assembler encodes it, which leaves 1/4 (not 4/1) on the stack.
	 */
	fdivp				/* |x| 0.25 */

	/* Compare 0.25 with |x|; both operands are popped. */
	fcompp
	fnstsw	%ax
	andb	$0x45, %ah		/* C3|C2|C0: all clear only if 0.25 > |x| */
	jne	.Llog1pf_large

	/* 0.25 > |x|: fyl2xp1 works on x directly, no 1 + x rounding. */
	fldln2				/* ln2 */
	flds	ARG_FLOAT_ONE		/* ln2 x */
	fyl2xp1				/* ln2 * log2(1 + x) */
	XMM_FLOAT_EPILOGUE
	ret

	.align 4
.Llog1pf_large:
	/* |x| >= 0.25 or unordered: form 1 + x explicitly and use fyl2x. */
	fldln2				/* ln2 */
	flds	ARG_FLOAT_ONE		/* ln2 x */
	fld1				/* ln2 x 1 */
	faddp				/* ln2 (1 + x) */
	fyl2x				/* ln2 * log2(1 + x) */
	XMM_FLOAT_EPILOGUE
	ret
|