1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
/*
* Written by J.T. Conklin <jtc@netbsd.org>.
* Public domain.
*/
#include <machine/asm.h>
/*
* When LIBC_SCCS is defined, emit the revision identification string
* below as a NUL-terminated string in the text section.
*/
#if defined(LIBC_SCCS)
.text
.asciz "$OpenBSD: swab.S,v 1.2 1996/08/19 08:13:24 tholo Exp $"
#endif
/*
* On the i486, this code is negligibly faster than the code generated
* by gcc, at about half the size. If my i386 databook is correct, it
* should be considerably faster than the gcc code on an i386.
*/
/*
 * void swab(const void *src, void *dst, ssize_t len)
 *
 * Copy len/2 16-bit words from src to dst, exchanging the two bytes of
 * each word.  A trailing odd byte is not copied.  If len is negative,
 * nothing is copied (as POSIX requires); without that check a negative
 * len would be shifted as an unsigned value and yield a huge word count.
 *
 * ABI:      i386, all arguments passed on the stack.
 * Saved:    %esi, %edi (pushed on entry, popped on exit).
 * Clobbers: %eax, %ecx, flags; the direction flag is left clear.
 */
ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		# %esi = src (1st arg, above 2 pushes + return address)
	movl	16(%esp),%edi		# %edi = dst (2nd arg)
	movl	20(%esp),%ecx		# %ecx = len in bytes (3rd arg)

	cld				# set direction forward

	testl	%ecx,%ecx		# negative len: copy nothing
	js	L4

	shrl	$1,%ecx			# %ecx = number of 16-bit words
	testl	$7,%ecx			# copy first group of 1 to 7 words
	jz	L2			# while swapping alternate bytes.
	.align	2,0x90
L1:	lodsw				# %ax = next source word
	rorw	$8,%ax			# exchange its two bytes
	stosw				# store to destination
	decl	%ecx
	testl	$7,%ecx			# loop until word count is a multiple of 8
	jnz	L1

L2:	shrl	$3,%ecx			# copy remainder 8 words at a time
	jz	L4			# while swapping alternate bytes.
	.align	2,0x90
L3:	lodsw				# word 1 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 2 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 3 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 4 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 5 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 6 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 7 of 8
	rorw	$8,%ax
	stosw
	lodsw				# word 8 of 8
	rorw	$8,%ax
	stosw
	decl	%ecx			# one fewer group of 8 words remaining
	jnz	L3

L4:	popl	%edi
	popl	%esi
	ret
|