summaryrefslogtreecommitdiff
path: root/sys/lib/libkern/arch/hppa/bcopy.S
blob: 18a632e166569e66140e082b9dc6d624aa44e317 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
/*	$OpenBSD: bcopy.S,v 1.1 1998/06/23 18:56:53 mickey Exp $	*/

/*
 *  (c) Copyright 1988 HEWLETT-PACKARD COMPANY
 *
 *  To anyone who acknowledges that this file is provided "AS IS"
 *  without any express or implied warranty:
 *      permission to use, copy, modify, and distribute this file
 *  for any purpose is hereby granted without fee, provided that
 *  the above copyright notice and this notice appears in all
 *  copies, and that the name of Hewlett-Packard Company not be
 *  used in advertising or publicity pertaining to distribution
 *  of the software without specific, written prior permission.
 *  Hewlett-Packard Company makes no representations about the
 *  suitability of this software for any purpose.
 */

/*
 * Copyright (c) 1990,1994 The University of Utah and
 * the Computer Systems Laboratory (CSL).  All rights reserved.
 *
 * THE UNIVERSITY OF UTAH AND CSL PROVIDE THIS SOFTWARE IN ITS "AS IS"
 * CONDITION, AND DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
 * WHATSOEVER RESULTING FROM ITS USE.
 *
 * CSL requests users of this software to return to csl-dist@cs.utah.edu any
 * improvements that they make and grant CSL redistribution rights.
 *
 * 	Utah $Hdr: bcopy.s 1.10 94/12/14$
 *	Author: Bob Wheeler, University of Utah CSL
 */

#include <machine/asm.h>

/*
 * void *
 * memcpy(dst, src, count)
 *	vm_offset_t	dst;
 *	vm_offset_t	src;
 *	int		count;
 *
 * Copy count bytes from src to dst and return dst (in r28, the PA-RISC
 * return-value register), as standard C requires of memcpy.  The two
 * pointer arguments are swapped into bcopy order and control falls
 * through into bcopy, which never writes r28.
 */
ENTRY(memcpy)
	copy	arg0,r28		/* return value = original dst */
	copy	arg0,arg3		/* arg3 is only a swap temporary here */
	copy	arg1,arg0		/* arg0 = src */
	copy	arg3,arg1		/* arg1 = dst */
	/* And fall into.... */

/*
 * void 
 * bcopy(src, dst, count)
 *	vm_offset_t	src;
 *	vm_offset_t	dst;
 *	int		count;
 *
 * Copy count bytes from src (arg0) to dst (arg1).  A count <= 0 copies
 * nothing.  Every path below walks both pointers upward, so regions
 * that overlap with dst above src are NOT handled correctly.
 *
 * Registers written: arg0-arg3, t1-t4, r1 and cr11 (the shift amount
 * register).  r28 is left alone so that memcpy's return value survives.
 *
 * Reading notes: a branch without ",n" always executes the instruction
 * that follows it (the delay slot); an arithmetic instruction carrying
 * a condition (e.g. "extru,=", "sub,>=") nullifies the instruction that
 * follows it when the condition holds.
 */
ALTENTRY(bcopy)
        comb,>=,n r0,arg2,$bcopy_exit	/* 0 >= count: nothing to do */

	/*
	 * See if the source and destination are word aligned and if the count
	 * is an integer number of words. If so then we can use an optimized 
	 * routine. If not then branch to bcopy_checkalign and see what we can
	 * do there.
	 */

        or	arg0,arg1,t1
        or	t1,arg2,t2
        extru,= t2,31,2,r0		/* low 2 bits all zero: skip the branch */
        b,n     $bcopy_checkalign

        addib,<,n -16,arg2,$bcopy_movewords	/* fewer than 16 bytes */

	/*
	 * We can move the data in 4 word moves. We'll use 4 registers to 
	 * avoid interlock and pipeline stalls.
	 */

$bcopy_loop16

        ldwm	16(arg0),t1		/* load, then advance src by 16 */
        ldw	-12(arg0),t2
        ldw     -8(arg0),t3
        ldw     -4(arg0),t4
        stwm    t1,16(arg1)		/* store, then advance dst by 16 */
        stw     t2,-12(arg1)
        stw     t3,-8(arg1)
        addib,>= -16,arg2,$bcopy_loop16
        stw     t4,-4(arg1)		/* delay slot: always executed */


	/*
	 * We have already decremented the count by 16, add 12 to it and then 
	 * we can test if there is at least 1 word left to move.
	 */

$bcopy_movewords
        addib,<,n 12,arg2,$bcopy_exit

	/*
	 * Clean up any remaining words that were not moved in the 16 byte
	 * moves
	 */

$bcopy_loop4
        ldwm	4(arg0),t1
        addib,>= -4,arg2,$bcopy_loop4
        stwm    t1,4(arg1)		/* delay slot: always executed */

	b,n 	$bcopy_exit


$bcopy_checkalign

	/*
	 * The source or destination is not word aligned or the count is not 
	 * an integral number of words. If we are dealing with less than 16 
	 * bytes then just do it byte by byte. Otherwise, see if the data has 
	 * the same basic alignment. We will add in the byte offset to size to
	 * keep track of what we have to move even though the stbys instruction
	 * won't physically move it. 
	 */

        comib,>= 15,arg2,$bcopy_byte	/* 15 >= count: byte loop */
        extru   arg0,31,2,t1		/* delay slot: t1 = src byte offset */
        extru   arg1,31,2,t2		/* t2 = dst byte offset */
        add     arg2,t2,arg2		/* count += dst byte offset */
        comb,<> t2,t1,$bcopy_unaligned
        dep     0,31,2,arg0		/* delay slot: word-align src on both paths */

	/*
	 * the source and destination have the same basic alignment. We will 
	 * move the data in blocks of 16 bytes as long as we can and then 
	 * we'll go to the 4 byte moves.
	 */

        addib,<,n -16,arg2,$bcopy_aligned2

$bcopy_loop_aligned4
        ldwm	16(arg0),t1
        ldw     -12(arg0),t2
        ldw     -8(arg0),t3
        ldw     -4(arg0),t4
        stbys,b,m t1,4(arg1)		/* partial store from dst's byte offset on */
        stwm    t2,4(arg1)
        stwm    t3,4(arg1)
        addib,>= -16,arg2,$bcopy_loop_aligned4
        stwm    t4,4(arg1)		/* delay slot: always executed */

	/*
	 * see if there is anything left that needs to be moved in a word move.
	 * Since the count was decremented by 16, add 12 to test if there are 
	 * any full word moves left to do.
	 */

$bcopy_aligned2
        addib,<,n 12,arg2,$bcopy_cleanup

$bcopy_loop_aligned2
        ldws,ma	4(arg0),t1
        addib,>= -4,arg2,$bcopy_loop_aligned2
        stbys,b,m t1,4(arg1)		/* delay slot: always executed */

	/*
	 * move the last bytes that may be unaligned on a word boundary
	 */

$bcopy_cleanup
         addib,=,n 4,arg2,$bcopy_exit	/* count back to 0..3; 0 means done */
         ldws	0(arg0),t1
         add    arg1,arg2,arg1		/* arg1 = one past the last byte to write */
         b      $bcopy_exit
         stbys,e t1,0(arg1)		/* delay slot: store only the leading bytes */

	/*
	 * The source and destination are not aligned on the same boundary 
	 * types. We will have to shift the data around. Figure out the shift 
	 * amount and load it into cr11.
	 *
	 * t3 = dst offset - src offset.  When that is >= 0 the sub nullifies
	 * the ldwm below: the first destination word then takes its bytes
	 * from a single source word, so no word needs to be preloaded.
	 */

$bcopy_unaligned
        sub,>=	t2,t1,t3
        ldwm    4(arg0),t1		/* preload first source word (may be nullified) */
        zdep    t3,28,29,t4		/* t4 = t3 << 3: byte difference as a bit count */
        mtctl   t4,11			/* cr11 = shift amount used by vshd */

	/*
	 * see if we can do some of this work in blocks of 16 bytes
	 */

        addib,<,n -16,arg2,$bcopy_unaligned_words

	/*
	 * t1 carries the previous source word into each pass.  arg3 holds
	 * each shifted result: it is caller-saved and otherwise used only as
	 * memcpy's swap temporary.  Using r28 here would destroy memcpy's
	 * return value.
	 */

$bcopy_unaligned4
        ldwm	16(arg0),t2
	ldw	-12(arg0),t3
	ldw	-8(arg0),t4
	ldw	-4(arg0),r1
        vshd	t1,t2,arg3
        stbys,b,m arg3,4(arg1)
        vshd	t2,t3,arg3
        stwm	arg3,4(arg1)
        vshd	t3,t4,arg3
        stwm	arg3,4(arg1)
        vshd	t4,r1,arg3
        stwm   	arg3,4(arg1)
        addib,>= -16,arg2,$bcopy_unaligned4
	copy	r1,t1			/* delay slot: last word loaded becomes "previous" */

	/*
	 * see if there is a full word that we can transfer
	 */

$bcopy_unaligned_words
        addib,<,n 12,arg2,$bcopy_unaligned_cleanup1

	/*
	 * Two-step loop: t1 and t2 take turns as "previous" and "current"
	 * source word, so no register copy is needed per word.  Leaving after
	 * the first step has the newest word in t2 (cleanup2); leaving after
	 * the second has it in t1 (cleanup1 moves it to t2).
	 */

$bcopy_unaligned_loop
        ldwm	4(arg0),t2
        vshd    t1,t2,t3
        addib,< -4,arg2,$bcopy_unaligned_cleanup2
        stbys,b,m t3,4(arg1)		/* delay slot: always executed */

        ldwm	4(arg0),t1
        vshd    t2,t1,t3
        addib,>= -4,arg2,$bcopy_unaligned_loop
        stbys,b,m t3,4(arg1)		/* delay slot: always executed */

$bcopy_unaligned_cleanup1
	copy	t1,t2

	/*
	 * Store the trailing bytes.  t2 holds the newest source word.
	 * Another source word is loaded only if the bytes still needed
	 * exceed those t2 supplies (shift amount / 8).
	 */

$bcopy_unaligned_cleanup2
	addib,<=,n 4,arg2,$bcopy_exit	/* count back to 0..3; <= 0 means done */
        add	arg1,arg2,arg1		/* arg1 = one past the last byte to write */
	mfctl	sar,t3
	extru	t3,28,2,t3		/* t3 = shift amount in bytes */
	sub,<=	arg2,t3,r0		/* remaining <= t3: nullify the load */
        ldwm    4(arg0),t1
        vshd    t2,t1,t3
        b       $bcopy_exit
        stbys,e t3,0(arg1)		/* delay slot: store only the leading bytes */

	/*
	 * move data one byte at a time
	 *
	 * The count is already known to be positive on the one path that
	 * reaches this label; the test is kept as a cheap guard.
	 */

$bcopy_byte
        comb,>=,n r0,arg2,$bcopy_exit

$bcopy_loop_byte
        ldbs,ma	1(arg0),t1
        addib,> -1,arg2,$bcopy_loop_byte
        stbs,ma t1,1(arg1) 		/* delay slot: always executed */

$bcopy_exit
EXIT(bcopy)