src - OpenBSD base system

diff options


context:
space:
mode:

author	Michael Shalayeff <mickey@cvs.openbsd.org>	1999-09-14 00:46:19 +0000
committer	Michael Shalayeff <mickey@cvs.openbsd.org>	1999-09-14 00:46:19 +0000
commit	4b90e359fc35da3324e4677c210145a2ae7c45bc (patch)
tree	100219606abdf785e373dbc8cbd77c7cdfdd72d2 /lib/libc/arch/hppa
parent	0e9632755a4c31edfe3138c09d35334d2cfb6121 (diff)

this is the bcopy we use

Diffstat (limited to 'lib/libc/arch/hppa')

-rw-r--r--

lib/libc/arch/hppa/Makefile.inc

-rw-r--r--

lib/libc/arch/hppa/string/Makefile.inc

-rw-r--r--

lib/libc/arch/hppa/string/bcopy.S

238

-rw-r--r--

lib/libc/arch/hppa/string/bcopy.m4

269

-rw-r--r--

lib/libc/arch/hppa/string/memmove.S

252

5 files changed, 282 insertions, 494 deletions

diff --git a/lib/libc/arch/hppa/Makefile.inc b/lib/libc/arch/hppa/Makefile.inc
index 317dc3fccfb..ebffa402c6e 100644
--- a/lib/libc/arch/hppa/Makefile.inc
+++ b/lib/libc/arch/hppa/Makefile.inc

@@ -1,4 +1,12 @@

-# $OpenBSD: Makefile.inc,v 1.1 1998/12/17 16:56:45 mickey Exp $

+# $OpenBSD: Makefile.inc,v 1.2 1999/09/14 00:46:18 mickey Exp $

KMINCLUDES= arch/hppa/DEFS.h arch/hppa/SYS.h prefix.h

-KMSRCS= bcmp.S bzero.S bcopy.S ffs.S milli.S

+KMSRCS= bcmp.S bzero.S bcopy.m4 ffs.S

+SRCS+= memmove.S

+CLEANFILES+= memmove.S

+memmove.S: ${LIBCSRCDIR}/arch/hppa/string/bcopy.m4

+ @echo 'building ${.TARGET} from ${.ALLSRC}'

+ @m4 -DNAME=bcopy ${.ALLSRC} > ${.TARGET}

diff --git a/lib/libc/arch/hppa/string/Makefile.inc b/lib/libc/arch/hppa/string/Makefile.inc
index bca7da49295..2feab8eb5de 100644
--- a/lib/libc/arch/hppa/string/Makefile.inc
+++ b/lib/libc/arch/hppa/string/Makefile.inc

@@ -1,7 +1,8 @@

-# $OpenBSD: Makefile.inc,v 1.3 1999/04/27 17:53:32 mickey Exp $

+# $OpenBSD: Makefile.inc,v 1.4 1999/09/14 00:46:18 mickey Exp $

SRCS+= memchr.c memcmp.c memset.c index.c rindex.c \

strcat.c strcmp.c strcpy.c strcspn.c strlen.c \

strncat.c strncmp.c strncpy.c strpbrk.c strsep.c \

strspn.c strstr.c swab.c

-SRCS+= bcmp.S bcopy.S bzero.S ffs.S memmove.S strlcpy.S

+SRCS+= bcmp.S bzero.S ffs.S strlcpy.S

diff --git a/lib/libc/arch/hppa/string/bcopy.S b/lib/libc/arch/hppa/string/bcopy.S
deleted file mode 100644
index 177f41eac05..00000000000
--- a/lib/libc/arch/hppa/string/bcopy.S
+++ /dev/null

@@ -1,238 +0,0 @@

-/* $OpenBSD: bcopy.S,v 1.1 1998/08/28 20:59:41 mickey Exp $ */

-/*

- *

- * To anyone who acknowledges that this file is provided "AS IS"

- * without any express or implied warranty:

- * permission to use, copy, modify, and distribute this file

- * for any purpose is hereby granted without fee, provided that

- * the above copyright notice and this notice appears in all

- * copies, and that the name of Hewlett-Packard Company not be

- * used in advertising or publicity pertaining to distribution

- * of the software without specific, written prior permission.

- * Hewlett-Packard Company makes no representations about the

- * suitability of this software for any purpose.

- */

-/*

- *

- * THE UNIVERSITY OF UTAH AND CSL PROVIDE THIS SOFTWARE IN ITS "AS IS"

- * CONDITION, AND DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES

- * WHATSOEVER RESULTING FROM ITS USE.

- *

- * CSL requests users of this software to return to csl-dist@cs.utah.edu any

- * improvements that they make and grant CSL redistribution rights.

- *

- * Utah $Hdr: bcopy.s 1.10 94/12/14$

- * Author: Bob Wheeler, University of Utah CSL

- */

-#include <machine/asm.h>

-/*

- * void

- * bcopy(src, dst, count)

- * vm_offset_t src;

- * vm_offset_t dst;

- * int count;

- */

-ENTRY(bcopy)

- comb,>=,n r0,arg2,$bcopy_exit

- /*

- * See if the source and destination are word aligned and if the count

- * is an integer number of words. If so then we can use an optimized

- * routine. If not then branch to bcopy_checkalign and see what we can

- * do there.

- */

- or arg0,arg1,t1

- or t1,arg2,t2

- extru,= t2,31,2,r0

- b,n $bcopy_checkalign

- addib,<,n -16,arg2,$bcopy_movewords

- /*

- * We can move the data in 4 word moves. We'll use 4 registers to

- * avoid interlock and pipeline stalls.

- */

-$bcopy_loop16

- ldwm 16(arg0),t1

- ldw -12(arg0),t2

- ldw -8(arg0),t3

- ldw -4(arg0),t4

- stwm t1,16(arg1)

- stw t2,-12(arg1)

- stw t3,-8(arg1)

- addib,>= -16,arg2,$bcopy_loop16

- stw t4,-4(arg1)

- /*

- * We have already decremented the count by 16, add 12 to it and then

- * we can test if there is at least 1 word left to move.

- */

-$bcopy_movewords

- addib,<,n 12,arg2,$bcopy_exit

- /*

- * Clean up any remaining words that were not moved in the 16 byte

- * moves

- */

-$bcopy_loop4

- ldwm 4(arg0),t1

- addib,>= -4,arg2,$bcopy_loop4

- stwm t1,4(arg1)

- b,n $bcopy_exit

-$bcopy_checkalign

- /*

- * The source or destination is not word aligned or the count is not

- * an integral number of words. If we are dealing with less than 16

- * bytes then just do it byte by byte. Otherwise, see if the data has

- * the same basic alignment. We will add in the byte offset to size to

- * keep track of what we have to move even though the stbys instruction

- * won't physically move it.

- */

- comib,>= 15,arg2,$bcopy_byte

- extru arg0,31,2,t1

- extru arg1,31,2,t2

- add arg2,t2,arg2

- comb,<> t2,t1,$bcopy_unaligned

- dep 0,31,2,arg0

- /*

- * the source and destination have the same basic alignment. We will

- * move the data in blocks of 16 bytes as long as we can and then

- * we'll go to the 4 byte moves.

- */

- addib,<,n -16,arg2,$bcopy_aligned2

-$bcopy_loop_aligned4

- ldwm 16(arg0),t1

- ldw -12(arg0),t2

- ldw -8(arg0),t3

- ldw -4(arg0),t4

- stbys,b,m t1,4(arg1)

- stwm t2,4(arg1)

- stwm t3,4(arg1)

- addib,>= -16,arg2,$bcopy_loop_aligned4

- stwm t4,4(arg1)

- /*

- * see if there is anything left that needs to be moved in a word move.

- * Since the count was decremented by 16, add 12 to test if there are

- * any full word moves left to do.

- */

-$bcopy_aligned2

- addib,<,n 12,arg2,$bcopy_cleanup

-$bcopy_loop_aligned2

- ldws,ma 4(arg0),t1

- addib,>= -4,arg2,$bcopy_loop_aligned2

- stbys,b,m t1,4(arg1)

- /*

- * move the last bytes that may be unaligned on a word boundary

- */

-$bcopy_cleanup

- addib,=,n 4,arg2,$bcopy_exit

- ldws 0(arg0),t1

- add arg1,arg2,arg1

- b $bcopy_exit

- stbys,e t1,0(arg1)

- /*

- * The source and destination are not alligned on the same boundary

- * types. We will have to shift the data around. Figure out the shift

- * amount and load it into cr11.

- */

-$bcopy_unaligned

- sub,>= t2,t1,t3

- ldwm 4(arg0),t1

- zdep t3,28,29,t4

- mtctl t4,11

- /*

- * see if we can do some of this work in blocks of 16 bytes

- */

- addib,<,n -16,arg2,$bcopy_unaligned_words

-$bcopy_unaligned4

- ldwm 16(arg0),t2

- ldw -12(arg0),t3

- ldw -8(arg0),t4

- ldw -4(arg0),r1

- vshd t1,t2,r28

- stbys,b,m r28,4(arg1)

- vshd t2,t3,r28

- stwm r28,4(arg1)

- vshd t3,t4,r28

- stwm r28,4(arg1)

- vshd t4,r1,r28

- stwm r28,4(arg1)

- addib,>= -16,arg2,$bcopy_unaligned4

- copy r1,t1

- /*

- * see if there is a full word that we can transfer

- */

-$bcopy_unaligned_words

- addib,<,n 12,arg2,$bcopy_unaligned_cleanup1

-$bcopy_unaligned_loop

- ldwm 4(arg0),t2

- vshd t1,t2,t3

- addib,< -4,arg2,$bcopy_unaligned_cleanup2

- stbys,b,m t3,4(arg1)

- ldwm 4(arg0),t1

- vshd t2,t1,t3

- addib,>= -4,arg2,$bcopy_unaligned_loop

- stbys,b,m t3,4(arg1)

-$bcopy_unaligned_cleanup1

- copy t1,t2

-$bcopy_unaligned_cleanup2

- addib,<=,n 4,arg2,$bcopy_exit

- add arg1,arg2,arg1

- mfctl sar,t3

- extru t3,28,2,t3

- sub,<= arg2,t3,r0

- ldwm 4(arg0),t1

- vshd t2,t1,t3

- b $bcopy_exit

- stbys,e t3,0(arg1)

- /*

- * move data one byte at a time

- */

-$bcopy_byte

- comb,>=,n r0,arg2,$bcopy_exit

-$bcopy_loop_byte

- ldbs,ma 1(arg0),t1

- addib,> -1,arg2,$bcopy_loop_byte

- stbs,ma t1,1(arg1)

-$bcopy_exit

-EXIT(bcopy)

diff --git a/lib/libc/arch/hppa/string/bcopy.m4 b/lib/libc/arch/hppa/string/bcopy.m4
new file mode 100644
index 00000000000..bf69e1349ab
--- /dev/null
+++ b/lib/libc/arch/hppa/string/bcopy.m4

@@ -0,0 +1,269 @@

+define(_rcsid,``$OpenBSD: bcopy.m4,v 1.1 1999/09/14 00:46:18 mickey Exp $'')dnl

+dnl

+dnl This is the m4 source from which bcopy.S and spcopy.S are generated

+dnl

+define(`versionmacro',substr(_rcsid,1,eval(len(_rcsid)-2)))dnl

+dnl

+/* This is a generated file. DO NOT EDIT. */

+/*

+ * Generated from:

+ *

+ * versionmacro

+ */

+/*

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ * 1. Redistributions of source code must retain the above copyright

+ * notice, this list of conditions and the following disclaimer.

+ * 2. Redistributions in binary form must reproduce the above copyright

+ * notice, this list of conditions and the following disclaimer in the

+ * documentation and/or other materials provided with the distribution.

+ * 3. All advertising materials mentioning features or use of this software

+ * must display the following acknowledgement:

+ * This product includes software developed by Michael Shalayeff.

+ * 4. The name of the author may not be used to endorse or promote products

+ * derived from this software without specific prior written permission.

+ *

+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR

+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES

+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.

+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,

+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF

+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ *

+ */

+dnl

+dnl macro: L(`arg1',`arg2')

+dnl synopsis: creates an assembly label based on args resulting in $arg1.arg2

+dnl

+define(`L', `$$1.$2')dnl

+dnl

+define(`STWS',`ifelse($5, `u',dnl

+`ifelse($1, `1', `vshd $4, t`$1', r31

+ stbys,B,m r31, F`'4($2, $3)',

+`0', `0', `vshd t`'decr($1), t`$1', r31

+ stws,M r31, F`'4($2, $3)')',dnl

+`0', `0',

+`ifelse($1, `1',

+`stbys,B`'ifelse(B, `b', `,m ', `0', `0', ` ')`'t`$1', F`'4($2, $3)',

+`0', `0', `stws,M t`$1', F`'4($2, $3)')')')dnl

+define(`STWSS', `ifelse(`$3', `1', `dnl',

+`0', `0', `STWSS($1, $2, eval($3 - 1), $4, $5)')

+ STWS($3, $1, $2, $4, $5)dnl

+')dnl

+define(`LDWSS', `ifelse(`$3', `1', `dnl',

+`0', `0', `LDWSS($1, $2, eval($3 - 1))')

+ ldws,M F`'4($1, $2), t`'$3`'dnl

+')dnl

+dnl

+dnl copy data in 4-word blocks

+dnl

+define(`hppa_blcopy',`

+ addi -16, $6, $6

+L($1, `loop16'`$7')

+dnl cache hint may not work on some hardware

+dnl ldw F 32($2, $3), r0

+ifelse(F, `-', `dnl

+ addi F`'4, $5, $5', `0', `0', `dnl')

+LDWSS($2, $3, 4)

+STWSS($4, $5, 3, `ret1', $7)

+ifelse($7, `u', `dnl

+ STWS(4, $4, $5, `ret1', $7)', $7, `a', `dnl')

+ addib,>= -16, $6, L($1, `loop16'`$7')

+ifelse($7, `a', `dnl

+ STWS(4, $4, $5, `ret1', $7)dnl

+', $7, `u', `dnl

+ copy t4, ret1')')dnl

+dnl

+dnl copy in words

+dnl

+define(`STWL', `addib,<,n 12, $6, L($1, cleanup)

+ifelse($7, `u', ` copy ret1, t1', $7, `a', `dnl')

+L($1, word)

+ ldws,M F`'4($2, $3), t1

+ addib,>= -4, $6, L($1, word)

+ stws,M t1, F`'4($4, $5)

+L($1, cleanup)

+ addib,=,n 4, $6, L($1, done)

+ ldws 0($2, $3), t1

+ add $5, $6, $5

+ bv r0(rp)

+ stbys,E t1, 0($4, $5)

+')

+dnl

+dnl parameters:

+dnl $1 name

+dnl $2 source space

+dnl $3 source address

+dnl $4 destination space

+dnl $5 destination address

+dnl $6 length

+dnl $7 direction

+dnl

+define(hppa_copy,

+`dnl

+dnl

+dnl if direction is `-' (backwards copy), adjust src, dst

+dnl

+ifelse($7,`-', `add $3, $6, $3

+ add $5, $6, $5

+define(`F', `-')dnl

+define(`R', `')dnl

+define(`M', `mb')dnl

+define(`B', `e')dnl

+define(`E', `b')dnl

+',dnl ifelse

+`0',`0',

+`define(`F', `')dnl

+define(`R', `-')dnl

+define(`M', `ma')dnl

+define(`B', `b')dnl

+define(`E', `e')dnl

+')dnl ifelse

+ifelse($7,`-', `', `0',`0',

+` comib,>=,n 15, $6, L($1, byte)

+ extru $3, 31, 2, t3

+ extru $5, 31, 2, t4

+ add $6, t4, $6

+ comb,<> t3, t4, L($1, unaligned)

+ dep r0, 31, 2, $3

+ hppa_blcopy($1, $2, $3, $4, $5, $6, `a')

+ STWL($1, $2, $3, $4, $5, $6, `a')dnl

+L($1, unaligned)

+ sub,>= t4, t3, t2

+ ldwm F`'4($2, $3), ret1

+ zdep t2, 28, 29, t1

+ mtsar t1

+ hppa_blcopy($1, $2, $3, $4, $5, $6, `u')

+dnl STWL($1, $2, $3, $4, $5, $6, `u')

+ addib,<,n 12, $6, L($1, cleanup_un)

+L($1, word_un)

+ ldws,M F`'4($2, $3), t1

+ vshd ret1, t1, t2

+ addib,< -4, $6, L($1, cleanup1_un)

+ stws,M t2, F`'4($4, $5)

+ ldws,M F`'4($2, $3), ret1

+ vshd t1, ret1, t2

+ addib,>= -4, $6, L($1, word_un)

+ stws,M t2, F`'4($4, $5)

+L($1, cleanup_un)

+ addib,<=,n 4, $6, L($1, done)

+ mfctl sar, t4

+ add $5, $6, $5

+ extru t4, 28, 2, t4

+ sub,<= $6, t4, r0

+ ldws,M F`'4($2, $3), t1

+ vshd ret1, t1, t2

+ bv r0(rp)

+ stbys,E t2, 0($4, $5)

+L($1, cleanup1_un)

+ b L($1, cleanup_un)

+ copy t1, ret1

+')dnl ifelse

+L($1, byte)

+ comb,>=,n r0, $6, L($1, done)

+L($1, byte_loop)

+ ldbs,M F`'1($2, $3), t1

+ addib,<> -1, $6, L($1, byte_loop)

+ stbs,M t1, F`'1($4, $5)

+L($1, done)

+')dnl

+#undef _LOCORE

+#define _LOCORE

+#include <machine/asm.h>

+#include <machine/frame.h>

+ifelse(NAME, `bcopy',

+#if defined(LIBC_SCCS)

+ .text

+ .asciz "versionmacro"

+#endif

+ENTRY(memcpy)

+ALTENTRY(memmove)

+ copy arg0, t1

+ copy arg1, arg0

+ copy t1, arg1

+ALTENTRY(ovbcopy)

+ALTENTRY(bcopy)

+ add arg0, arg2, t1

+ comb,>,n t1, arg1, L(bcopy, reverse)

+ hppa_copy(bcopy_f, sr0, arg0, sr0, arg1, arg2, `+')

+ b,n L(bcopy, ret)

+L(bcopy, reverse)

+ hppa_copy(bcopy_r, sr0, arg0, sr0, arg1, arg2, `-')

+L(bcopy, ret)

+ bv 0(rp)

+ nop

+EXIT(memcpy)

+')dnl

+dnl

+ifelse(NAME, `spcopy',

+#ifdef _KERNEL

+#include <assym.h>

+/*

+ * int spcopy (pa_space_t ssp, const void *src, pa_space_t dsp, void *dst,

+ * size_t size)

+ * do a space to space bcopy.

+ *

+ * assumes that the spaces do not clash; otherwise we lose

+ */

+ .import curproc, data

+ .import copy_on_fault, code

+ENTRY(spcopy)

+ ldw HPPA_FRAME_ARG(4)(sp), ret1

+ comb,>=,n r0, ret1, L(spcopy, ret)

+ /* setup fault handler */

+ ldil L%curproc, r31

+ ldw R%curproc(r31), r31

+ ldil L%copy_on_fault, t2

+ ldw p_addr(r31), r31

+ ldo R%copy_on_fault(t2), t2

+ stw t2, pcb_onfault+u_pcb(r31)

+ mfctl sr2, ret0 /* XXX need this?, sr1 is scratchable */

+ mtctl arg0, sr1

+ mtctl arg2, sr2

+ copy ret1, arg0 /* ret1 is used in hppa_blcopy() */

+ hppa_copy(spcopy, sr1, arg1, sr2, arg3, ret1, `+')

+ /* reset fault handler */

+ stw r0, pcb_onfault+u_pcb(r31)

+ mtctl ret0, sr2

+L(spcopy, ret)

+ bv 0(rp)

+ copy r0, ret0

+EXIT(spcopy)

+#endif

+')dnl

+ .end

diff --git a/lib/libc/arch/hppa/string/memmove.S b/lib/libc/arch/hppa/string/memmove.S
deleted file mode 100644
index e72a19122be..00000000000
--- a/lib/libc/arch/hppa/string/memmove.S
+++ /dev/null

@@ -1,252 +0,0 @@

-/* $OpenBSD: memmove.S,v 1.1 1998/12/17 16:56:47 mickey Exp $ */

-/*

- *

- * To anyone who acknowledges that this file is provided "AS IS"

- * without any express or implied warranty:

- * permission to use, copy, modify, and distribute this file

- * for any purpose is hereby granted without fee, provided that

- * the above copyright notice and this notice appears in all

- * copies, and that the name of Hewlett-Packard Company not be

- * used in advertising or publicity pertaining to distribution

- * of the software without specific, written prior permission.

- * Hewlett-Packard Company makes no representations about the

- * suitability of this software for any purpose.

- */

-/*

- *

- * THE UNIVERSITY OF UTAH AND CSL PROVIDE THIS SOFTWARE IN ITS "AS IS"

- * CONDITION, AND DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES

- * WHATSOEVER RESULTING FROM ITS USE.

- *

- * CSL requests users of this software to return to csl-dist@cs.utah.edu any

- * improvements that they make and grant CSL redistribution rights.

- *

- * Utah $Hdr: bcopy.s 1.10 94/12/14$

- * Author: Bob Wheeler, University of Utah CSL

- */

-#include <machine/asm.h>

-/*

- * void

- * memmove(dst, src, count)

- * vm_offset_t dst;

- * vm_offset_t src;

- * int count;

- */

-ENTRY(memmove)

- /* fall through */

-/*

- * void

- * memcpy(dst, src, count)

- * vm_offset_t dst;

- * vm_offset_t src;

- * int count;

- */

-ALTENTRY(memcpy)

- copy arg0,arg3

- copy arg1,arg0

- copy arg3,arg1

- comb,>=,n r0,arg2,$bcopy_exit

- /*

- * See if the source and destination are word aligned and if the count

- * is an integer number of words. If so then we can use an optimized

- * routine. If not then branch to bcopy_checkalign and see what we can

- * do there.

- */

- or arg0,arg1,t1

- or t1,arg2,t2

- extru,= t2,31,2,r0

- b,n $bcopy_checkalign

- addib,<,n -16,arg2,$bcopy_movewords

- /*

- * We can move the data in 4 word moves. We'll use 4 registers to

- * avoid interlock and pipeline stalls.

- */

-$bcopy_loop16

- ldwm 16(arg0),t1

- ldw -12(arg0),t2

- ldw -8(arg0),t3

- ldw -4(arg0),t4

- stwm t1,16(arg1)

- stw t2,-12(arg1)

- stw t3,-8(arg1)

- addib,>= -16,arg2,$bcopy_loop16

- stw t4,-4(arg1)

- /*

- * We have already decremented the count by 16, add 12 to it and then

- * we can test if there is at least 1 word left to move.

- */

-$bcopy_movewords

- addib,<,n 12,arg2,$bcopy_exit

- /*

- * Clean up any remaining words that were not moved in the 16 byte

- * moves

- */

-$bcopy_loop4

- ldwm 4(arg0),t1

- addib,>= -4,arg2,$bcopy_loop4

- stwm t1,4(arg1)

- b,n $bcopy_exit

-$bcopy_checkalign

- /*

- * The source or destination is not word aligned or the count is not

- * an integral number of words. If we are dealing with less than 16

- * bytes then just do it byte by byte. Otherwise, see if the data has

- * the same basic alignment. We will add in the byte offset to size to

- * keep track of what we have to move even though the stbys instruction

- * won't physically move it.

- */

- comib,>= 15,arg2,$bcopy_byte

- extru arg0,31,2,t1

- extru arg1,31,2,t2

- add arg2,t2,arg2

- comb,<> t2,t1,$bcopy_unaligned

- dep 0,31,2,arg0

- /*

- * the source and destination have the same basic alignment. We will

- * move the data in blocks of 16 bytes as long as we can and then

- * we'll go to the 4 byte moves.

- */

- addib,<,n -16,arg2,$bcopy_aligned2

-$bcopy_loop_aligned4

- ldwm 16(arg0),t1

- ldw -12(arg0),t2

- ldw -8(arg0),t3

- ldw -4(arg0),t4

- stbys,b,m t1,4(arg1)

- stwm t2,4(arg1)

- stwm t3,4(arg1)

- addib,>= -16,arg2,$bcopy_loop_aligned4

- stwm t4,4(arg1)

- /*

- * see if there is anything left that needs to be moved in a word move.

- * Since the count was decremented by 16, add 12 to test if there are

- * any full word moves left to do.

- */

-$bcopy_aligned2

- addib,<,n 12,arg2,$bcopy_cleanup

-$bcopy_loop_aligned2

- ldws,ma 4(arg0),t1

- addib,>= -4,arg2,$bcopy_loop_aligned2

- stbys,b,m t1,4(arg1)

- /*

- * move the last bytes that may be unaligned on a word boundary

- */

-$bcopy_cleanup

- addib,=,n 4,arg2,$bcopy_exit

- ldws 0(arg0),t1

- add arg1,arg2,arg1

- b $bcopy_exit

- stbys,e t1,0(arg1)

- /*

- * The source and destination are not alligned on the same boundary

- * types. We will have to shift the data around. Figure out the shift

- * amount and load it into cr11.

- */

-$bcopy_unaligned

- sub,>= t2,t1,t3

- ldwm 4(arg0),t1

- zdep t3,28,29,t4

- mtctl t4,11

- /*

- * see if we can do some of this work in blocks of 16 bytes

- */

- addib,<,n -16,arg2,$bcopy_unaligned_words

-$bcopy_unaligned4

- ldwm 16(arg0),t2

- ldw -12(arg0),t3

- ldw -8(arg0),t4

- ldw -4(arg0),r1

- vshd t1,t2,r28

- stbys,b,m r28,4(arg1)

- vshd t2,t3,r28

- stwm r28,4(arg1)

- vshd t3,t4,r28

- stwm r28,4(arg1)

- vshd t4,r1,r28

- stwm r28,4(arg1)

- addib,>= -16,arg2,$bcopy_unaligned4

- copy r1,t1

- /*

- * see if there is a full word that we can transfer

- */

-$bcopy_unaligned_words

- addib,<,n 12,arg2,$bcopy_unaligned_cleanup1

-$bcopy_unaligned_loop

- ldwm 4(arg0),t2

- vshd t1,t2,t3

- addib,< -4,arg2,$bcopy_unaligned_cleanup2

- stbys,b,m t3,4(arg1)

- ldwm 4(arg0),t1

- vshd t2,t1,t3

- addib,>= -4,arg2,$bcopy_unaligned_loop

- stbys,b,m t3,4(arg1)

-$bcopy_unaligned_cleanup1

- copy t1,t2

-$bcopy_unaligned_cleanup2

- addib,<=,n 4,arg2,$bcopy_exit

- add arg1,arg2,arg1

- mfctl sar,t3

- extru t3,28,2,t3

- sub,<= arg2,t3,r0

- ldwm 4(arg0),t1

- vshd t2,t1,t3

- b $bcopy_exit

- stbys,e t3,0(arg1)

- /*

- * move data one byte at a time

- */

-$bcopy_byte

- comb,>=,n r0,arg2,$bcopy_exit

-$bcopy_loop_byte

- ldbs,ma 1(arg0),t1

- addib,> -1,arg2,$bcopy_loop_byte

- stbs,ma t1,1(arg1)

-$bcopy_exit

-EXIT(memmove)