summaryrefslogtreecommitdiff
path: root/lib/libc/arch/hppa
diff options
context:
space:
mode:
authorMichael Shalayeff <mickey@cvs.openbsd.org>1999-09-14 00:46:19 +0000
committerMichael Shalayeff <mickey@cvs.openbsd.org>1999-09-14 00:46:19 +0000
commit4b90e359fc35da3324e4677c210145a2ae7c45bc (patch)
tree100219606abdf785e373dbc8cbd77c7cdfdd72d2 /lib/libc/arch/hppa
parent0e9632755a4c31edfe3138c09d35334d2cfb6121 (diff)
this is the bcopy we use
Diffstat (limited to 'lib/libc/arch/hppa')
-rw-r--r--lib/libc/arch/hppa/Makefile.inc12
-rw-r--r--lib/libc/arch/hppa/string/Makefile.inc5
-rw-r--r--lib/libc/arch/hppa/string/bcopy.S238
-rw-r--r--lib/libc/arch/hppa/string/bcopy.m4269
-rw-r--r--lib/libc/arch/hppa/string/memmove.S252
5 files changed, 282 insertions, 494 deletions
diff --git a/lib/libc/arch/hppa/Makefile.inc b/lib/libc/arch/hppa/Makefile.inc
index 317dc3fccfb..ebffa402c6e 100644
--- a/lib/libc/arch/hppa/Makefile.inc
+++ b/lib/libc/arch/hppa/Makefile.inc
@@ -1,4 +1,12 @@
-# $OpenBSD: Makefile.inc,v 1.1 1998/12/17 16:56:45 mickey Exp $
+# $OpenBSD: Makefile.inc,v 1.2 1999/09/14 00:46:18 mickey Exp $
KMINCLUDES= arch/hppa/DEFS.h arch/hppa/SYS.h prefix.h
-KMSRCS= bcmp.S bzero.S bcopy.S ffs.S milli.S
+KMSRCS= bcmp.S bzero.S bcopy.m4 ffs.S
+# memmove.S is generated: bcopy.m4 is run through m4 with NAME=bcopy
+SRCS+= memmove.S
+CLEANFILES+= memmove.S
+
+memmove.S: ${LIBCSRCDIR}/arch/hppa/string/bcopy.m4
+ @echo 'building ${.TARGET} from ${.ALLSRC}'
+ @m4 -DNAME=bcopy ${.ALLSRC} > ${.TARGET}
+
diff --git a/lib/libc/arch/hppa/string/Makefile.inc b/lib/libc/arch/hppa/string/Makefile.inc
index bca7da49295..2feab8eb5de 100644
--- a/lib/libc/arch/hppa/string/Makefile.inc
+++ b/lib/libc/arch/hppa/string/Makefile.inc
@@ -1,7 +1,8 @@
-# $OpenBSD: Makefile.inc,v 1.3 1999/04/27 17:53:32 mickey Exp $
+# $OpenBSD: Makefile.inc,v 1.4 1999/09/14 00:46:18 mickey Exp $
SRCS+= memchr.c memcmp.c memset.c index.c rindex.c \
strcat.c strcmp.c strcpy.c strcspn.c strlen.c \
strncat.c strncmp.c strncpy.c strpbrk.c strsep.c \
strspn.c strstr.c swab.c
-SRCS+= bcmp.S bcopy.S bzero.S ffs.S memmove.S strlcpy.S
+SRCS+= bcmp.S bzero.S ffs.S strlcpy.S
+
diff --git a/lib/libc/arch/hppa/string/bcopy.S b/lib/libc/arch/hppa/string/bcopy.S
deleted file mode 100644
index 177f41eac05..00000000000
--- a/lib/libc/arch/hppa/string/bcopy.S
+++ /dev/null
@@ -1,238 +0,0 @@
-/* $OpenBSD: bcopy.S,v 1.1 1998/08/28 20:59:41 mickey Exp $ */
-
-/*
- * (c) Copyright 1988 HEWLETT-PACKARD COMPANY
- *
- * To anyone who acknowledges that this file is provided "AS IS"
- * without any express or implied warranty:
- * permission to use, copy, modify, and distribute this file
- * for any purpose is hereby granted without fee, provided that
- * the above copyright notice and this notice appears in all
- * copies, and that the name of Hewlett-Packard Company not be
- * used in advertising or publicity pertaining to distribution
- * of the software without specific, written prior permission.
- * Hewlett-Packard Company makes no representations about the
- * suitability of this software for any purpose.
- */
-/*
- * Copyright (c) 1990,1994 The University of Utah and
- * the Computer Systems Laboratory (CSL). All rights reserved.
- *
- * THE UNIVERSITY OF UTAH AND CSL PROVIDE THIS SOFTWARE IN ITS "AS IS"
- * CONDITION, AND DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
- * WHATSOEVER RESULTING FROM ITS USE.
- *
- * CSL requests users of this software to return to csl-dist@cs.utah.edu any
- * improvements that they make and grant CSL redistribution rights.
- *
- * Utah $Hdr: bcopy.s 1.10 94/12/14$
- * Author: Bob Wheeler, University of Utah CSL
- */
-
-#include <machine/asm.h>
-
-/*
- * void
- * bcopy(src, dst, count)
- * vm_offset_t src;
- * vm_offset_t dst;
- * int count;
- */
-ENTRY(bcopy)
- comb,>=,n r0,arg2,$bcopy_exit
-
- /*
- * See if the source and destination are word aligned and if the count
- * is an integer number of words. If so then we can use an optimized
- * routine. If not then branch to bcopy_checkalign and see what we can
- * do there.
- */
-
- or arg0,arg1,t1
- or t1,arg2,t2
- extru,= t2,31,2,r0
- b,n $bcopy_checkalign
-
- addib,<,n -16,arg2,$bcopy_movewords
-
- /*
- * We can move the data in 4 word moves. We'll use 4 registers to
- * avoid interlock and pipeline stalls.
- */
-
-$bcopy_loop16
-
- ldwm 16(arg0),t1
- ldw -12(arg0),t2
- ldw -8(arg0),t3
- ldw -4(arg0),t4
- stwm t1,16(arg1)
- stw t2,-12(arg1)
- stw t3,-8(arg1)
- addib,>= -16,arg2,$bcopy_loop16
- stw t4,-4(arg1)
-
-
- /*
- * We have already decremented the count by 16, add 12 to it and then
- * we can test if there is at least 1 word left to move.
- */
-
-$bcopy_movewords
- addib,<,n 12,arg2,$bcopy_exit
-
- /*
- * Clean up any remaining words that were not moved in the 16 byte
- * moves
- */
-
-$bcopy_loop4
- ldwm 4(arg0),t1
- addib,>= -4,arg2,$bcopy_loop4
- stwm t1,4(arg1)
-
- b,n $bcopy_exit
-
-
-$bcopy_checkalign
-
- /*
- * The source or destination is not word aligned or the count is not
- * an integral number of words. If we are dealing with less than 16
- * bytes then just do it byte by byte. Otherwise, see if the data has
- * the same basic alignment. We will add in the byte offset to size to
- * keep track of what we have to move even though the stbys instruction
- * won't physically move it.
- */
-
- comib,>= 15,arg2,$bcopy_byte
- extru arg0,31,2,t1
- extru arg1,31,2,t2
- add arg2,t2,arg2
- comb,<> t2,t1,$bcopy_unaligned
- dep 0,31,2,arg0
-
- /*
- * the source and destination have the same basic alignment. We will
- * move the data in blocks of 16 bytes as long as we can and then
- * we'll go to the 4 byte moves.
- */
-
- addib,<,n -16,arg2,$bcopy_aligned2
-
-$bcopy_loop_aligned4
- ldwm 16(arg0),t1
- ldw -12(arg0),t2
- ldw -8(arg0),t3
- ldw -4(arg0),t4
- stbys,b,m t1,4(arg1)
- stwm t2,4(arg1)
- stwm t3,4(arg1)
- addib,>= -16,arg2,$bcopy_loop_aligned4
- stwm t4,4(arg1)
-
- /*
- * see if there is anything left that needs to be moved in a word move.
- * Since the count was decremented by 16, add 12 to test if there are
- * any full word moves left to do.
- */
-
-$bcopy_aligned2
- addib,<,n 12,arg2,$bcopy_cleanup
-
-$bcopy_loop_aligned2
- ldws,ma 4(arg0),t1
- addib,>= -4,arg2,$bcopy_loop_aligned2
- stbys,b,m t1,4(arg1)
-
- /*
- * move the last bytes that may be unaligned on a word boundary
- */
-
-$bcopy_cleanup
- addib,=,n 4,arg2,$bcopy_exit
- ldws 0(arg0),t1
- add arg1,arg2,arg1
- b $bcopy_exit
- stbys,e t1,0(arg1)
-
- /*
- * The source and destination are not alligned on the same boundary
- * types. We will have to shift the data around. Figure out the shift
- * amount and load it into cr11.
- */
-
-$bcopy_unaligned
- sub,>= t2,t1,t3
- ldwm 4(arg0),t1
- zdep t3,28,29,t4
- mtctl t4,11
-
- /*
- * see if we can do some of this work in blocks of 16 bytes
- */
-
- addib,<,n -16,arg2,$bcopy_unaligned_words
-
-$bcopy_unaligned4
- ldwm 16(arg0),t2
- ldw -12(arg0),t3
- ldw -8(arg0),t4
- ldw -4(arg0),r1
- vshd t1,t2,r28
- stbys,b,m r28,4(arg1)
- vshd t2,t3,r28
- stwm r28,4(arg1)
- vshd t3,t4,r28
- stwm r28,4(arg1)
- vshd t4,r1,r28
- stwm r28,4(arg1)
- addib,>= -16,arg2,$bcopy_unaligned4
- copy r1,t1
-
- /*
- * see if there is a full word that we can transfer
- */
-
-$bcopy_unaligned_words
- addib,<,n 12,arg2,$bcopy_unaligned_cleanup1
-
-$bcopy_unaligned_loop
- ldwm 4(arg0),t2
- vshd t1,t2,t3
- addib,< -4,arg2,$bcopy_unaligned_cleanup2
- stbys,b,m t3,4(arg1)
-
- ldwm 4(arg0),t1
- vshd t2,t1,t3
- addib,>= -4,arg2,$bcopy_unaligned_loop
- stbys,b,m t3,4(arg1)
-
-$bcopy_unaligned_cleanup1
- copy t1,t2
-
-$bcopy_unaligned_cleanup2
- addib,<=,n 4,arg2,$bcopy_exit
- add arg1,arg2,arg1
- mfctl sar,t3
- extru t3,28,2,t3
- sub,<= arg2,t3,r0
- ldwm 4(arg0),t1
- vshd t2,t1,t3
- b $bcopy_exit
- stbys,e t3,0(arg1)
-
- /*
- * move data one byte at a time
- */
-
-$bcopy_byte
- comb,>=,n r0,arg2,$bcopy_exit
-
-$bcopy_loop_byte
- ldbs,ma 1(arg0),t1
- addib,> -1,arg2,$bcopy_loop_byte
- stbs,ma t1,1(arg1)
-
-$bcopy_exit
-EXIT(bcopy)
diff --git a/lib/libc/arch/hppa/string/bcopy.m4 b/lib/libc/arch/hppa/string/bcopy.m4
new file mode 100644
index 00000000000..bf69e1349ab
--- /dev/null
+++ b/lib/libc/arch/hppa/string/bcopy.m4
@@ -0,0 +1,269 @@
+define(_rcsid,``$OpenBSD: bcopy.m4,v 1.1 1999/09/14 00:46:18 mickey Exp $'')dnl
+dnl
+dnl
+dnl This is the m4 source from which bcopy.S (memmove.S in libc) and spcopy.S are generated
+dnl
+dnl
+define(`versionmacro',substr(_rcsid,1,eval(len(_rcsid)-2)))dnl
+dnl
+/* This is a generated file. DO NOT EDIT. */
+/*
+ * Generated from:
+ *
+ * versionmacro
+ */
+/*
+ * Copyright (c) 1999 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Michael Shalayeff.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+dnl
+dnl macro: L(`arg1',`arg2')
+dnl synopsis: expands to the assembly label $arg1.arg2, e.g. L(bcopy, ret) gives $bcopy.ret
+dnl
+define(`L', `$$1.$2')dnl
+dnl macro: STWS(n, space, base, prev, mode) stores word register t<n>
+dnl with mode u the word is first merged by vshd with its predecessor
+dnl (prev for word 1) into scratch r1; r31 is left alone for the caller
+define(`STWS',`ifelse($5, `u',dnl
+`ifelse($1, `1', `vshd $4, t`$1', r1
+ stbys,B,m r1, F`'4($2, $3)',
+`0', `0', `vshd t`'decr($1), t`$1', r1
+ stws,M r1, F`'4($2, $3)')',dnl
+`0', `0',
+`ifelse($1, `1',
+`stbys,B`'ifelse(B, `b', `,m ', `0', `0', ` ')`'t`$1', F`'4($2, $3)',
+`0', `0', `stws,M t`$1', F`'4($2, $3)')')')dnl
+define(`STWSS', `ifelse(`$3', `1', `dnl',
+`0', `0', `STWSS($1, $2, eval($3 - 1), $4, $5)')
+ STWS($3, $1, $2, $4, $5)dnl
+')dnl STWSS(space, base, n, prev, mode): emits STWS for words 1 up to n
+define(`LDWSS', `ifelse(`$3', `1', `dnl',
+`0', `0', `LDWSS($1, $2, eval($3 - 1))')
+ ldws,M F`'4($1, $2), t`'$3`'dnl
+')dnl LDWSS(space, base, n): emits ldws loads into t1 up to t<n>
+dnl hppa_blcopy(name, sspace, src, dspace, dst, count, mode)
+dnl copy data in 4-word blocks; mode a is the aligned, u the unaligned case
+dnl count is lowered by 16 up front and once more per pass of the loop
+define(`hppa_blcopy',`
+ addi -16, $6, $6
+L($1, `loop16'`$7')
+dnl cache hint may not work on some hardware
+dnl ldw F 32($2, $3), r0
+ifelse(F, `-', `dnl
+ addi F`'4, $5, $5', `0', `0', `dnl')
+LDWSS($2, $3, 4)
+STWSS($4, $5, 3, `ret1', $7)
+ifelse($7, `u', `dnl
+ STWS(4, $4, $5, `ret1', $7)', $7, `a', `dnl')
+ addib,>= -16, $6, L($1, `loop16'`$7')
+ifelse($7, `a', `dnl
+ STWS(4, $4, $5, `ret1', $7)dnl
+', $7, `u', `dnl
+ copy t4, ret1')')dnl
+dnl STWL(name, sspace, src, dspace, dst, count, mode)
+dnl copy in words, then the trailing bytes; the tail leaves through the
+dnl done label so that code placed after the copy by the caller still runs
+define(`STWL', `addib,<,n 12, $6, L($1, cleanup)
+ifelse($7, `u', ` copy ret1, t1', $7, `a', `dnl')
+L($1, word)
+ ldws,M F`'4($2, $3), t1
+ addib,>= -4, $6, L($1, word)
+ stws,M t1, F`'4($4, $5)
+
+L($1, cleanup)
+ addib,=,n 4, $6, L($1, done)
+ ldws 0($2, $3), t1
+ add $5, $6, $5
+ b L($1, done) /* not bv: do not return from inside the macro */
+ stbys,E t1, 0($4, $5)
+')
+dnl macro: hppa_copy(name, sspace, src, dspace, dst, count, direction)
+dnl emits the copy loops followed by the closing label $name.done
+dnl parameters:
+dnl $1 name (prefix of all labels emitted)
+dnl $2 source space
+dnl $3 source address
+dnl $4 destination space
+dnl $5 destination address
+dnl $6 length in bytes
+dnl $7 direction: `-' copies backwards one byte at a time, else forwards
+dnl
+define(hppa_copy,
+`dnl
+dnl
+dnl if direction is `-' (backwards copy), adjust src, dst
+dnl
+ifelse($7,`-', `add $3, $6, $3
+ add $5, $6, $5
+define(`F', `-')dnl
+define(`R', `')dnl
+define(`M', `mb')dnl
+define(`B', `e')dnl
+define(`E', `b')dnl
+',dnl ifelse
+`0',`0',
+`define(`F', `')dnl
+define(`R', `-')dnl
+define(`M', `ma')dnl
+define(`B', `b')dnl
+define(`E', `e')dnl
+')dnl ifelse
+
+ifelse($7,`-', `', `0',`0',
+` comib,>=,n 15, $6, L($1, byte) /* short copies go byte by byte */
+
+ extru $3, 31, 2, t3
+ extru $5, 31, 2, t4
+ add $6, t4, $6 /* count the dst byte offset in as well */
+ comb,<> t3, t4, L($1, unaligned)
+ dep r0, 31, 2, $3 /* delay slot: word-align src */
+ hppa_blcopy($1, $2, $3, $4, $5, $6, `a')
+
+ STWL($1, $2, $3, $4, $5, $6, `a')dnl
+
+L($1, unaligned)
+ sub,>= t4, t3, t2
+ ldwm F`'4($2, $3), ret1
+ zdep t2, 28, 29, t1
+ mtsar t1 /* shift amount for the vshd merges below */
+ hppa_blcopy($1, $2, $3, $4, $5, $6, `u')
+
+dnl STWL($1, $2, $3, $4, $5, $6, `u')
+ addib,<,n 12, $6, L($1, cleanup_un)
+L($1, word_un)
+ ldws,M F`'4($2, $3), t1
+ vshd ret1, t1, t2
+ addib,< -4, $6, L($1, cleanup1_un)
+ stws,M t2, F`'4($4, $5)
+ ldws,M F`'4($2, $3), ret1
+ vshd t1, ret1, t2
+ addib,>= -4, $6, L($1, word_un)
+ stws,M t2, F`'4($4, $5)
+
+L($1, cleanup_un)
+ addib,<=,n 4, $6, L($1, done)
+ mfctl sar, t4
+ add $5, $6, $5
+ extru t4, 28, 2, t4
+ sub,<= $6, t4, r0
+ ldws,M F`'4($2, $3), t1
+ vshd ret1, t1, t2
+ b L($1, done) /* not bv: code after the macro must still run */
+ stbys,E t2, 0($4, $5)
+
+L($1, cleanup1_un)
+ b L($1, cleanup_un)
+ copy t1, ret1
+')dnl ifelse
+
+L($1, byte)
+ comb,>=,n r0, $6, L($1, done)
+L($1, byte_loop)
+ ldbs,M F`'1($2, $3), t1
+ addib,<> -1, $6, L($1, byte_loop)
+ stbs,M t1, F`'1($4, $5)
+L($1, done)
+')dnl
+`
+#undef _LOCORE
+#define _LOCORE
+#include <machine/asm.h>
+#include <machine/frame.h>
+'
+ifelse(NAME, `bcopy',
+`
+#if defined(LIBC_SCCS)
+ .text
+ .asciz "versionmacro"
+#endif
+/* memcpy and memmove swap their pointer arguments and fall into bcopy */
+ENTRY(memcpy)
+ALTENTRY(memmove)
+ copy arg0, ret0 /* ret0 = dst: the value memcpy and memmove return */
+ copy arg1, arg0 /* arg0 = src, the order bcopy expects */
+ copy ret0, arg1 /* arg1 = dst */
+ALTENTRY(ovbcopy)
+ALTENTRY(bcopy)
+ sub arg1, arg0, t1 /* t1 = dst - src, compared as unsigned */
+ comb,>>,n arg2, t1, L(bcopy, reverse) /* backwards only if src <= dst < src + count */
+ hppa_copy(bcopy_f, sr0, arg0, sr0, arg1, arg2, `+')
+ b,n L(bcopy, ret)
+L(bcopy, reverse)
+ hppa_copy(bcopy_r, sr0, arg0, sr0, arg1, arg2, `-')
+L(bcopy, ret)
+ bv 0(rp)
+ nop
+EXIT(memcpy)
+')dnl
+dnl
+ifelse(NAME, `spcopy',
+`
+#ifdef _KERNEL
+#include <assym.h>
+
+/*
+ * int spcopy (pa_space_t ssp, const void *src, pa_space_t dsp, void *dst,
+ * size_t size)
+ * space to space bcopy with copy_on_fault stored as the pcb_onfault handler;
+ * returns 0 when it leaves through the ret label.
+ * assumes that the spaces do not clash, otherwise we lose
+ */
+ .import curproc, data
+ .import copy_on_fault, code
+ENTRY(spcopy)
+ ldw HPPA_FRAME_ARG(4)(sp), ret1 /* fifth argument: size */
+ comb,>=,n r0, ret1, L(spcopy, ret)
+`
+ /* setup fault handler */
+ ldil L%curproc, r31
+ ldw R%curproc(r31), r31
+ ldil L%copy_on_fault, t2
+ ldw p_addr(r31), r31
+ ldo R%copy_on_fault(t2), t2
+ stw t2, pcb_onfault+u_pcb(r31)
+'
+ mfsp sr2, ret0 /* XXX need this?, sr1 is scratchable */
+ mtsp arg0, sr1 /* space registers take mtsp, not mtctl */
+ mtsp arg2, sr2
+ copy ret1, arg0 /* count lives in arg0: the copy macros use ret1 as scratch */
+
+ hppa_copy(spcopy, sr1, arg1, sr2, arg3, arg0, `+')
+
+ /* reset fault handler */
+ stw r0, pcb_onfault+u_pcb(r31)
+ mtsp ret0, sr2
+L(spcopy, ret)
+ bv 0(rp)
+ copy r0, ret0
+EXIT(spcopy)
+#endif
+')dnl
+
+ .end
diff --git a/lib/libc/arch/hppa/string/memmove.S b/lib/libc/arch/hppa/string/memmove.S
deleted file mode 100644
index e72a19122be..00000000000
--- a/lib/libc/arch/hppa/string/memmove.S
+++ /dev/null
@@ -1,252 +0,0 @@
-/* $OpenBSD: memmove.S,v 1.1 1998/12/17 16:56:47 mickey Exp $ */
-
-/*
- * (c) Copyright 1988 HEWLETT-PACKARD COMPANY
- *
- * To anyone who acknowledges that this file is provided "AS IS"
- * without any express or implied warranty:
- * permission to use, copy, modify, and distribute this file
- * for any purpose is hereby granted without fee, provided that
- * the above copyright notice and this notice appears in all
- * copies, and that the name of Hewlett-Packard Company not be
- * used in advertising or publicity pertaining to distribution
- * of the software without specific, written prior permission.
- * Hewlett-Packard Company makes no representations about the
- * suitability of this software for any purpose.
- */
-/*
- * Copyright (c) 1990,1994 The University of Utah and
- * the Computer Systems Laboratory (CSL). All rights reserved.
- *
- * THE UNIVERSITY OF UTAH AND CSL PROVIDE THIS SOFTWARE IN ITS "AS IS"
- * CONDITION, AND DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
- * WHATSOEVER RESULTING FROM ITS USE.
- *
- * CSL requests users of this software to return to csl-dist@cs.utah.edu any
- * improvements that they make and grant CSL redistribution rights.
- *
- * Utah $Hdr: bcopy.s 1.10 94/12/14$
- * Author: Bob Wheeler, University of Utah CSL
- */
-
-#include <machine/asm.h>
-
-/*
- * void
- * memmove(dst, src, count)
- * vm_offset_t dst;
- * vm_offset_t src;
- * int count;
- */
-ENTRY(memmove)
- /* fall through */
-
-/*
- * void
- * memcpy(dst, src, count)
- * vm_offset_t dst;
- * vm_offset_t src;
- * int count;
- */
-ALTENTRY(memcpy)
- copy arg0,arg3
- copy arg1,arg0
- copy arg3,arg1
-
- comb,>=,n r0,arg2,$bcopy_exit
-
- /*
- * See if the source and destination are word aligned and if the count
- * is an integer number of words. If so then we can use an optimized
- * routine. If not then branch to bcopy_checkalign and see what we can
- * do there.
- */
-
- or arg0,arg1,t1
- or t1,arg2,t2
- extru,= t2,31,2,r0
- b,n $bcopy_checkalign
-
- addib,<,n -16,arg2,$bcopy_movewords
-
- /*
- * We can move the data in 4 word moves. We'll use 4 registers to
- * avoid interlock and pipeline stalls.
- */
-
-$bcopy_loop16
-
- ldwm 16(arg0),t1
- ldw -12(arg0),t2
- ldw -8(arg0),t3
- ldw -4(arg0),t4
- stwm t1,16(arg1)
- stw t2,-12(arg1)
- stw t3,-8(arg1)
- addib,>= -16,arg2,$bcopy_loop16
- stw t4,-4(arg1)
-
-
- /*
- * We have already decremented the count by 16, add 12 to it and then
- * we can test if there is at least 1 word left to move.
- */
-
-$bcopy_movewords
- addib,<,n 12,arg2,$bcopy_exit
-
- /*
- * Clean up any remaining words that were not moved in the 16 byte
- * moves
- */
-
-$bcopy_loop4
- ldwm 4(arg0),t1
- addib,>= -4,arg2,$bcopy_loop4
- stwm t1,4(arg1)
-
- b,n $bcopy_exit
-
-
-$bcopy_checkalign
-
- /*
- * The source or destination is not word aligned or the count is not
- * an integral number of words. If we are dealing with less than 16
- * bytes then just do it byte by byte. Otherwise, see if the data has
- * the same basic alignment. We will add in the byte offset to size to
- * keep track of what we have to move even though the stbys instruction
- * won't physically move it.
- */
-
- comib,>= 15,arg2,$bcopy_byte
- extru arg0,31,2,t1
- extru arg1,31,2,t2
- add arg2,t2,arg2
- comb,<> t2,t1,$bcopy_unaligned
- dep 0,31,2,arg0
-
- /*
- * the source and destination have the same basic alignment. We will
- * move the data in blocks of 16 bytes as long as we can and then
- * we'll go to the 4 byte moves.
- */
-
- addib,<,n -16,arg2,$bcopy_aligned2
-
-$bcopy_loop_aligned4
- ldwm 16(arg0),t1
- ldw -12(arg0),t2
- ldw -8(arg0),t3
- ldw -4(arg0),t4
- stbys,b,m t1,4(arg1)
- stwm t2,4(arg1)
- stwm t3,4(arg1)
- addib,>= -16,arg2,$bcopy_loop_aligned4
- stwm t4,4(arg1)
-
- /*
- * see if there is anything left that needs to be moved in a word move.
- * Since the count was decremented by 16, add 12 to test if there are
- * any full word moves left to do.
- */
-
-$bcopy_aligned2
- addib,<,n 12,arg2,$bcopy_cleanup
-
-$bcopy_loop_aligned2
- ldws,ma 4(arg0),t1
- addib,>= -4,arg2,$bcopy_loop_aligned2
- stbys,b,m t1,4(arg1)
-
- /*
- * move the last bytes that may be unaligned on a word boundary
- */
-
-$bcopy_cleanup
- addib,=,n 4,arg2,$bcopy_exit
- ldws 0(arg0),t1
- add arg1,arg2,arg1
- b $bcopy_exit
- stbys,e t1,0(arg1)
-
- /*
- * The source and destination are not alligned on the same boundary
- * types. We will have to shift the data around. Figure out the shift
- * amount and load it into cr11.
- */
-
-$bcopy_unaligned
- sub,>= t2,t1,t3
- ldwm 4(arg0),t1
- zdep t3,28,29,t4
- mtctl t4,11
-
- /*
- * see if we can do some of this work in blocks of 16 bytes
- */
-
- addib,<,n -16,arg2,$bcopy_unaligned_words
-
-$bcopy_unaligned4
- ldwm 16(arg0),t2
- ldw -12(arg0),t3
- ldw -8(arg0),t4
- ldw -4(arg0),r1
- vshd t1,t2,r28
- stbys,b,m r28,4(arg1)
- vshd t2,t3,r28
- stwm r28,4(arg1)
- vshd t3,t4,r28
- stwm r28,4(arg1)
- vshd t4,r1,r28
- stwm r28,4(arg1)
- addib,>= -16,arg2,$bcopy_unaligned4
- copy r1,t1
-
- /*
- * see if there is a full word that we can transfer
- */
-
-$bcopy_unaligned_words
- addib,<,n 12,arg2,$bcopy_unaligned_cleanup1
-
-$bcopy_unaligned_loop
- ldwm 4(arg0),t2
- vshd t1,t2,t3
- addib,< -4,arg2,$bcopy_unaligned_cleanup2
- stbys,b,m t3,4(arg1)
-
- ldwm 4(arg0),t1
- vshd t2,t1,t3
- addib,>= -4,arg2,$bcopy_unaligned_loop
- stbys,b,m t3,4(arg1)
-
-$bcopy_unaligned_cleanup1
- copy t1,t2
-
-$bcopy_unaligned_cleanup2
- addib,<=,n 4,arg2,$bcopy_exit
- add arg1,arg2,arg1
- mfctl sar,t3
- extru t3,28,2,t3
- sub,<= arg2,t3,r0
- ldwm 4(arg0),t1
- vshd t2,t1,t3
- b $bcopy_exit
- stbys,e t3,0(arg1)
-
- /*
- * move data one byte at a time
- */
-
-$bcopy_byte
- comb,>=,n r0,arg2,$bcopy_exit
-
-$bcopy_loop_byte
- ldbs,ma 1(arg0),t1
- addib,> -1,arg2,$bcopy_loop_byte
- stbs,ma t1,1(arg1)
-
-$bcopy_exit
-EXIT(memmove)