diff options
author | Michael Shalayeff <mickey@cvs.openbsd.org> | 1999-09-14 00:46:19 +0000 |
---|---|---|
committer | Michael Shalayeff <mickey@cvs.openbsd.org> | 1999-09-14 00:46:19 +0000 |
commit | 4b90e359fc35da3324e4677c210145a2ae7c45bc (patch) | |
tree | 100219606abdf785e373dbc8cbd77c7cdfdd72d2 /lib | |
parent | 0e9632755a4c31edfe3138c09d35334d2cfb6121 (diff) |
this is the bcopy we use
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libc/arch/hppa/Makefile.inc | 12 | ||||
-rw-r--r-- | lib/libc/arch/hppa/string/Makefile.inc | 5 | ||||
-rw-r--r-- | lib/libc/arch/hppa/string/bcopy.S | 238 | ||||
-rw-r--r-- | lib/libc/arch/hppa/string/bcopy.m4 | 269 | ||||
-rw-r--r-- | lib/libc/arch/hppa/string/memmove.S | 252 |
5 files changed, 282 insertions, 494 deletions
diff --git a/lib/libc/arch/hppa/Makefile.inc b/lib/libc/arch/hppa/Makefile.inc index 317dc3fccfb..ebffa402c6e 100644 --- a/lib/libc/arch/hppa/Makefile.inc +++ b/lib/libc/arch/hppa/Makefile.inc @@ -1,4 +1,12 @@ -# $OpenBSD: Makefile.inc,v 1.1 1998/12/17 16:56:45 mickey Exp $ +# $OpenBSD: Makefile.inc,v 1.2 1999/09/14 00:46:18 mickey Exp $ KMINCLUDES= arch/hppa/DEFS.h arch/hppa/SYS.h prefix.h -KMSRCS= bcmp.S bzero.S bcopy.S ffs.S milli.S +KMSRCS= bcmp.S bzero.S bcopy.m4 ffs.S + +SRCS+= memmove.S +CLEANFILES+= memmove.S + +memmove.S: ${LIBCSRCDIR}/arch/hppa/string/bcopy.m4 + @echo 'building ${.TARGET} from ${.ALLSRC}' + @m4 -DNAME=bcopy ${.ALLSRC} > ${.TARGET} + diff --git a/lib/libc/arch/hppa/string/Makefile.inc b/lib/libc/arch/hppa/string/Makefile.inc index bca7da49295..2feab8eb5de 100644 --- a/lib/libc/arch/hppa/string/Makefile.inc +++ b/lib/libc/arch/hppa/string/Makefile.inc @@ -1,7 +1,8 @@ -# $OpenBSD: Makefile.inc,v 1.3 1999/04/27 17:53:32 mickey Exp $ +# $OpenBSD: Makefile.inc,v 1.4 1999/09/14 00:46:18 mickey Exp $ SRCS+= memchr.c memcmp.c memset.c index.c rindex.c \ strcat.c strcmp.c strcpy.c strcspn.c strlen.c \ strncat.c strncmp.c strncpy.c strpbrk.c strsep.c \ strspn.c strstr.c swab.c -SRCS+= bcmp.S bcopy.S bzero.S ffs.S memmove.S strlcpy.S +SRCS+= bcmp.S bzero.S ffs.S strlcpy.S + diff --git a/lib/libc/arch/hppa/string/bcopy.S b/lib/libc/arch/hppa/string/bcopy.S deleted file mode 100644 index 177f41eac05..00000000000 --- a/lib/libc/arch/hppa/string/bcopy.S +++ /dev/null @@ -1,238 +0,0 @@ -/* $OpenBSD: bcopy.S,v 1.1 1998/08/28 20:59:41 mickey Exp $ */ - -/* - * (c) Copyright 1988 HEWLETT-PACKARD COMPANY - * - * To anyone who acknowledges that this file is provided "AS IS" - * without any express or implied warranty: - * permission to use, copy, modify, and distribute this file - * for any purpose is hereby granted without fee, provided that - * the above copyright notice and this notice appears in all - * copies, and that the name of Hewlett-Packard Company not be - * used in advertising or publicity pertaining to distribution - * of the software without specific, written prior permission. - * Hewlett-Packard Company makes no representations about the - * suitability of this software for any purpose. - */ -/* - * Copyright (c) 1990,1994 The University of Utah and - * the Computer Systems Laboratory (CSL). All rights reserved. - * - * THE UNIVERSITY OF UTAH AND CSL PROVIDE THIS SOFTWARE IN ITS "AS IS" - * CONDITION, AND DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES - * WHATSOEVER RESULTING FROM ITS USE. - * - * CSL requests users of this software to return to csl-dist@cs.utah.edu any - * improvements that they make and grant CSL redistribution rights. - * - * Utah $Hdr: bcopy.s 1.10 94/12/14$ - * Author: Bob Wheeler, University of Utah CSL - */ - -#include <machine/asm.h> - -/* - * void - * bcopy(src, dst, count) - * vm_offset_t src; - * vm_offset_t dst; - * int count; - */ -ENTRY(bcopy) - comb,>=,n r0,arg2,$bcopy_exit - - /* - * See if the source and destination are word aligned and if the count - * is an integer number of words. If so then we can use an optimized - * routine. If not then branch to bcopy_checkalign and see what we can - * do there. - */ - - or arg0,arg1,t1 - or t1,arg2,t2 - extru,= t2,31,2,r0 - b,n $bcopy_checkalign - - addib,<,n -16,arg2,$bcopy_movewords - - /* - * We can move the data in 4 word moves. We'll use 4 registers to - * avoid interlock and pipeline stalls. - */ - -$bcopy_loop16 - - ldwm 16(arg0),t1 - ldw -12(arg0),t2 - ldw -8(arg0),t3 - ldw -4(arg0),t4 - stwm t1,16(arg1) - stw t2,-12(arg1) - stw t3,-8(arg1) - addib,>= -16,arg2,$bcopy_loop16 - stw t4,-4(arg1) - - - /* - * We have already decremented the count by 16, add 12 to it and then - * we can test if there is at least 1 word left to move. - */ - -$bcopy_movewords - addib,<,n 12,arg2,$bcopy_exit - - /* - * Clean up any remaining words that were not moved in the 16 byte - * moves - */ - -$bcopy_loop4 - ldwm 4(arg0),t1 - addib,>= -4,arg2,$bcopy_loop4 - stwm t1,4(arg1) - - b,n $bcopy_exit - - -$bcopy_checkalign - - /* - * The source or destination is not word aligned or the count is not - * an integral number of words. If we are dealing with less than 16 - * bytes then just do it byte by byte. Otherwise, see if the data has - * the same basic alignment. We will add in the byte offset to size to - * keep track of what we have to move even though the stbys instruction - * won't physically move it. - */ - - comib,>= 15,arg2,$bcopy_byte - extru arg0,31,2,t1 - extru arg1,31,2,t2 - add arg2,t2,arg2 - comb,<> t2,t1,$bcopy_unaligned - dep 0,31,2,arg0 - - /* - * the source and destination have the same basic alignment. We will - * move the data in blocks of 16 bytes as long as we can and then - * we'll go to the 4 byte moves. - */ - - addib,<,n -16,arg2,$bcopy_aligned2 - -$bcopy_loop_aligned4 - ldwm 16(arg0),t1 - ldw -12(arg0),t2 - ldw -8(arg0),t3 - ldw -4(arg0),t4 - stbys,b,m t1,4(arg1) - stwm t2,4(arg1) - stwm t3,4(arg1) - addib,>= -16,arg2,$bcopy_loop_aligned4 - stwm t4,4(arg1) - - /* - * see if there is anything left that needs to be moved in a word move. - * Since the count was decremented by 16, add 12 to test if there are - * any full word moves left to do. - */ - -$bcopy_aligned2 - addib,<,n 12,arg2,$bcopy_cleanup - -$bcopy_loop_aligned2 - ldws,ma 4(arg0),t1 - addib,>= -4,arg2,$bcopy_loop_aligned2 - stbys,b,m t1,4(arg1) - - /* - * move the last bytes that may be unaligned on a word boundary - */ - -$bcopy_cleanup - addib,=,n 4,arg2,$bcopy_exit - ldws 0(arg0),t1 - add arg1,arg2,arg1 - b $bcopy_exit - stbys,e t1,0(arg1) - - /* - * The source and destination are not alligned on the same boundary - * types. We will have to shift the data around. Figure out the shift - * amount and load it into cr11. - */ - -$bcopy_unaligned - sub,>= t2,t1,t3 - ldwm 4(arg0),t1 - zdep t3,28,29,t4 - mtctl t4,11 - - /* - * see if we can do some of this work in blocks of 16 bytes - */ - - addib,<,n -16,arg2,$bcopy_unaligned_words - -$bcopy_unaligned4 - ldwm 16(arg0),t2 - ldw -12(arg0),t3 - ldw -8(arg0),t4 - ldw -4(arg0),r1 - vshd t1,t2,r28 - stbys,b,m r28,4(arg1) - vshd t2,t3,r28 - stwm r28,4(arg1) - vshd t3,t4,r28 - stwm r28,4(arg1) - vshd t4,r1,r28 - stwm r28,4(arg1) - addib,>= -16,arg2,$bcopy_unaligned4 - copy r1,t1 - - /* - * see if there is a full word that we can transfer - */ - -$bcopy_unaligned_words - addib,<,n 12,arg2,$bcopy_unaligned_cleanup1 - -$bcopy_unaligned_loop - ldwm 4(arg0),t2 - vshd t1,t2,t3 - addib,< -4,arg2,$bcopy_unaligned_cleanup2 - stbys,b,m t3,4(arg1) - - ldwm 4(arg0),t1 - vshd t2,t1,t3 - addib,>= -4,arg2,$bcopy_unaligned_loop - stbys,b,m t3,4(arg1) - -$bcopy_unaligned_cleanup1 - copy t1,t2 - -$bcopy_unaligned_cleanup2 - addib,<=,n 4,arg2,$bcopy_exit - add arg1,arg2,arg1 - mfctl sar,t3 - extru t3,28,2,t3 - sub,<= arg2,t3,r0 - ldwm 4(arg0),t1 - vshd t2,t1,t3 - b $bcopy_exit - stbys,e t3,0(arg1) - - /* - * move data one byte at a time - */ - -$bcopy_byte - comb,>=,n r0,arg2,$bcopy_exit - -$bcopy_loop_byte - ldbs,ma 1(arg0),t1 - addib,> -1,arg2,$bcopy_loop_byte - stbs,ma t1,1(arg1) - -$bcopy_exit -EXIT(bcopy) diff --git a/lib/libc/arch/hppa/string/bcopy.m4 b/lib/libc/arch/hppa/string/bcopy.m4 new file mode 100644 index 00000000000..bf69e1349ab --- /dev/null +++ b/lib/libc/arch/hppa/string/bcopy.m4 @@ -0,0 +1,269 @@ +define(_rcsid,``$OpenBSD: bcopy.m4,v 1.1 1999/09/14 00:46:18 mickey Exp $'')dnl +dnl +dnl +dnl This is the source file for bcopy.S, spcopy.S +dnl +dnl +define(`versionmacro',substr(_rcsid,1,eval(len(_rcsid)-2)))dnl +dnl +/* This is a generated file. DO NOT EDIT. */ +/* + * Generated from: + * + * versionmacro + */ +/* + * Copyright (c) 1999 Michael Shalayeff + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Michael Shalayeff. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +dnl +dnl macro: L(`arg1',`arg2') +dnl synopsis: creates an assembly label based on args resulting in $arg1.arg2 +dnl +define(`L', `$$1.$2')dnl +dnl +dnl +dnl +define(`STWS',`ifelse($5, `u',dnl +`ifelse($1, `1', `vshd $4, t`$1', r31 + stbys,B,m r31, F`'4($2, $3)', +`0', `0', `vshd t`'decr($1), t`$1', r31 + stws,M r31, F`'4($2, $3)')',dnl +`0', `0', +`ifelse($1, `1', +`stbys,B`'ifelse(B, `b', `,m ', `0', `0', ` ')`'t`$1', F`'4($2, $3)', +`0', `0', `stws,M t`$1', F`'4($2, $3)')')')dnl +define(`STWSS', `ifelse(`$3', `1', `dnl', +`0', `0', `STWSS($1, $2, eval($3 - 1), $4, $5)') + STWS($3, $1, $2, $4, $5)dnl +')dnl +define(`LDWSS', `ifelse(`$3', `1', `dnl', +`0', `0', `LDWSS($1, $2, eval($3 - 1))') + ldws,M F`'4($1, $2), t`'$3`'dnl +')dnl +dnl +dnl copy data in 4-words blocks +dnl +define(`hppa_blcopy',` + addi -16, $6, $6 +L($1, `loop16'`$7') +dnl cache hint may not work on some hardware +dnl ldw F 32($2, $3), r0 +ifelse(F, `-', `dnl + addi F`'4, $5, $5', `0', `0', `dnl') +LDWSS($2, $3, 4) +STWSS($4, $5, 3, `ret1', $7) +ifelse($7, `u', `dnl + STWS(4, $4, $5, `ret1', $7)', $7, `a', `dnl') + addib,>= -16, $6, L($1, `loop16'`$7') +ifelse($7, `a', `dnl + STWS(4, $4, $5, `ret1', $7)dnl +', $7, `u', `dnl + copy t4, ret1')')dnl +dnl +dnl copy in words +dnl +define(`STWL', `addib,<,n 12, $6, L($1, cleanup) +ifelse($7, `u', ` copy ret1, t1', $7, `a', `dnl') +L($1, word) + ldws,M F`'4($2, $3), t1 + addib,>= -4, $6, L($1, word) + stws,M t1, F`'4($4, $5) + +L($1, cleanup) + addib,=,n 4, $6, L($1, done) + ldws 0($2, $3), t1 + add $5, $6, $5 + bv r0(rp) + stbys,E t1, 0($4, $5) +') +dnl +dnl +dnl parameters: +dnl $1 name +dnl $2 source space +dnl $3 source address +dnl $4 destination space +dnl $5 destination address +dnl $6 length +dnl $7 direction +dnl +define(hppa_copy, +`dnl +dnl +dnl if direction is `-' (backwards copy), adjust src, dst +dnl +ifelse($7,`-', `add $3, $6, $3 + add $5, $6, $5 +define(`F', `-')dnl +define(`R', `')dnl +define(`M', `mb')dnl +define(`B', `e')dnl +define(`E', `b')dnl +',dnl ifelse +`0',`0', +`define(`F', `')dnl +define(`R', `-')dnl +define(`M', `ma')dnl +define(`B', `b')dnl +define(`E', `e')dnl +')dnl ifelse + +ifelse($7,`-', `', `0',`0', +` comib,>=,n 15, $6, L($1, byte) + + extru $3, 31, 2, t3 + extru $5, 31, 2, t4 + add $6, t4, $6 + comb,<> t3, t4, L($1, unaligned) + dep r0, 31, 2, $3 + hppa_blcopy($1, $2, $3, $4, $5, $6, `a') + + STWL($1, $2, $3, $4, $5, $6, `a')dnl + +L($1, unaligned) + sub,>= t4, t3, t2 + ldwm F`'4($2, $3), ret1 + zdep t2, 28, 29, t1 + mtsar t1 + hppa_blcopy($1, $2, $3, $4, $5, $6, `u') + +dnl STWL($1, $2, $3, $4, $5, $6, `u') + addib,<,n 12, $6, L($1, cleanup_un) +L($1, word_un) + ldws,M F`'4($2, $3), t1 + vshd ret1, t1, t2 + addib,< -4, $6, L($1, cleanup1_un) + stws,M t2, F`'4($4, $5) + ldws,M F`'4($2, $3), ret1 + vshd t1, ret1, t2 + addib,>= -4, $6, L($1, word_un) + stws,M t2, F`'4($4, $5) + +L($1, cleanup_un) + addib,<=,n 4, $6, L($1, done) + mfctl sar, t4 + add $5, $6, $5 + extru t4, 28, 2, t4 + sub,<= $6, t4, r0 + ldws,M F`'4($2, $3), t1 + vshd ret1, t1, t2 + bv r0(rp) + stbys,E t2, 0($4, $5) + +L($1, cleanup1_un) + b L($1, cleanup_un) + copy t1, ret1 +')dnl ifelse + +L($1, byte) + comb,>=,n r0, $6, L($1, done) +L($1, byte_loop) + ldbs,M F`'1($2, $3), t1 + addib,<> -1, $6, L($1, byte_loop) + stbs,M t1, F`'1($4, $5) +L($1, done) +')dnl +` +#undef _LOCORE +#define _LOCORE +#include <machine/asm.h> +#include <machine/frame.h> +' +ifelse(NAME, `bcopy', +` +#if defined(LIBC_SCCS) + .text + .asciz "versionmacro" +#endif + +ENTRY(memcpy) +ALTENTRY(memmove) + copy arg0, t1 + copy arg1, arg0 + copy t1, arg1 +ALTENTRY(ovbcopy) +ALTENTRY(bcopy) + add arg0, arg2, t1 + comb,>,n t1, arg1, L(bcopy, reverse) + hppa_copy(bcopy_f, sr0, arg0, sr0, arg1, arg2, `+') + b,n L(bcopy, ret) +L(bcopy, reverse) + hppa_copy(bcopy_r, sr0, arg0, sr0, arg1, arg2, `-') +L(bcopy, ret) + bv 0(rp) + nop +EXIT(memcpy) +')dnl +dnl +ifelse(NAME, `spcopy', +` +#ifdef _KERNEL +#include <assym.h> + +/* + * int spcopy (pa_space_t ssp, const void *src, pa_space_t dsp, void *dst, + * size_t size) + * do a space to space bcopy. + * + * assumed that spaces do not clash, otherwise we loose + */ + .import curproc, data + .import copy_on_fault, code +ENTRY(spcopy) + ldw HPPA_FRAME_ARG(4)(sp), ret1 + comb,>=,n r0, ret1, L(spcopy, ret) +` + /* setup fault handler */ + ldil L%curproc, r31 + ldw R%curproc(r31), r31 + ldil L%copy_on_fault, t2 + ldw p_addr(r31), r31 + ldo R%copy_on_fault(t2), t2 + stw t2, pcb_onfault+u_pcb(r31) +' + mfctl sr2, ret0 /* XXX need this?, sr1 is scratchable */ + mtctl arg0, sr1 + mtctl arg2, sr2 + copy ret1, arg0 /* ret1 is used in hppa_blcopy() */ + + hppa_copy(spcopy, sr1, arg1, sr2, arg3, ret1, `+') + + /* reset fault handler */ + stw r0, pcb_onfault+u_pcb(r31) + mtctl ret0, sr2 +L(spcopy, ret) + bv 0(rp) + copy r0, ret0 +EXIT(spcopy) +#endif +')dnl + + .end diff --git a/lib/libc/arch/hppa/string/memmove.S b/lib/libc/arch/hppa/string/memmove.S deleted file mode 100644 index e72a19122be..00000000000 --- a/lib/libc/arch/hppa/string/memmove.S +++ /dev/null @@ -1,252 +0,0 @@ -/* $OpenBSD: memmove.S,v 1.1 1998/12/17 16:56:47 mickey Exp $ */ - -/* - * (c) Copyright 1988 HEWLETT-PACKARD COMPANY - * - * To anyone who acknowledges that this file is provided "AS IS" - * without any express or implied warranty: - * permission to use, copy, modify, and distribute this file - * for any purpose is hereby granted without fee, provided that - * the above copyright notice and this notice appears in all - * copies, and that the name of Hewlett-Packard Company not be - * used in advertising or publicity pertaining to distribution - * of the software without specific, written prior permission. - * Hewlett-Packard Company makes no representations about the - * suitability of this software for any purpose. - */ -/* - * Copyright (c) 1990,1994 The University of Utah and - * the Computer Systems Laboratory (CSL). All rights reserved. - * - * THE UNIVERSITY OF UTAH AND CSL PROVIDE THIS SOFTWARE IN ITS "AS IS" - * CONDITION, AND DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES - * WHATSOEVER RESULTING FROM ITS USE. - * - * CSL requests users of this software to return to csl-dist@cs.utah.edu any - * improvements that they make and grant CSL redistribution rights. - * - * Utah $Hdr: bcopy.s 1.10 94/12/14$ - * Author: Bob Wheeler, University of Utah CSL - */ - -#include <machine/asm.h> - -/* - * void - * memmove(dst, src, count) - * vm_offset_t dst; - * vm_offset_t src; - * int count; - */ -ENTRY(memmove) - /* fall through */ - -/* - * void - * memcpy(dst, src, count) - * vm_offset_t dst; - * vm_offset_t src; - * int count; - */ -ALTENTRY(memcpy) - copy arg0,arg3 - copy arg1,arg0 - copy arg3,arg1 - - comb,>=,n r0,arg2,$bcopy_exit - - /* - * See if the source and destination are word aligned and if the count - * is an integer number of words. If so then we can use an optimized - * routine. If not then branch to bcopy_checkalign and see what we can - * do there. - */ - - or arg0,arg1,t1 - or t1,arg2,t2 - extru,= t2,31,2,r0 - b,n $bcopy_checkalign - - addib,<,n -16,arg2,$bcopy_movewords - - /* - * We can move the data in 4 word moves. We'll use 4 registers to - * avoid interlock and pipeline stalls. - */ - -$bcopy_loop16 - - ldwm 16(arg0),t1 - ldw -12(arg0),t2 - ldw -8(arg0),t3 - ldw -4(arg0),t4 - stwm t1,16(arg1) - stw t2,-12(arg1) - stw t3,-8(arg1) - addib,>= -16,arg2,$bcopy_loop16 - stw t4,-4(arg1) - - - /* - * We have already decremented the count by 16, add 12 to it and then - * we can test if there is at least 1 word left to move. - */ - -$bcopy_movewords - addib,<,n 12,arg2,$bcopy_exit - - /* - * Clean up any remaining words that were not moved in the 16 byte - * moves - */ - -$bcopy_loop4 - ldwm 4(arg0),t1 - addib,>= -4,arg2,$bcopy_loop4 - stwm t1,4(arg1) - - b,n $bcopy_exit - - -$bcopy_checkalign - - /* - * The source or destination is not word aligned or the count is not - * an integral number of words. If we are dealing with less than 16 - * bytes then just do it byte by byte. Otherwise, see if the data has - * the same basic alignment. We will add in the byte offset to size to - * keep track of what we have to move even though the stbys instruction - * won't physically move it. - */ - - comib,>= 15,arg2,$bcopy_byte - extru arg0,31,2,t1 - extru arg1,31,2,t2 - add arg2,t2,arg2 - comb,<> t2,t1,$bcopy_unaligned - dep 0,31,2,arg0 - - /* - * the source and destination have the same basic alignment. We will - * move the data in blocks of 16 bytes as long as we can and then - * we'll go to the 4 byte moves. - */ - - addib,<,n -16,arg2,$bcopy_aligned2 - -$bcopy_loop_aligned4 - ldwm 16(arg0),t1 - ldw -12(arg0),t2 - ldw -8(arg0),t3 - ldw -4(arg0),t4 - stbys,b,m t1,4(arg1) - stwm t2,4(arg1) - stwm t3,4(arg1) - addib,>= -16,arg2,$bcopy_loop_aligned4 - stwm t4,4(arg1) - - /* - * see if there is anything left that needs to be moved in a word move. - * Since the count was decremented by 16, add 12 to test if there are - * any full word moves left to do. - */ - -$bcopy_aligned2 - addib,<,n 12,arg2,$bcopy_cleanup - -$bcopy_loop_aligned2 - ldws,ma 4(arg0),t1 - addib,>= -4,arg2,$bcopy_loop_aligned2 - stbys,b,m t1,4(arg1) - - /* - * move the last bytes that may be unaligned on a word boundary - */ - -$bcopy_cleanup - addib,=,n 4,arg2,$bcopy_exit - ldws 0(arg0),t1 - add arg1,arg2,arg1 - b $bcopy_exit - stbys,e t1,0(arg1) - - /* - * The source and destination are not alligned on the same boundary - * types. We will have to shift the data around. Figure out the shift - * amount and load it into cr11. - */ - -$bcopy_unaligned - sub,>= t2,t1,t3 - ldwm 4(arg0),t1 - zdep t3,28,29,t4 - mtctl t4,11 - - /* - * see if we can do some of this work in blocks of 16 bytes - */ - - addib,<,n -16,arg2,$bcopy_unaligned_words - -$bcopy_unaligned4 - ldwm 16(arg0),t2 - ldw -12(arg0),t3 - ldw -8(arg0),t4 - ldw -4(arg0),r1 - vshd t1,t2,r28 - stbys,b,m r28,4(arg1) - vshd t2,t3,r28 - stwm r28,4(arg1) - vshd t3,t4,r28 - stwm r28,4(arg1) - vshd t4,r1,r28 - stwm r28,4(arg1) - addib,>= -16,arg2,$bcopy_unaligned4 - copy r1,t1 - - /* - * see if there is a full word that we can transfer - */ - -$bcopy_unaligned_words - addib,<,n 12,arg2,$bcopy_unaligned_cleanup1 - -$bcopy_unaligned_loop - ldwm 4(arg0),t2 - vshd t1,t2,t3 - addib,< -4,arg2,$bcopy_unaligned_cleanup2 - stbys,b,m t3,4(arg1) - - ldwm 4(arg0),t1 - vshd t2,t1,t3 - addib,>= -4,arg2,$bcopy_unaligned_loop - stbys,b,m t3,4(arg1) - -$bcopy_unaligned_cleanup1 - copy t1,t2 - -$bcopy_unaligned_cleanup2 - addib,<=,n 4,arg2,$bcopy_exit - add arg1,arg2,arg1 - mfctl sar,t3 - extru t3,28,2,t3 - sub,<= arg2,t3,r0 - ldwm 4(arg0),t1 - vshd t2,t1,t3 - b $bcopy_exit - stbys,e t3,0(arg1) - - /* - * move data one byte at a time - */ - -$bcopy_byte - comb,>=,n r0,arg2,$bcopy_exit - -$bcopy_loop_byte - ldbs,ma 1(arg0),t1 - addib,> -1,arg2,$bcopy_loop_byte - stbs,ma t1,1(arg1) - -$bcopy_exit -EXIT(memmove) |