5 files changed, 282 insertions, 494 deletions
diff --git a/lib/libc/arch/hppa/Makefile.inc b/lib/libc/arch/hppa/Makefile.inc
index 317dc3fccfb..ebffa402c6e 100644
--- a/lib/libc/arch/hppa/Makefile.inc
+++ b/lib/libc/arch/hppa/Makefile.inc
@@ -1,4 +1,12 @@
-#	$OpenBSD: Makefile.inc,v 1.1 1998/12/17 16:56:45 mickey Exp $
+#	$OpenBSD: Makefile.inc,v 1.2 1999/09/14 00:46:18 mickey Exp $
 
 KMINCLUDES=	arch/hppa/DEFS.h arch/hppa/SYS.h prefix.h
-KMSRCS=	bcmp.S bzero.S bcopy.S ffs.S milli.S
+KMSRCS=	bcmp.S bzero.S bcopy.m4 ffs.S
+
+SRCS+=	memmove.S
+CLEANFILES+=	memmove.S
+
+memmove.S:	${LIBCSRCDIR}/arch/hppa/string/bcopy.m4
+	@echo 'building ${.TARGET} from ${.ALLSRC}'
+	@m4 -DNAME=bcopy ${.ALLSRC} > ${.TARGET}
+
diff --git a/lib/libc/arch/hppa/string/Makefile.inc b/lib/libc/arch/hppa/string/Makefile.inc
index bca7da49295..2feab8eb5de 100644
--- a/lib/libc/arch/hppa/string/Makefile.inc
+++ b/lib/libc/arch/hppa/string/Makefile.inc
@@ -1,7 +1,8 @@
-#	$OpenBSD: Makefile.inc,v 1.3 1999/04/27 17:53:32 mickey Exp $
+#	$OpenBSD: Makefile.inc,v 1.4 1999/09/14 00:46:18 mickey Exp $
 
 SRCS+=	memchr.c memcmp.c memset.c index.c rindex.c \
 	strcat.c strcmp.c strcpy.c strcspn.c strlen.c \
 	strncat.c strncmp.c strncpy.c strpbrk.c strsep.c \
 	strspn.c strstr.c swab.c
-SRCS+=	bcmp.S bcopy.S bzero.S ffs.S memmove.S strlcpy.S
+SRCS+=	bcmp.S bzero.S ffs.S strlcpy.S
+
diff --git a/lib/libc/arch/hppa/string/bcopy.S b/lib/libc/arch/hppa/string/bcopy.S
deleted file mode 100644
index 177f41eac05..00000000000
--- a/lib/libc/arch/hppa/string/bcopy.S
+++ /dev/null
@@ -1,238 +0,0 @@
-/*	$OpenBSD: bcopy.S,v 1.1 1998/08/28 20:59:41 mickey Exp $	*/
-
-/*
- *  (c) Copyright 1988 HEWLETT-PACKARD COMPANY
- *
- *  To anyone who acknowledges that this file is provided "AS IS"
- *  without any express or implied warranty:
- *      permission to use, copy, modify, and distribute this file
- *  for any purpose is hereby granted without fee, provided that
- *  the above copyright notice and this notice appears in all
- *  copies, and that the name of Hewlett-Packard Company not be
- *  used in advertising or publicity pertaining to distribution
- *  of the software without specific, written prior permission.
- *  Hewlett-Packard Company makes no representations about the
- *  suitability of this software for any purpose.
- */
-/*
- * Copyright (c) 1990,1994 The University of Utah and
- * the Computer Systems Laboratory (CSL).  All rights reserved.
- *
- * THE UNIVERSITY OF UTAH AND CSL PROVIDE THIS SOFTWARE IN ITS "AS IS"
- * CONDITION, AND DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
- * WHATSOEVER RESULTING FROM ITS USE.
- *
- * CSL requests users of this software to return to csl-dist@cs.utah.edu any
- * improvements that they make and grant CSL redistribution rights.
- *
- * 	Utah $Hdr: bcopy.s 1.10 94/12/14$
- *	Author: Bob Wheeler, University of Utah CSL
- */
-
-#include <machine/asm.h>
-
-/*
- * void 
- * bcopy(src, dst, count)
- *	vm_offset_t	src;
- *	vm_offset_t	dst;
- *	int		count;
- */
-ENTRY(bcopy)
-        comb,>=,n r0,arg2,$bcopy_exit
-
-	/*
-	 * See if the source and destination are word aligned and if the count
-	 * is an integer number of words. If so then we can use an optimized 
-	 * routine. If not then branch to bcopy_checkalign and see what we can
-	 * do there.
-	 */
-
-        or	arg0,arg1,t1
-        or	t1,arg2,t2
-        extru,= t2,31,2,r0
-        b,n     $bcopy_checkalign
-
-        addib,<,n -16,arg2,$bcopy_movewords
-
-	/*
-	 * We can move the data in 4 word moves. We'll use 4 registers to 
-	 * avoid interlock and pipeline stalls.
-	 */
-
-$bcopy_loop16
-
-        ldwm	16(arg0),t1
-        ldw	-12(arg0),t2
-        ldw     -8(arg0),t3
-        ldw     -4(arg0),t4
-        stwm    t1,16(arg1)
-        stw     t2,-12(arg1)
-        stw     t3,-8(arg1)
-        addib,>= -16,arg2,$bcopy_loop16
-        stw     t4,-4(arg1)
-
-
-	/*
-	 * We have already decremented the count by 16, add 12 to it and then 
-	 * we can test if there is at least 1 word left to move.
-	 */
-
-$bcopy_movewords
-        addib,<,n 12,arg2,$bcopy_exit
-
-	/*
-	 * Clean up any remaining words that were not moved in the 16 byte
-	 * moves
-	 */
-
-$bcopy_loop4
-        ldwm	4(arg0),t1
-        addib,>= -4,arg2,$bcopy_loop4
-        stwm    t1,4(arg1)
-
-	b,n 	$bcopy_exit
-
-
-$bcopy_checkalign
-
-	/*
-	 * The source or destination is not word aligned or the count is not 
-	 * an integral number of words. If we are dealing with less than 16 
-	 * bytes then just do it byte by byte. Otherwise, see if the data has 
-	 * the same basic alignment. We will add in the byte offset to size to
-	 * keep track of what we have to move even though the stbys instruction
-	 * won't physically move it. 
-	 */
-
-        comib,>= 15,arg2,$bcopy_byte
-        extru   arg0,31,2,t1
-        extru   arg1,31,2,t2
-        add     arg2,t2,arg2
-        comb,<> t2,t1,$bcopy_unaligned
-        dep     0,31,2,arg0
-
-	/*
-	 * the source and destination have the same basic alignment. We will 
-	 * move the data in blocks of 16 bytes as long as we can and then 
-	 * we'll go to the 4 byte moves.
-	 */
-
-        addib,<,n -16,arg2,$bcopy_aligned2
-
-$bcopy_loop_aligned4
-        ldwm	16(arg0),t1
-        ldw     -12(arg0),t2
-        ldw     -8(arg0),t3
-        ldw     -4(arg0),t4
-        stbys,b,m t1,4(arg1)
-        stwm    t2,4(arg1)
-        stwm    t3,4(arg1)
-        addib,>= -16,arg2,$bcopy_loop_aligned4
-        stwm    t4,4(arg1)
-
-	/*
-	 * see if there is anything left that needs to be moved in a word move.
-	 * Since the count was decremented by 16, add 12 to test if there are 
-	 * any full word moves left to do.
-	 */
-
-$bcopy_aligned2
-        addib,<,n 12,arg2,$bcopy_cleanup
-
-$bcopy_loop_aligned2
-        ldws,ma	4(arg0),t1
-        addib,>= -4,arg2,$bcopy_loop_aligned2
-        stbys,b,m t1,4(arg1)
-
-	/*
-	 * move the last bytes that may be unaligned on a word boundary
-	 */
-
-$bcopy_cleanup
-         addib,=,n 4,arg2,$bcopy_exit
-         ldws	0(arg0),t1
-         add    arg1,arg2,arg1
-         b      $bcopy_exit
-         stbys,e t1,0(arg1)
-
-	/*
-	 * The source and destination are not alligned on the same boundary 
-	 * types. We will have to shift the data around. Figure out the shift 
-	 * amount and load it into cr11.
-	 */
-
-$bcopy_unaligned
-        sub,>=	t2,t1,t3
-        ldwm    4(arg0),t1
-        zdep    t3,28,29,t4
-        mtctl   t4,11
-
-	/*
-	 * see if we can do some of this work in blocks of 16 bytes
-	 */
-
-        addib,<,n -16,arg2,$bcopy_unaligned_words
-
-$bcopy_unaligned4
-        ldwm	16(arg0),t2
-	ldw	-12(arg0),t3
-	ldw	-8(arg0),t4
-	ldw	-4(arg0),r1
-        vshd	t1,t2,r28
-        stbys,b,m r28,4(arg1)
-        vshd	t2,t3,r28
-        stwm	r28,4(arg1)
-        vshd	t3,t4,r28
-        stwm	r28,4(arg1)
-        vshd	t4,r1,r28
-        stwm   	r28,4(arg1)
-        addib,>= -16,arg2,$bcopy_unaligned4
-	copy	r1,t1
-
-	/*
-	 * see if there is a full word that we can transfer
-	 */
-
-$bcopy_unaligned_words
-        addib,<,n 12,arg2,$bcopy_unaligned_cleanup1
-
-$bcopy_unaligned_loop
-        ldwm	4(arg0),t2
-        vshd    t1,t2,t3
-        addib,< -4,arg2,$bcopy_unaligned_cleanup2
-        stbys,b,m t3,4(arg1)
-
-        ldwm	4(arg0),t1
-        vshd    t2,t1,t3
-        addib,>= -4,arg2,$bcopy_unaligned_loop
-        stbys,b,m t3,4(arg1)
-
-$bcopy_unaligned_cleanup1
-	copy	t1,t2
-
-$bcopy_unaligned_cleanup2
-	addib,<=,n 4,arg2,$bcopy_exit
-        add	arg1,arg2,arg1
-	mfctl	sar,t3
-	extru	t3,28,2,t3
-	sub,<=	arg2,t3,r0
-        ldwm    4(arg0),t1
-        vshd    t2,t1,t3
-        b       $bcopy_exit
-        stbys,e t3,0(arg1)
-
-	/*
-	 * move data one byte at a time
-	 */
-
-$bcopy_byte
-        comb,>=,n r0,arg2,$bcopy_exit
-
-$bcopy_loop_byte
-        ldbs,ma	1(arg0),t1
-        addib,> -1,arg2,$bcopy_loop_byte
-        stbs,ma t1,1(arg1) 
-
-$bcopy_exit
-EXIT(bcopy)
diff --git a/lib/libc/arch/hppa/string/bcopy.m4 b/lib/libc/arch/hppa/string/bcopy.m4
new file mode 100644
index 00000000000..bf69e1349ab
--- /dev/null
+++ b/lib/libc/arch/hppa/string/bcopy.m4
@@ -0,0 +1,269 @@
+define(_rcsid,``$OpenBSD: bcopy.m4,v 1.1 1999/09/14 00:46:18 mickey Exp $'')dnl
+dnl
+dnl
+dnl  This is the source file for bcopy.S, spcopy.S
+dnl
+dnl
+define(`versionmacro',substr(_rcsid,1,eval(len(_rcsid)-2)))dnl
+dnl
+/* This is a generated file. DO NOT EDIT. */
+/*
+ * Generated from:
+ *
+ *	versionmacro
+ */
+/*
+ * Copyright (c) 1999 Michael Shalayeff
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Michael Shalayeff.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+dnl
+dnl    macro: L(`arg1',`arg2')
+dnl synopsis: creates an assembly label based on args resulting in $arg1.arg2
+dnl
+define(`L', `$$1.$2')dnl
+dnl
+dnl
+dnl
+define(`STWS',`ifelse($5, `u',dnl
+`ifelse($1, `1', `vshd     $4, t`$1', r31
+	stbys,B,m r31, F`'4($2, $3)',
+`0', `0', `vshd	t`'decr($1), t`$1', r31
+	stws,M	r31, F`'4($2, $3)')',dnl
+`0', `0',
+`ifelse($1, `1',
+`stbys,B`'ifelse(B, `b', `,m ', `0', `0', `	')`'t`$1', F`'4($2, $3)',
+`0', `0', `stws,M	t`$1', F`'4($2, $3)')')')dnl
+define(`STWSS', `ifelse(`$3', `1', `dnl',
+`0', `0', `STWSS($1, $2, eval($3 - 1), $4, $5)')
+	STWS($3, $1, $2, $4, $5)dnl
+')dnl
+define(`LDWSS', `ifelse(`$3', `1', `dnl',
+`0', `0', `LDWSS($1, $2, eval($3 - 1))')
+	ldws,M  F`'4($1, $2), t`'$3`'dnl
+')dnl
+dnl
+dnl copy data in 4-word blocks
+dnl
+define(`hppa_blcopy',`
+	addi	-16, $6, $6
+L($1, `loop16'`$7')
+dnl	cache hint may not work on some hardware
+dnl	ldw	F 32($2, $3), r0
+ifelse(F, `-', `dnl
+	addi	F`'4, $5, $5', `0', `0', `dnl')
+LDWSS($2, $3, 4)
+STWSS($4, $5, 3, `ret1', $7)
+ifelse($7, `u', `dnl
+	STWS(4, $4, $5, `ret1', $7)', $7, `a', `dnl')
+	addib,>= -16, $6, L($1, `loop16'`$7')
+ifelse($7, `a', `dnl
+	STWS(4, $4, $5, `ret1', $7)dnl
+', $7, `u', `dnl
+	copy	t4, ret1')')dnl
+dnl
+dnl copy in words
+dnl
+define(`STWL', `addib,<,n 12, $6, L($1, cleanup)
+ifelse($7, `u', `	copy	ret1, t1', $7, `a', `dnl')
+L($1, word)
+	ldws,M	F`'4($2, $3), t1
+	addib,>= -4, $6, L($1, word)
+	stws,M	t1, F`'4($4, $5)
+
+L($1, cleanup)
+	addib,=,n 4, $6, L($1, done)
+	ldws	0($2, $3), t1
+	add	$5, $6, $5
+	bv	r0(rp)
+	stbys,E	t1, 0($4, $5)
+')
+dnl
+dnl
+dnl parameters:
+dnl  $1	name
+dnl  $2	source space
+dnl  $3	source address
+dnl  $4	destination space
+dnl  $5	destination address
+dnl  $6	length
+dnl  $7	direction
+dnl
+define(hppa_copy,
+`dnl
+dnl
+dnl	if direction is `-' (backwards copy), adjust src, dst
+dnl
+ifelse($7,`-', `add	$3, $6, $3
+	add	$5, $6, $5
+define(`F', `-')dnl
+define(`R', `')dnl
+define(`M', `mb')dnl
+define(`B', `e')dnl
+define(`E', `b')dnl
+',dnl ifelse
+`0',`0',
+`define(`F', `')dnl
+define(`R', `-')dnl
+define(`M', `ma')dnl
+define(`B', `b')dnl
+define(`E', `e')dnl
+')dnl ifelse
+
+ifelse($7,`-', `', `0',`0',
+`	comib,>=,n 15, $6, L($1, byte)
+
+	extru	$3, 31, 2, t3
+	extru	$5, 31, 2, t4
+	add	$6, t4, $6
+	comb,<> t3, t4, L($1, unaligned)
+	dep	r0, 31, 2, $3
+	hppa_blcopy($1, $2, $3, $4, $5, $6, `a')
+
+	STWL($1, $2, $3, $4, $5, $6, `a')dnl
+
+L($1, unaligned)
+	sub,>=	t4, t3, t2
+	ldwm	F`'4($2, $3), ret1
+	zdep	t2, 28, 29, t1
+	mtsar	t1
+	hppa_blcopy($1, $2, $3, $4, $5, $6, `u')
+
+dnl	STWL($1, $2, $3, $4, $5, $6, `u')
+	addib,<,n 12, $6, L($1, cleanup_un)
+L($1, word_un)
+	ldws,M	F`'4($2, $3), t1
+	vshd	ret1, t1, t2
+	addib,<	-4, $6, L($1, cleanup1_un)
+	stws,M	t2, F`'4($4, $5)
+	ldws,M	F`'4($2, $3), ret1
+	vshd	t1, ret1, t2
+	addib,>= -4, $6, L($1, word_un)
+	stws,M	t2, F`'4($4, $5)
+
+L($1, cleanup_un)
+	addib,<=,n 4, $6, L($1, done)
+	mfctl	sar, t4
+	add	$5, $6, $5
+	extru	t4, 28, 2, t4
+	sub,<=	$6, t4, r0
+	ldws,M	F`'4($2, $3), t1
+	vshd	ret1, t1, t2
+	bv	r0(rp)
+	stbys,E	t2, 0($4, $5)
+
+L($1, cleanup1_un)
+	b	L($1, cleanup_un)
+	copy	t1, ret1
+')dnl ifelse
+
+L($1, byte)
+	comb,>=,n r0, $6, L($1, done)
+L($1, byte_loop)
+	ldbs,M	F`'1($2, $3), t1
+	addib,<> -1, $6, L($1, byte_loop)
+	stbs,M	t1, F`'1($4, $5)
+L($1, done)
+')dnl
+`
+#undef _LOCORE
+#define _LOCORE
+#include <machine/asm.h>
+#include <machine/frame.h>
+'
+ifelse(NAME, `bcopy',
+`
+#if defined(LIBC_SCCS)
+        .text
+	.asciz "versionmacro"
+#endif
+/* memcpy and memmove take (dst, src, n) - swap into bcopy order (src, dst, n) and fall through */
+ENTRY(memcpy)
+ALTENTRY(memmove)
+	copy	arg0, ret0	/* ret0 = dst, used as the swap temporary and left as the C return value */
+	copy	arg1, arg0	/* arg0 = src */
+	copy	ret0, arg1	/* arg1 = dst */
+ALTENTRY(ovbcopy)
+ALTENTRY(bcopy)
+	sub	arg1, arg0, t1	/* t1 = dst - src, wraps to a huge unsigned value when dst is below src */
+	comb,>>,n arg2, t1, L(bcopy, reverse)	/* go backwards only if dst lies inside the source range */
+	hppa_copy(bcopy_f, sr0, arg0, sr0, arg1, arg2, `+')
+	b,n	L(bcopy, ret)
+L(bcopy, reverse)
+	hppa_copy(bcopy_r, sr0, arg0, sr0, arg1, arg2, `-')
+L(bcopy, ret)
+	bv	0(rp)
+	nop
+EXIT(memcpy)
+')dnl
+dnl
+ifelse(NAME, `spcopy',
+`
+#ifdef _KERNEL
+#include <assym.h>
+
+/*
+ * int spcopy (pa_space_t ssp, const void *src, pa_space_t dsp, void *dst,
+ *              size_t size)
+ * do a space to space bcopy.
+ *
+ * assumed that spaces do not clash, otherwise we lose
+ */
+	.import	curproc, data
+	.import	copy_on_fault, code
+ENTRY(spcopy)
+	ldw     HPPA_FRAME_ARG(4)(sp), ret1
+	comb,>=,n r0, ret1, L(spcopy, ret)
+`
+	/* setup fault handler */
+	ldil    L%curproc, r31
+	ldw     R%curproc(r31), r31
+	ldil    L%copy_on_fault, t2
+	ldw     p_addr(r31), r31
+	ldo     R%copy_on_fault(t2), t2
+	stw     t2, pcb_onfault+u_pcb(r31)
+'
+	mfctl   sr2, ret0       /* XXX need this?, sr1 is scratchable */
+	mtctl   arg0, sr1
+	mtctl   arg2, sr2
+	copy	ret1, arg0	/* keep the byte count in arg0, the copy code below overwrites ret1 */
+
+	hppa_copy(spcopy, sr1, arg1, sr2, arg3, arg0, `+')
+
+	/* reset fault handler. NOTE(review): r31 is also scratch in the unaligned stores and the tail paths return early, confirm this is reached with r31 intact */
+	stw     r0, pcb_onfault+u_pcb(r31)
+	mtctl   ret0, sr2
+L(spcopy, ret)
+	bv      0(rp)
+	copy    r0, ret0
+EXIT(spcopy)
+#endif
+')dnl
+
+	.end
diff --git a/lib/libc/arch/hppa/string/memmove.S b/lib/libc/arch/hppa/string/memmove.S
deleted file mode 100644
index e72a19122be..00000000000
--- a/lib/libc/arch/hppa/string/memmove.S
+++ /dev/null
@@ -1,252 +0,0 @@
-/*	$OpenBSD: memmove.S,v 1.1 1998/12/17 16:56:47 mickey Exp $	*/
-
-/*
- *  (c) Copyright 1988 HEWLETT-PACKARD COMPANY
- *
- *  To anyone who acknowledges that this file is provided "AS IS"
- *  without any express or implied warranty:
- *      permission to use, copy, modify, and distribute this file
- *  for any purpose is hereby granted without fee, provided that
- *  the above copyright notice and this notice appears in all
- *  copies, and that the name of Hewlett-Packard Company not be
- *  used in advertising or publicity pertaining to distribution
- *  of the software without specific, written prior permission.
- *  Hewlett-Packard Company makes no representations about the
- *  suitability of this software for any purpose.
- */
-/*
- * Copyright (c) 1990,1994 The University of Utah and
- * the Computer Systems Laboratory (CSL).  All rights reserved.
- *
- * THE UNIVERSITY OF UTAH AND CSL PROVIDE THIS SOFTWARE IN ITS "AS IS"
- * CONDITION, AND DISCLAIM ANY LIABILITY OF ANY KIND FOR ANY DAMAGES
- * WHATSOEVER RESULTING FROM ITS USE.
- *
- * CSL requests users of this software to return to csl-dist@cs.utah.edu any
- * improvements that they make and grant CSL redistribution rights.
- *
- * 	Utah $Hdr: bcopy.s 1.10 94/12/14$
- *	Author: Bob Wheeler, University of Utah CSL
- */
-
-#include <machine/asm.h>
-
-/*
- * void
- * memmove(dst, src, count)
- *	vm_offset_t	dst;
- *	vm_offset_t	src;
- *	int		count;
- */
-ENTRY(memmove)
-	/* fall through */
-
-/*
- * void
- * memcpy(dst, src, count)
- *	vm_offset_t	dst;
- *	vm_offset_t	src;
- *	int		count;
- */
-ALTENTRY(memcpy)
-	copy	arg0,arg3
-	copy	arg1,arg0
-	copy	arg3,arg1
-
-        comb,>=,n r0,arg2,$bcopy_exit
-
-	/*
-	 * See if the source and destination are word aligned and if the count
-	 * is an integer number of words. If so then we can use an optimized 
-	 * routine. If not then branch to bcopy_checkalign and see what we can
-	 * do there.
-	 */
-
-        or	arg0,arg1,t1
-        or	t1,arg2,t2
-        extru,= t2,31,2,r0
-        b,n     $bcopy_checkalign
-
-        addib,<,n -16,arg2,$bcopy_movewords
-
-	/*
-	 * We can move the data in 4 word moves. We'll use 4 registers to 
-	 * avoid interlock and pipeline stalls.
-	 */
-
-$bcopy_loop16
-
-        ldwm	16(arg0),t1
-        ldw	-12(arg0),t2
-        ldw     -8(arg0),t3
-        ldw     -4(arg0),t4
-        stwm    t1,16(arg1)
-        stw     t2,-12(arg1)
-        stw     t3,-8(arg1)
-        addib,>= -16,arg2,$bcopy_loop16
-        stw     t4,-4(arg1)
-
-
-	/*
-	 * We have already decremented the count by 16, add 12 to it and then 
-	 * we can test if there is at least 1 word left to move.
-	 */
-
-$bcopy_movewords
-        addib,<,n 12,arg2,$bcopy_exit
-
-	/*
-	 * Clean up any remaining words that were not moved in the 16 byte
-	 * moves
-	 */
-
-$bcopy_loop4
-        ldwm	4(arg0),t1
-        addib,>= -4,arg2,$bcopy_loop4
-        stwm    t1,4(arg1)
-
-	b,n 	$bcopy_exit
-
-
-$bcopy_checkalign
-
-	/*
-	 * The source or destination is not word aligned or the count is not 
-	 * an integral number of words. If we are dealing with less than 16 
-	 * bytes then just do it byte by byte. Otherwise, see if the data has 
-	 * the same basic alignment. We will add in the byte offset to size to
-	 * keep track of what we have to move even though the stbys instruction
-	 * won't physically move it. 
-	 */
-
-        comib,>= 15,arg2,$bcopy_byte
-        extru   arg0,31,2,t1
-        extru   arg1,31,2,t2
-        add     arg2,t2,arg2
-        comb,<> t2,t1,$bcopy_unaligned
-        dep     0,31,2,arg0
-
-	/*
-	 * the source and destination have the same basic alignment. We will 
-	 * move the data in blocks of 16 bytes as long as we can and then 
-	 * we'll go to the 4 byte moves.
-	 */
-
-        addib,<,n -16,arg2,$bcopy_aligned2
-
-$bcopy_loop_aligned4
-        ldwm	16(arg0),t1
-        ldw     -12(arg0),t2
-        ldw     -8(arg0),t3
-        ldw     -4(arg0),t4
-        stbys,b,m t1,4(arg1)
-        stwm    t2,4(arg1)
-        stwm    t3,4(arg1)
-        addib,>= -16,arg2,$bcopy_loop_aligned4
-        stwm    t4,4(arg1)
-
-	/*
-	 * see if there is anything left that needs to be moved in a word move.
-	 * Since the count was decremented by 16, add 12 to test if there are 
-	 * any full word moves left to do.
-	 */
-
-$bcopy_aligned2
-        addib,<,n 12,arg2,$bcopy_cleanup
-
-$bcopy_loop_aligned2
-        ldws,ma	4(arg0),t1
-        addib,>= -4,arg2,$bcopy_loop_aligned2
-        stbys,b,m t1,4(arg1)
-
-	/*
-	 * move the last bytes that may be unaligned on a word boundary
-	 */
-
-$bcopy_cleanup
-         addib,=,n 4,arg2,$bcopy_exit
-         ldws	0(arg0),t1
-         add    arg1,arg2,arg1
-         b      $bcopy_exit
-         stbys,e t1,0(arg1)
-
-	/*
-	 * The source and destination are not alligned on the same boundary 
-	 * types. We will have to shift the data around. Figure out the shift 
-	 * amount and load it into cr11.
-	 */
-
-$bcopy_unaligned
-        sub,>=	t2,t1,t3
-        ldwm    4(arg0),t1
-        zdep    t3,28,29,t4
-        mtctl   t4,11
-
-	/*
-	 * see if we can do some of this work in blocks of 16 bytes
-	 */
-
-        addib,<,n -16,arg2,$bcopy_unaligned_words
-
-$bcopy_unaligned4
-        ldwm	16(arg0),t2
-	ldw	-12(arg0),t3
-	ldw	-8(arg0),t4
-	ldw	-4(arg0),r1
-        vshd	t1,t2,r28
-        stbys,b,m r28,4(arg1)
-        vshd	t2,t3,r28
-        stwm	r28,4(arg1)
-        vshd	t3,t4,r28
-        stwm	r28,4(arg1)
-        vshd	t4,r1,r28
-        stwm   	r28,4(arg1)
-        addib,>= -16,arg2,$bcopy_unaligned4
-	copy	r1,t1
-
-	/*
-	 * see if there is a full word that we can transfer
-	 */
-
-$bcopy_unaligned_words
-        addib,<,n 12,arg2,$bcopy_unaligned_cleanup1
-
-$bcopy_unaligned_loop
-        ldwm	4(arg0),t2
-        vshd    t1,t2,t3
-        addib,< -4,arg2,$bcopy_unaligned_cleanup2
-        stbys,b,m t3,4(arg1)
-
-        ldwm	4(arg0),t1
-        vshd    t2,t1,t3
-        addib,>= -4,arg2,$bcopy_unaligned_loop
-        stbys,b,m t3,4(arg1)
-
-$bcopy_unaligned_cleanup1
-	copy	t1,t2
-
-$bcopy_unaligned_cleanup2
-	addib,<=,n 4,arg2,$bcopy_exit
-        add	arg1,arg2,arg1
-	mfctl	sar,t3
-	extru	t3,28,2,t3
-	sub,<=	arg2,t3,r0
-        ldwm    4(arg0),t1
-        vshd    t2,t1,t3
-        b       $bcopy_exit
-        stbys,e t3,0(arg1)
-
-	/*
-	 * move data one byte at a time
-	 */
-
-$bcopy_byte
-        comb,>=,n r0,arg2,$bcopy_exit
-
-$bcopy_loop_byte
-        ldbs,ma	1(arg0),t1
-        addib,> -1,arg2,$bcopy_loop_byte
-        stbs,ma t1,1(arg1) 
-
-$bcopy_exit
-EXIT(memmove)