summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTheo de Raadt <deraadt@cvs.openbsd.org>2013-06-13 19:34:00 +0000
committerTheo de Raadt <deraadt@cvs.openbsd.org>2013-06-13 19:34:00 +0000
commit3e6dea100267c861a621c71cd8c30097db0caa2a (patch)
tree7ccda0bab3bb381a8e2d8fd7bc677667eec31900
parent81690cea09a348595634b39f1055aa090f221b36 (diff)
new integral bcopy/memmove/memcpy, pulled out of sparc locore
tested by beck
-rw-r--r--sys/arch/sparc/sparc/locore.s352
-rw-r--r--sys/lib/libkern/arch/sparc/bcopy.S2
-rw-r--r--sys/lib/libkern/arch/sparc/memcpy.S2
-rw-r--r--sys/lib/libkern/arch/sparc/memmove.S420
4 files changed, 425 insertions, 351 deletions
diff --git a/sys/arch/sparc/sparc/locore.s b/sys/arch/sparc/sparc/locore.s
index e506ae41dc7..a748abf4a3c 100644
--- a/sys/arch/sparc/sparc/locore.s
+++ b/sys/arch/sparc/sparc/locore.s
@@ -1,4 +1,4 @@
-/* $OpenBSD: locore.s,v 1.93 2013/06/13 04:15:13 deraadt Exp $ */
+/* $OpenBSD: locore.s,v 1.94 2013/06/13 19:33:59 deraadt Exp $ */
/* $NetBSD: locore.s,v 1.73 1997/09/13 20:36:48 pk Exp $ */
/*
@@ -4220,14 +4220,14 @@ ENTRY(copyout)
mov EFAULT, %o0
/*
- * ******NOTE****** this depends on old bcopy() not using %g7
+ * ******NOTE****** this depends on bcopy() not using %g7
*/
Ldocopy:
! sethi %hi(_C_LABEL(cpcb)), %o3
ld [%o3 + %lo(_C_LABEL(cpcb))], %o3
set Lcopyfault, %o4
mov %o7, %g7 ! save return address
- call Lbcopy_old ! bcopy(src, dst, len)
+ call _C_LABEL(bcopy) ! bcopy(src, dst, len)
st %o4, [%o3 + PCB_ONFAULT]
sethi %hi(_C_LABEL(cpcb)), %o3
@@ -4621,354 +4621,10 @@ ENTRY(qzero)
retl
nop
-/*
- * kernel old bcopy/memcpy
- * Assumes regions do not overlap; has no useful return value.
- *
- * Must not use %g7 (see copyin/copyout above).
- */
-
#define BCOPY_SMALL 32 /* if < 32, copy by bytes */
-
-ENTRY(memcpy)
- /*
- * Swap args for bcopy. Gcc generates calls to memcpy for
- * structure assignments.
- */
- mov %o0, %o3
- mov %o1, %o0
- mov %o3, %o1
-Lbcopy_old:
- cmp %o2, BCOPY_SMALL
-Lbcopy_start:
- bge,a Lbcopy_fancy ! if >= this many, go be fancy.
- btst 7, %o0 ! (part of being fancy)
-
- /*
- * Not much to copy, just do it a byte at a time.
- */
- deccc %o2 ! while (--len >= 0)
- bl 1f
- EMPTY
-0:
- inc %o0
- ldsb [%o0 - 1], %o4 ! (++dst)[-1] = *src++;
- stb %o4, [%o1]
- deccc %o2
- bge 0b
- inc %o1
-1:
- retl
- nop
- /* NOTREACHED */
-
- /*
- * Plenty of data to copy, so try to do it optimally.
- */
-Lbcopy_fancy:
- ! check for common case first: everything lines up.
-! btst 7, %o0 ! done already
- bne 1f
- EMPTY
- btst 7, %o1
- be,a Lbcopy_doubles
- dec 8, %o2 ! if all lined up, len -= 8, goto bcopy_doubes
-
- ! If the low bits match, we can make these line up.
-1:
- xor %o0, %o1, %o3 ! t = src ^ dst;
- btst 1, %o3 ! if (t & 1) {
- be,a 1f
- btst 1, %o0 ! [delay slot: if (src & 1)]
-
- ! low bits do not match, must copy by bytes.
-0:
- ldsb [%o0], %o4 ! do {
- inc %o0 ! (++dst)[-1] = *src++;
- inc %o1
- deccc %o2
- bnz 0b ! } while (--len != 0);
- stb %o4, [%o1 - 1]
- retl
- nop
- /* NOTREACHED */
-
- ! lowest bit matches, so we can copy by words, if nothing else
-1:
- be,a 1f ! if (src & 1) {
- btst 2, %o3 ! [delay slot: if (t & 2)]
-
- ! although low bits match, both are 1: must copy 1 byte to align
- ldsb [%o0], %o4 ! *dst++ = *src++;
- stb %o4, [%o1]
- inc %o0
- inc %o1
- dec %o2 ! len--;
- btst 2, %o3 ! } [if (t & 2)]
-1:
- be,a 1f ! if (t & 2) {
- btst 2, %o0 ! [delay slot: if (src & 2)]
- dec 2, %o2 ! len -= 2;
-0:
- ldsh [%o0], %o4 ! do {
- sth %o4, [%o1] ! *(short *)dst = *(short *)src;
- inc 2, %o0 ! dst += 2, src += 2;
- deccc 2, %o2 ! } while ((len -= 2) >= 0);
- bge 0b
- inc 2, %o1
- b Lbcopy_mopb ! goto mop_up_byte;
- btst 1, %o2 ! } [delay slot: if (len & 1)]
- /* NOTREACHED */
-
- ! low two bits match, so we can copy by longwords
-1:
- be,a 1f ! if (src & 2) {
- btst 4, %o3 ! [delay slot: if (t & 4)]
-
- ! although low 2 bits match, they are 10: must copy one short to align
- ldsh [%o0], %o4 ! (*short *)dst = *(short *)src;
- sth %o4, [%o1]
- inc 2, %o0 ! dst += 2;
- inc 2, %o1 ! src += 2;
- dec 2, %o2 ! len -= 2;
- btst 4, %o3 ! } [if (t & 4)]
-1:
- be,a 1f ! if (t & 4) {
- btst 4, %o0 ! [delay slot: if (src & 4)]
- dec 4, %o2 ! len -= 4;
-0:
- ld [%o0], %o4 ! do {
- st %o4, [%o1] ! *(int *)dst = *(int *)src;
- inc 4, %o0 ! dst += 4, src += 4;
- deccc 4, %o2 ! } while ((len -= 4) >= 0);
- bge 0b
- inc 4, %o1
- b Lbcopy_mopw ! goto mop_up_word_and_byte;
- btst 2, %o2 ! } [delay slot: if (len & 2)]
- /* NOTREACHED */
-
- ! low three bits match, so we can copy by doublewords
-1:
- be 1f ! if (src & 4) {
- dec 8, %o2 ! [delay slot: len -= 8]
- ld [%o0], %o4 ! *(int *)dst = *(int *)src;
- st %o4, [%o1]
- inc 4, %o0 ! dst += 4, src += 4, len -= 4;
- inc 4, %o1
- dec 4, %o2 ! }
-1:
-Lbcopy_doubles:
- ldd [%o0], %o4 ! do {
- std %o4, [%o1] ! *(double *)dst = *(double *)src;
- inc 8, %o0 ! dst += 8, src += 8;
- deccc 8, %o2 ! } while ((len -= 8) >= 0);
- bge Lbcopy_doubles
- inc 8, %o1
-
- ! check for a usual case again (save work)
- btst 7, %o2 ! if ((len & 7) == 0)
- be Lbcopy_done ! goto bcopy_done;
-
- btst 4, %o2 ! if ((len & 4) == 0)
- be,a Lbcopy_mopw ! goto mop_up_word_and_byte;
- btst 2, %o2 ! [delay slot: if (len & 2)]
- ld [%o0], %o4 ! *(int *)dst = *(int *)src;
- st %o4, [%o1]
- inc 4, %o0 ! dst += 4;
- inc 4, %o1 ! src += 4;
- btst 2, %o2 ! } [if (len & 2)]
-
-1:
- ! mop up trailing word (if present) and byte (if present).
-Lbcopy_mopw:
- be Lbcopy_mopb ! no word, go mop up byte
- btst 1, %o2 ! [delay slot: if (len & 1)]
- ldsh [%o0], %o4 ! *(short *)dst = *(short *)src;
- be Lbcopy_done ! if ((len & 1) == 0) goto done;
- sth %o4, [%o1]
- ldsb [%o0 + 2], %o4 ! dst[2] = src[2];
- retl
- stb %o4, [%o1 + 2]
- /* NOTREACHED */
-
- ! mop up trailing byte (if present).
-Lbcopy_mopb:
- bne,a 1f
- ldsb [%o0], %o4
-
-Lbcopy_done:
- retl
- nop
-
-1:
- retl
- stb %o4,[%o1]
-
-ENTRY(memmove)
- /*
- * Swap args and continue to bcopy.
- */
- mov %o0, %o3
- mov %o1, %o0
- mov %o3, %o1
/*
- * bcopy(src, dst, len): like old bcopy, but regions may overlap.
+ * Must not use %g7 (see copyin/copyout above).
*/
-ENTRY(bcopy)
- cmp %o0, %o1 ! src < dst?
- bgeu Lbcopy_start ! no, go copy forwards as via old bcopy
- cmp %o2, BCOPY_SMALL! (check length for doublecopy first)
-
- /*
- * Since src comes before dst, and the regions might overlap,
- * we have to do the copy starting at the end and working backwards.
- */
- add %o2, %o0, %o0 ! src += len
- add %o2, %o1, %o1 ! dst += len
- bge,a Lback_fancy ! if len >= BCOPY_SMALL, go be fancy
- btst 3, %o0
-
- /*
- * Not much to copy, just do it a byte at a time.
- */
- deccc %o2 ! while (--len >= 0)
- bl 1f
- EMPTY
-0:
- dec %o0 ! *--dst = *--src;
- ldsb [%o0], %o4
- dec %o1
- deccc %o2
- bge 0b
- stb %o4, [%o1]
-1:
- retl
- nop
-
- /*
- * Plenty to copy, try to be optimal.
- * We only bother with word/halfword/byte copies here.
- */
-Lback_fancy:
-! btst 3, %o0 ! done already
- bnz 1f ! if ((src & 3) == 0 &&
- btst 3, %o1 ! (dst & 3) == 0)
- bz,a Lback_words ! goto words;
- dec 4, %o2 ! (done early for word copy)
-
-1:
- /*
- * See if the low bits match.
- */
- xor %o0, %o1, %o3 ! t = src ^ dst;
- btst 1, %o3
- bz,a 3f ! if (t & 1) == 0, can do better
- btst 1, %o0
-
- /*
- * Nope; gotta do byte copy.
- */
-2:
- dec %o0 ! do {
- ldsb [%o0], %o4 ! *--dst = *--src;
- dec %o1
- deccc %o2 ! } while (--len != 0);
- bnz 2b
- stb %o4, [%o1]
- retl
- nop
-
-3:
- /*
- * Can do halfword or word copy, but might have to copy 1 byte first.
- */
-! btst 1, %o0 ! done earlier
- bz,a 4f ! if (src & 1) { /* copy 1 byte */
- btst 2, %o3 ! (done early)
- dec %o0 ! *--dst = *--src;
- ldsb [%o0], %o4
- dec %o1
- stb %o4, [%o1]
- dec %o2 ! len--;
- btst 2, %o3 ! }
-
-4:
- /*
- * See if we can do a word copy ((t&2) == 0).
- */
-! btst 2, %o3 ! done earlier
- bz,a 6f ! if (t & 2) == 0, can do word copy
- btst 2, %o0 ! (src&2, done early)
-
- /*
- * Gotta do halfword copy.
- */
- dec 2, %o2 ! len -= 2;
-5:
- dec 2, %o0 ! do {
- ldsh [%o0], %o4 ! src -= 2;
- dec 2, %o1 ! dst -= 2;
- deccc 2, %o2 ! *(short *)dst = *(short *)src;
- bge 5b ! } while ((len -= 2) >= 0);
- sth %o4, [%o1]
- b Lback_mopb ! goto mop_up_byte;
- btst 1, %o2 ! (len&1, done early)
-
-6:
- /*
- * We can do word copies, but we might have to copy
- * one halfword first.
- */
-! btst 2, %o0 ! done already
- bz 7f ! if (src & 2) {
- dec 4, %o2 ! (len -= 4, done early)
- dec 2, %o0 ! src -= 2, dst -= 2;
- ldsh [%o0], %o4 ! *(short *)dst = *(short *)src;
- dec 2, %o1
- sth %o4, [%o1]
- dec 2, %o2 ! len -= 2;
- ! }
-
-7:
-Lback_words:
- /*
- * Do word copies (backwards), then mop up trailing halfword
- * and byte if any.
- */
-! dec 4, %o2 ! len -= 4, done already
-0: ! do {
- dec 4, %o0 ! src -= 4;
- dec 4, %o1 ! src -= 4;
- ld [%o0], %o4 ! *(int *)dst = *(int *)src;
- deccc 4, %o2 ! } while ((len -= 4) >= 0);
- bge 0b
- st %o4, [%o1]
-
- /*
- * Check for trailing shortword.
- */
- btst 2, %o2 ! if (len & 2) {
- bz,a 1f
- btst 1, %o2 ! (len&1, done early)
- dec 2, %o0 ! src -= 2, dst -= 2;
- ldsh [%o0], %o4 ! *(short *)dst = *(short *)src;
- dec 2, %o1
- sth %o4, [%o1] ! }
- btst 1, %o2
-
- /*
- * Check for trailing byte.
- */
-1:
-Lback_mopb:
-! btst 1, %o2 ! (done already)
- bnz,a 1f ! if (len & 1) {
- ldsb [%o0 - 1], %o4 ! b = src[-1];
- retl
- nop
-1:
- retl ! dst[-1] = b;
- stb %o4, [%o1 - 1] ! }
/*
* kcopy() is exactly like old bcopy except that it set pcb_onfault such that
diff --git a/sys/lib/libkern/arch/sparc/bcopy.S b/sys/lib/libkern/arch/sparc/bcopy.S
index b34338a26e6..c9361568da4 100644
--- a/sys/lib/libkern/arch/sparc/bcopy.S
+++ b/sys/lib/libkern/arch/sparc/bcopy.S
@@ -1 +1 @@
-/* No code here since kernel implements this itself */
+/* This code is contained in memmove.S */
diff --git a/sys/lib/libkern/arch/sparc/memcpy.S b/sys/lib/libkern/arch/sparc/memcpy.S
index b34338a26e6..c9361568da4 100644
--- a/sys/lib/libkern/arch/sparc/memcpy.S
+++ b/sys/lib/libkern/arch/sparc/memcpy.S
@@ -1 +1 @@
-/* No code here since kernel implements this itself */
+/* This code is contained in memmove.S */
diff --git a/sys/lib/libkern/arch/sparc/memmove.S b/sys/lib/libkern/arch/sparc/memmove.S
index b34338a26e6..8d2d3e62c96 100644
--- a/sys/lib/libkern/arch/sparc/memmove.S
+++ b/sys/lib/libkern/arch/sparc/memmove.S
@@ -1 +1,419 @@
-/* No code here since kernel implements this itself */
+/* $OpenBSD: memmove.S,v 1.4 2013/06/13 19:33:58 deraadt Exp $ */
+
+/*
+ * Copyright (c) 1996
+ * The President and Fellows of Harvard College. All rights reserved.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This software was developed by the Computer Systems Engineering group
+ * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
+ * contributed to Berkeley.
+ *
+ * All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Lawrence Berkeley Laboratory.
+ * This product includes software developed by Harvard University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * This product includes software developed by Harvard University.
+ * This product includes software developed by Paul Kranenburg.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/param.h>
+#include <machine/asm.h>
+
+/*
+ * GNU assembler does not understand `.empty' directive; Sun assembler
+ * gripes about labels without it. To allow cross-compilation using
+ * the Sun assembler, and because .empty directives are useful documentation,
+ * we use this trick.
+ *
+ * Every use of EMPTY in this file sits directly after a branch. Unless
+ * SUN_AS is defined it expands to nothing but a comment, so the branch's
+ * delay slot is filled by whatever instruction comes next in the file.
+ */
+#ifdef SUN_AS
+#define EMPTY .empty
+#else
+#define EMPTY /* .empty */
+#endif
+
+/*
+ * use as needed to align things on longword boundaries
+ * (not referenced by the code visible in this file)
+ */
+#define _ALIGN .align 4
+
+#define BCOPY_SMALL 32 /* byte threshold: if len < 32, copy by bytes */
+
+/*
+ * memcpy(dst, src, len). Assumes regions do not overlap; returns dst.
+ *
+ * The forward-copy code below is also entered at Lbcopy_start by
+ * bcopy/memmove when src >= dst.
+ *
+ * Register use after the argument swap:
+ *   %o0 = src, %o1 = dst, %o2 = len
+ *   %o3 = t = src ^ dst while working out alignment; once the doubleword
+ *         loop is reached t is dead and %o3 temporarily holds the saved dst
+ *   %o4 = data in flight (ldd/std use the %o4/%o5 register pair)
+ *   %o5 = saved dst, moved to %o0 in the delay slot of every retl
+ * No global registers are touched.
+ */
+ENTRY(memcpy)
+ /*
+ * Swap args, because we may end up in bcopy.
+ */
+ mov %o0, %o5 ! save return value
+ mov %o1, %o0
+ mov %o5, %o1
+Lbcopy_old:
+ cmp %o2, BCOPY_SMALL
+Lbcopy_start:
+ ! condition codes here come from "cmp %o2, BCOPY_SMALL"
+ bge,a Lbcopy_fancy ! if >= this many, go be fancy.
+ btst 7, %o0 ! (part of being fancy)
+
+ /*
+ * Not much to copy, just do it a byte at a time.
+ */
+ deccc %o2 ! while (--len >= 0)
+ bl 1f
+ EMPTY
+0:
+ inc %o0
+ ldsb [%o0 - 1], %o4 ! (++dst)[-1] = *src++;
+ stb %o4, [%o1]
+ deccc %o2
+ bge 0b
+ inc %o1
+1:
+ retl
+ mov %o5, %o0 ! return (dst)
+ /* NOTREACHED */
+
+ /*
+ * Plenty of data to copy, so try to do it optimally.
+ */
+Lbcopy_fancy:
+ ! check for common case first: everything lines up.
+! btst 7, %o0 ! done already
+ bne 1f
+ EMPTY
+ btst 7, %o1
+ be,a Lbcopy_doubles
+ dec 8, %o2 ! if all lined up, len -= 8, goto bcopy_doubles
+
+ ! If the low bits match, we can make these line up.
+1:
+ xor %o0, %o1, %o3 ! t = src ^ dst;
+ btst 1, %o3 ! if (t & 1) {
+ be,a 1f
+ btst 1, %o0 ! [delay slot: if (src & 1)]
+
+ ! low bits do not match, must copy by bytes.
+0:
+ ldsb [%o0], %o4 ! do {
+ inc %o0 ! (++dst)[-1] = *src++;
+ inc %o1
+ deccc %o2
+ bnz 0b ! } while (--len != 0);
+ stb %o4, [%o1 - 1]
+ retl
+ mov %o5, %o0 ! return (dst)
+ /* NOTREACHED */
+
+ ! lowest bit matches, so we can copy by words, if nothing else
+1:
+ be,a 1f ! if (src & 1) {
+ btst 2, %o3 ! [delay slot: if (t & 2)]
+
+ ! although low bits match, both are 1: must copy 1 byte to align
+ ldsb [%o0], %o4 ! *dst++ = *src++;
+ stb %o4, [%o1]
+ inc %o0
+ inc %o1
+ dec %o2 ! len--;
+ btst 2, %o3 ! } [if (t & 2)]
+1:
+ be,a 1f ! if (t & 2) {
+ btst 2, %o0 ! [delay slot: if (src & 2)]
+ dec 2, %o2 ! len -= 2;
+0:
+ ldsh [%o0], %o4 ! do {
+ sth %o4, [%o1] ! *(short *)dst = *(short *)src;
+ inc 2, %o0 ! dst += 2, src += 2;
+ deccc 2, %o2 ! } while ((len -= 2) >= 0);
+ bge 0b
+ inc 2, %o1
+ b Lbcopy_mopb ! goto mop_up_byte;
+ btst 1, %o2 ! } [delay slot: if (len & 1)]
+ /* NOTREACHED */
+
+ ! low two bits match, so we can copy by longwords
+1:
+ be,a 1f ! if (src & 2) {
+ btst 4, %o3 ! [delay slot: if (t & 4)]
+
+ ! although low 2 bits match, they are 10: must copy one short to align
+ ldsh [%o0], %o4 ! *(short *)dst = *(short *)src;
+ sth %o4, [%o1]
+ inc 2, %o0 ! src += 2;
+ inc 2, %o1 ! dst += 2;
+ dec 2, %o2 ! len -= 2;
+ btst 4, %o3 ! } [if (t & 4)]
+1:
+ be,a 1f ! if (t & 4) {
+ btst 4, %o0 ! [delay slot: if (src & 4)]
+ dec 4, %o2 ! len -= 4;
+0:
+ ld [%o0], %o4 ! do {
+ st %o4, [%o1] ! *(int *)dst = *(int *)src;
+ inc 4, %o0 ! dst += 4, src += 4;
+ deccc 4, %o2 ! } while ((len -= 4) >= 0);
+ bge 0b
+ inc 4, %o1
+ b Lbcopy_mopw ! goto mop_up_word_and_byte;
+ btst 2, %o2 ! } [delay slot: if (len & 2)]
+ /* NOTREACHED */
+
+ ! low three bits match, so we can copy by doublewords
+1:
+ be 1f ! if (src & 4) {
+ dec 8, %o2 ! [delay slot: len -= 8]
+ ld [%o0], %o4 ! *(int *)dst = *(int *)src;
+ st %o4, [%o1]
+ inc 4, %o0 ! dst += 4, src += 4, len -= 4;
+ inc 4, %o1
+ dec 4, %o2 ! }
+1:
+Lbcopy_doubles:
+ /*
+ * ldd/std move the even/odd register pair %o4/%o5, which would
+ * destroy the return value saved in %o5. t (%o3) is no longer
+ * needed from here on, so keep the saved dst there across the loop.
+ */
+ mov %o5, %o3 ! park saved dst (mov leaves cc alone)
+0:
+ ldd [%o0], %o4 ! do {
+ std %o4, [%o1] ! *(double *)dst = *(double *)src;
+ inc 8, %o0 ! dst += 8, src += 8;
+ deccc 8, %o2 ! } while ((len -= 8) >= 0);
+ bge 0b
+ inc 8, %o1
+ mov %o3, %o5 ! saved dst back where the return paths expect it
+
+ ! check for a usual case again (save work)
+ btst 7, %o2 ! if ((len & 7) == 0)
+ be Lbcopy_done ! goto bcopy_done;
+
+ btst 4, %o2 ! if ((len & 4) == 0)
+ be,a Lbcopy_mopw ! goto mop_up_word_and_byte;
+ btst 2, %o2 ! [delay slot: if (len & 2)]
+ ld [%o0], %o4 ! *(int *)dst = *(int *)src;
+ st %o4, [%o1]
+ inc 4, %o0 ! src += 4;
+ inc 4, %o1 ! dst += 4;
+ btst 2, %o2 ! } [if (len & 2)]
+
+1:
+ ! mop up trailing word (if present) and byte (if present).
+ ! entered with condition codes from "btst 2, %o2"
+Lbcopy_mopw:
+ be Lbcopy_mopb ! no word, go mop up byte
+ btst 1, %o2 ! [delay slot: if (len & 1)]
+ ldsh [%o0], %o4 ! *(short *)dst = *(short *)src;
+ be Lbcopy_done ! if ((len & 1) == 0) goto done;
+ sth %o4, [%o1]
+ ldsb [%o0 + 2], %o4 ! dst[2] = src[2];
+ stb %o4, [%o1 + 2]
+ retl
+ mov %o5, %o0 ! return (dst)
+ /* NOTREACHED */
+
+ ! mop up trailing byte (if present).
+ ! entered with condition codes from "btst 1, %o2"
+Lbcopy_mopb:
+ bne,a 1f
+ ldsb [%o0], %o4
+
+Lbcopy_done:
+ retl
+ mov %o5, %o0 ! return (dst)
+
+1:
+ stb %o4,[%o1]
+ retl
+ mov %o5, %o0 ! return (dst)
+
+/*
+ * memmove(dst, src, len). Handles overlap; returns dst.
+ *
+ * dst is kept in %o5 and moved to %o0 in the delay slot of each retl.
+ * The backward-copy code below only moves data through %o4 with
+ * byte/halfword/word loads and stores, so %o5 is left alone here.
+ */
+ENTRY(memmove)
+ /*
+ * Swap args and continue to bcopy.
+ */
+ mov %o0, %o5 ! save dst
+ mov %o1, %o0
+ mov %o5, %o1
+/*
+ * bcopy(src, dst, len): regions may overlap.
+ *
+ * Entering here skips the "save dst" above, so %o5 (and therefore the
+ * value left in %o0 on return) is whatever the caller had in %o5.
+ *
+ * Must not use %g7: copyin/copyout in locore.s keep their return
+ * address in %g7 across their call to bcopy.
+ */
+ENTRY(bcopy)
+ cmp %o0, %o1 ! src < dst?
+ bgeu Lbcopy_start ! no, go copy forwards as via old bcopy
+ cmp %o2, BCOPY_SMALL! (check length for doublecopy first)
+ ! the cmp above sits in the delay slot, so its condition codes
+ ! are what Lbcopy_start and the bge,a below both test
+
+ /*
+ * Since src comes before dst, and the regions might overlap,
+ * we have to do the copy starting at the end and working backwards.
+ */
+ add %o2, %o0, %o0 ! src += len
+ add %o2, %o1, %o1 ! dst += len
+ bge,a Lback_fancy ! if len >= BCOPY_SMALL, go be fancy
+ btst 3, %o0
+
+ /*
+ * Not much to copy, just do it a byte at a time.
+ */
+ deccc %o2 ! while (--len >= 0)
+ bl 1f
+ EMPTY
+0:
+ dec %o0 ! *--dst = *--src;
+ ldsb [%o0], %o4
+ dec %o1
+ deccc %o2
+ bge 0b
+ stb %o4, [%o1]
+1:
+ retl
+ mov %o5, %o0 ! return (dst)
+
+ /*
+ * Plenty to copy, try to be optimal.
+ * We only bother with word/halfword/byte copies here.
+ */
+Lback_fancy:
+! btst 3, %o0 ! done already
+ bnz 1f ! if ((src & 3) == 0 &&
+ btst 3, %o1 ! (dst & 3) == 0)
+ bz,a Lback_words ! goto words;
+ dec 4, %o2 ! (done early for word copy)
+
+1:
+ /*
+ * See if the low bits match.
+ */
+ xor %o0, %o1, %o3 ! t = src ^ dst;
+ btst 1, %o3
+ bz,a 3f ! if (t & 1) == 0, can do better
+ btst 1, %o0
+
+ /*
+ * Nope; gotta do byte copy.
+ */
+2:
+ dec %o0 ! do {
+ ldsb [%o0], %o4 ! *--dst = *--src;
+ dec %o1
+ deccc %o2 ! } while (--len != 0);
+ bnz 2b
+ stb %o4, [%o1]
+ retl
+ mov %o5, %o0 ! return (dst)
+
+3:
+ /*
+ * Can do halfword or word copy, but might have to copy 1 byte first.
+ */
+! btst 1, %o0 ! done earlier
+ bz,a 4f ! if (src & 1) { /* copy 1 byte */
+ btst 2, %o3 ! (done early)
+ dec %o0 ! *--dst = *--src;
+ ldsb [%o0], %o4
+ dec %o1
+ stb %o4, [%o1]
+ dec %o2 ! len--;
+ btst 2, %o3 ! }
+
+4:
+ /*
+ * See if we can do a word copy ((t&2) == 0).
+ */
+! btst 2, %o3 ! done earlier
+ bz,a 6f ! if (t & 2) == 0, can do word copy
+ btst 2, %o0 ! (src&2, done early)
+
+ /*
+ * Gotta do halfword copy.
+ */
+ dec 2, %o2 ! len -= 2;
+5:
+ dec 2, %o0 ! do {
+ ldsh [%o0], %o4 ! src -= 2;
+ dec 2, %o1 ! dst -= 2;
+ deccc 2, %o2 ! *(short *)dst = *(short *)src;
+ bge 5b ! } while ((len -= 2) >= 0);
+ sth %o4, [%o1]
+ b Lback_mopb ! goto mop_up_byte;
+ btst 1, %o2 ! (len&1, done early)
+
+6:
+ /*
+ * We can do word copies, but we might have to copy
+ * one halfword first.
+ */
+! btst 2, %o0 ! done already
+ bz 7f ! if (src & 2) {
+ dec 4, %o2 ! (len -= 4, done early)
+ dec 2, %o0 ! src -= 2, dst -= 2;
+ ldsh [%o0], %o4 ! *(short *)dst = *(short *)src;
+ dec 2, %o1
+ sth %o4, [%o1]
+ dec 2, %o2 ! len -= 2;
+ ! }
+
+7:
+Lback_words:
+ /*
+ * Do word copies (backwards), then mop up trailing halfword
+ * and byte if any.
+ */
+! dec 4, %o2 ! len -= 4, done already
+0: ! do {
+ dec 4, %o0 ! src -= 4;
+ dec 4, %o1 ! dst -= 4;
+ ld [%o0], %o4 ! *(int *)dst = *(int *)src;
+ deccc 4, %o2 ! } while ((len -= 4) >= 0);
+ bge 0b
+ st %o4, [%o1]
+
+ /*
+ * Check for trailing shortword.
+ */
+ btst 2, %o2 ! if (len & 2) {
+ bz,a 1f
+ btst 1, %o2 ! (len&1, done early)
+ dec 2, %o0 ! src -= 2, dst -= 2;
+ ldsh [%o0], %o4 ! *(short *)dst = *(short *)src;
+ dec 2, %o1
+ sth %o4, [%o1] ! }
+ btst 1, %o2
+
+ /*
+ * Check for trailing byte.
+ */
+1:
+Lback_mopb:
+! btst 1, %o2 ! (done already)
+ bnz,a 1f ! if (len & 1) {
+ ldsb [%o0 - 1], %o4 ! b = src[-1];
+ retl
+ mov %o5, %o0 ! return (dst)
+
+1:
+ stb %o4, [%o1 - 1] ! dst[-1] = b;
+ retl ! }
+ mov %o5, %o0 ! return (dst)
+
+