diff options
author | Theo de Raadt <deraadt@cvs.openbsd.org> | 2013-06-13 19:34:00 +0000 |
---|---|---|
committer | Theo de Raadt <deraadt@cvs.openbsd.org> | 2013-06-13 19:34:00 +0000 |
commit | 3e6dea100267c861a621c71cd8c30097db0caa2a (patch) | |
tree | 7ccda0bab3bb381a8e2d8fd7bc677667eec31900 | |
parent | 81690cea09a348595634b39f1055aa090f221b36 (diff) |
new integral bcopy/memmove/memcpy, pulled out of sparc locore
tested by beck
-rw-r--r-- | sys/arch/sparc/sparc/locore.s | 352 | ||||
-rw-r--r-- | sys/lib/libkern/arch/sparc/bcopy.S | 2 | ||||
-rw-r--r-- | sys/lib/libkern/arch/sparc/memcpy.S | 2 | ||||
-rw-r--r-- | sys/lib/libkern/arch/sparc/memmove.S | 420 |
4 files changed, 425 insertions, 351 deletions
diff --git a/sys/arch/sparc/sparc/locore.s b/sys/arch/sparc/sparc/locore.s index e506ae41dc7..a748abf4a3c 100644 --- a/sys/arch/sparc/sparc/locore.s +++ b/sys/arch/sparc/sparc/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.93 2013/06/13 04:15:13 deraadt Exp $ */ +/* $OpenBSD: locore.s,v 1.94 2013/06/13 19:33:59 deraadt Exp $ */ /* $NetBSD: locore.s,v 1.73 1997/09/13 20:36:48 pk Exp $ */ /* @@ -4220,14 +4220,14 @@ ENTRY(copyout) mov EFAULT, %o0 /* - * ******NOTE****** this depends on old bcopy() not using %g7 + * ******NOTE****** this depends on bcopy() not using %g7 */ Ldocopy: ! sethi %hi(_C_LABEL(cpcb)), %o3 ld [%o3 + %lo(_C_LABEL(cpcb))], %o3 set Lcopyfault, %o4 mov %o7, %g7 ! save return address - call Lbcopy_old ! bcopy(src, dst, len) + call _C_LABEL(bcopy) ! bcopy(src, dst, len) st %o4, [%o3 + PCB_ONFAULT] sethi %hi(_C_LABEL(cpcb)), %o3 @@ -4621,354 +4621,10 @@ ENTRY(qzero) retl nop -/* - * kernel old bcopy/memcpy - * Assumes regions do not overlap; has no useful return value. - * - * Must not use %g7 (see copyin/copyout above). - */ - #define BCOPY_SMALL 32 /* if < 32, copy by bytes */ - -ENTRY(memcpy) - /* - * Swap args for bcopy. Gcc generates calls to memcpy for - * structure assignments. - */ - mov %o0, %o3 - mov %o1, %o0 - mov %o3, %o1 -Lbcopy_old: - cmp %o2, BCOPY_SMALL -Lbcopy_start: - bge,a Lbcopy_fancy ! if >= this many, go be fancy. - btst 7, %o0 ! (part of being fancy) - - /* - * Not much to copy, just do it a byte at a time. - */ - deccc %o2 ! while (--len >= 0) - bl 1f - EMPTY -0: - inc %o0 - ldsb [%o0 - 1], %o4 ! (++dst)[-1] = *src++; - stb %o4, [%o1] - deccc %o2 - bge 0b - inc %o1 -1: - retl - nop - /* NOTREACHED */ - - /* - * Plenty of data to copy, so try to do it optimally. - */ -Lbcopy_fancy: - ! check for common case first: everything lines up. -! btst 7, %o0 ! done already - bne 1f - EMPTY - btst 7, %o1 - be,a Lbcopy_doubles - dec 8, %o2 ! if all lined up, len -= 8, goto bcopy_doubes - - ! 
If the low bits match, we can make these line up. -1: - xor %o0, %o1, %o3 ! t = src ^ dst; - btst 1, %o3 ! if (t & 1) { - be,a 1f - btst 1, %o0 ! [delay slot: if (src & 1)] - - ! low bits do not match, must copy by bytes. -0: - ldsb [%o0], %o4 ! do { - inc %o0 ! (++dst)[-1] = *src++; - inc %o1 - deccc %o2 - bnz 0b ! } while (--len != 0); - stb %o4, [%o1 - 1] - retl - nop - /* NOTREACHED */ - - ! lowest bit matches, so we can copy by words, if nothing else -1: - be,a 1f ! if (src & 1) { - btst 2, %o3 ! [delay slot: if (t & 2)] - - ! although low bits match, both are 1: must copy 1 byte to align - ldsb [%o0], %o4 ! *dst++ = *src++; - stb %o4, [%o1] - inc %o0 - inc %o1 - dec %o2 ! len--; - btst 2, %o3 ! } [if (t & 2)] -1: - be,a 1f ! if (t & 2) { - btst 2, %o0 ! [delay slot: if (src & 2)] - dec 2, %o2 ! len -= 2; -0: - ldsh [%o0], %o4 ! do { - sth %o4, [%o1] ! *(short *)dst = *(short *)src; - inc 2, %o0 ! dst += 2, src += 2; - deccc 2, %o2 ! } while ((len -= 2) >= 0); - bge 0b - inc 2, %o1 - b Lbcopy_mopb ! goto mop_up_byte; - btst 1, %o2 ! } [delay slot: if (len & 1)] - /* NOTREACHED */ - - ! low two bits match, so we can copy by longwords -1: - be,a 1f ! if (src & 2) { - btst 4, %o3 ! [delay slot: if (t & 4)] - - ! although low 2 bits match, they are 10: must copy one short to align - ldsh [%o0], %o4 ! (*short *)dst = *(short *)src; - sth %o4, [%o1] - inc 2, %o0 ! dst += 2; - inc 2, %o1 ! src += 2; - dec 2, %o2 ! len -= 2; - btst 4, %o3 ! } [if (t & 4)] -1: - be,a 1f ! if (t & 4) { - btst 4, %o0 ! [delay slot: if (src & 4)] - dec 4, %o2 ! len -= 4; -0: - ld [%o0], %o4 ! do { - st %o4, [%o1] ! *(int *)dst = *(int *)src; - inc 4, %o0 ! dst += 4, src += 4; - deccc 4, %o2 ! } while ((len -= 4) >= 0); - bge 0b - inc 4, %o1 - b Lbcopy_mopw ! goto mop_up_word_and_byte; - btst 2, %o2 ! } [delay slot: if (len & 2)] - /* NOTREACHED */ - - ! low three bits match, so we can copy by doublewords -1: - be 1f ! if (src & 4) { - dec 8, %o2 ! [delay slot: len -= 8] - ld [%o0], %o4 ! 
*(int *)dst = *(int *)src; - st %o4, [%o1] - inc 4, %o0 ! dst += 4, src += 4, len -= 4; - inc 4, %o1 - dec 4, %o2 ! } -1: -Lbcopy_doubles: - ldd [%o0], %o4 ! do { - std %o4, [%o1] ! *(double *)dst = *(double *)src; - inc 8, %o0 ! dst += 8, src += 8; - deccc 8, %o2 ! } while ((len -= 8) >= 0); - bge Lbcopy_doubles - inc 8, %o1 - - ! check for a usual case again (save work) - btst 7, %o2 ! if ((len & 7) == 0) - be Lbcopy_done ! goto bcopy_done; - - btst 4, %o2 ! if ((len & 4) == 0) - be,a Lbcopy_mopw ! goto mop_up_word_and_byte; - btst 2, %o2 ! [delay slot: if (len & 2)] - ld [%o0], %o4 ! *(int *)dst = *(int *)src; - st %o4, [%o1] - inc 4, %o0 ! dst += 4; - inc 4, %o1 ! src += 4; - btst 2, %o2 ! } [if (len & 2)] - -1: - ! mop up trailing word (if present) and byte (if present). -Lbcopy_mopw: - be Lbcopy_mopb ! no word, go mop up byte - btst 1, %o2 ! [delay slot: if (len & 1)] - ldsh [%o0], %o4 ! *(short *)dst = *(short *)src; - be Lbcopy_done ! if ((len & 1) == 0) goto done; - sth %o4, [%o1] - ldsb [%o0 + 2], %o4 ! dst[2] = src[2]; - retl - stb %o4, [%o1 + 2] - /* NOTREACHED */ - - ! mop up trailing byte (if present). -Lbcopy_mopb: - bne,a 1f - ldsb [%o0], %o4 - -Lbcopy_done: - retl - nop - -1: - retl - stb %o4,[%o1] - -ENTRY(memmove) - /* - * Swap args and continue to bcopy. - */ - mov %o0, %o3 - mov %o1, %o0 - mov %o3, %o1 /* - * bcopy(src, dst, len): like old bcopy, but regions may overlap. + * Must not use %g7 (see copyin/copyout above). */ -ENTRY(bcopy) - cmp %o0, %o1 ! src < dst? - bgeu Lbcopy_start ! no, go copy forwards as via old bcopy - cmp %o2, BCOPY_SMALL! (check length for doublecopy first) - - /* - * Since src comes before dst, and the regions might overlap, - * we have to do the copy starting at the end and working backwards. - */ - add %o2, %o0, %o0 ! src += len - add %o2, %o1, %o1 ! dst += len - bge,a Lback_fancy ! if len >= BCOPY_SMALL, go be fancy - btst 3, %o0 - - /* - * Not much to copy, just do it a byte at a time. - */ - deccc %o2 ! 
while (--len >= 0) - bl 1f - EMPTY -0: - dec %o0 ! *--dst = *--src; - ldsb [%o0], %o4 - dec %o1 - deccc %o2 - bge 0b - stb %o4, [%o1] -1: - retl - nop - - /* - * Plenty to copy, try to be optimal. - * We only bother with word/halfword/byte copies here. - */ -Lback_fancy: -! btst 3, %o0 ! done already - bnz 1f ! if ((src & 3) == 0 && - btst 3, %o1 ! (dst & 3) == 0) - bz,a Lback_words ! goto words; - dec 4, %o2 ! (done early for word copy) - -1: - /* - * See if the low bits match. - */ - xor %o0, %o1, %o3 ! t = src ^ dst; - btst 1, %o3 - bz,a 3f ! if (t & 1) == 0, can do better - btst 1, %o0 - - /* - * Nope; gotta do byte copy. - */ -2: - dec %o0 ! do { - ldsb [%o0], %o4 ! *--dst = *--src; - dec %o1 - deccc %o2 ! } while (--len != 0); - bnz 2b - stb %o4, [%o1] - retl - nop - -3: - /* - * Can do halfword or word copy, but might have to copy 1 byte first. - */ -! btst 1, %o0 ! done earlier - bz,a 4f ! if (src & 1) { /* copy 1 byte */ - btst 2, %o3 ! (done early) - dec %o0 ! *--dst = *--src; - ldsb [%o0], %o4 - dec %o1 - stb %o4, [%o1] - dec %o2 ! len--; - btst 2, %o3 ! } - -4: - /* - * See if we can do a word copy ((t&2) == 0). - */ -! btst 2, %o3 ! done earlier - bz,a 6f ! if (t & 2) == 0, can do word copy - btst 2, %o0 ! (src&2, done early) - - /* - * Gotta do halfword copy. - */ - dec 2, %o2 ! len -= 2; -5: - dec 2, %o0 ! do { - ldsh [%o0], %o4 ! src -= 2; - dec 2, %o1 ! dst -= 2; - deccc 2, %o2 ! *(short *)dst = *(short *)src; - bge 5b ! } while ((len -= 2) >= 0); - sth %o4, [%o1] - b Lback_mopb ! goto mop_up_byte; - btst 1, %o2 ! (len&1, done early) - -6: - /* - * We can do word copies, but we might have to copy - * one halfword first. - */ -! btst 2, %o0 ! done already - bz 7f ! if (src & 2) { - dec 4, %o2 ! (len -= 4, done early) - dec 2, %o0 ! src -= 2, dst -= 2; - ldsh [%o0], %o4 ! *(short *)dst = *(short *)src; - dec 2, %o1 - sth %o4, [%o1] - dec 2, %o2 ! len -= 2; - ! 
} - -7: -Lback_words: - /* - * Do word copies (backwards), then mop up trailing halfword - * and byte if any. - */ -! dec 4, %o2 ! len -= 4, done already -0: ! do { - dec 4, %o0 ! src -= 4; - dec 4, %o1 ! src -= 4; - ld [%o0], %o4 ! *(int *)dst = *(int *)src; - deccc 4, %o2 ! } while ((len -= 4) >= 0); - bge 0b - st %o4, [%o1] - - /* - * Check for trailing shortword. - */ - btst 2, %o2 ! if (len & 2) { - bz,a 1f - btst 1, %o2 ! (len&1, done early) - dec 2, %o0 ! src -= 2, dst -= 2; - ldsh [%o0], %o4 ! *(short *)dst = *(short *)src; - dec 2, %o1 - sth %o4, [%o1] ! } - btst 1, %o2 - - /* - * Check for trailing byte. - */ -1: -Lback_mopb: -! btst 1, %o2 ! (done already) - bnz,a 1f ! if (len & 1) { - ldsb [%o0 - 1], %o4 ! b = src[-1]; - retl - nop -1: - retl ! dst[-1] = b; - stb %o4, [%o1 - 1] ! } /* * kcopy() is exactly like old bcopy except that it set pcb_onfault such that diff --git a/sys/lib/libkern/arch/sparc/bcopy.S b/sys/lib/libkern/arch/sparc/bcopy.S index b34338a26e6..c9361568da4 100644 --- a/sys/lib/libkern/arch/sparc/bcopy.S +++ b/sys/lib/libkern/arch/sparc/bcopy.S @@ -1 +1 @@ -/* No code here since kernel implements this itself */ +/* This code is contained in memmove.S */ diff --git a/sys/lib/libkern/arch/sparc/memcpy.S b/sys/lib/libkern/arch/sparc/memcpy.S index b34338a26e6..c9361568da4 100644 --- a/sys/lib/libkern/arch/sparc/memcpy.S +++ b/sys/lib/libkern/arch/sparc/memcpy.S @@ -1 +1 @@ -/* No code here since kernel implements this itself */ +/* This code is contained in memmove.S */ diff --git a/sys/lib/libkern/arch/sparc/memmove.S b/sys/lib/libkern/arch/sparc/memmove.S index b34338a26e6..8d2d3e62c96 100644 --- a/sys/lib/libkern/arch/sparc/memmove.S +++ b/sys/lib/libkern/arch/sparc/memmove.S @@ -1 +1,419 @@ -/* No code here since kernel implements this itself */ +/* $OpenBSD: memmove.S,v 1.4 2013/06/13 19:33:58 deraadt Exp $ */ + +/* + * Copyright (c) 1996 + * The President and Fellows of Harvard College. All rights reserved. 
+ * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This software was developed by the Computer Systems Engineering group + * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and + * contributed to Berkeley. + * + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Lawrence Berkeley Laboratory. + * This product includes software developed by Harvard University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * This product includes software developed by Harvard University. + * This product includes software developed by Paul Kranenburg. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/param.h>
+#include <machine/asm.h>
+
+/*
+ * GNU assembler does not understand `.empty' directive; Sun assembler
+ * gripes about labels without it.  To allow cross-compilation using
+ * the Sun assembler, and because .empty directives are useful documentation,
+ * we use this trick.
+ */
+#ifdef SUN_AS
+#define	EMPTY	.empty
+#else
+#define	EMPTY	/* .empty */
+#endif
+
+/* use as needed to align things on longword boundaries */
+#define	_ALIGN	.align 4
+
+#define	BCOPY_SMALL	32	/* if < 32, copy by bytes */
+
+/*
+ * memcpy(dst, src, len).  Assumes regions do not overlap; returns dst.
+ *
+ * Register use in the shared copy code below (after the argument swap):
+ *	%o0	src (advances as data is copied)
+ *	%o1	dst (advances as data is copied)
+ *	%o2	len (remaining byte count, biased during the unrolled loops)
+ *	%o3	t = src ^ dst, used only while choosing the alignment strategy
+ *	%o4	data scratch (paired with %o5 by ldd/std in the doubleword loop)
+ *	%o5	original dst, moved to %o0 in every return delay slot
+ *
+ * The copyin/copyout code in locore.s keeps its return address in %g7
+ * across a call to bcopy(), so nothing in this file may touch %g7.
+ */
+ENTRY(memcpy)
+	/*
+	 * Swap args, because we may end up in bcopy.
+	 */
+	mov	%o0, %o5		! save return value
+	mov	%o1, %o0
+	mov	%o5, %o1
+Lbcopy_old:
+	cmp	%o2, BCOPY_SMALL
+Lbcopy_start:
+	bge,a	Lbcopy_fancy	! if >= this many, go be fancy.
+	 btst	7, %o0		! (part of being fancy)
+
+	/*
+	 * Not much to copy, just do it a byte at a time.
+	 */
+	deccc	%o2		! while (--len >= 0)
+	bl	1f
+	EMPTY
+0:
+	inc	%o0
+	ldsb	[%o0 - 1], %o4	!	(++dst)[-1] = *src++;
+	stb	%o4, [%o1]
+	deccc	%o2
+	bge	0b
+	 inc	%o1
+1:
+	retl
+	 mov	%o5, %o0		! return (dst)
+	/* NOTREACHED */
+
+	/*
+	 * Plenty of data to copy, so try to do it optimally.
+	 */
+Lbcopy_fancy:
+	! check for common case first: everything lines up.
+!	btst	7, %o0		! done already
+	bne	1f
+	EMPTY
+	btst	7, %o1
+	be,a	Lbcopy_doubles
+	 dec	8, %o2		! if all lined up, len -= 8, goto bcopy_doubles
+
+	! If the low bits match, we can make these line up.
+1:
+	xor	%o0, %o1, %o3	! t = src ^ dst;
+	btst	1, %o3		! if (t & 1) {
+	be,a	1f
+	 btst	1, %o0		! [delay slot: if (src & 1)]
+
+	! low bits do not match, must copy by bytes.
+0:
+	ldsb	[%o0], %o4	!	do {
+	inc	%o0		!		(++dst)[-1] = *src++;
+	inc	%o1
+	deccc	%o2
+	bnz	0b		!	} while (--len != 0);
+	 stb	%o4, [%o1 - 1]
+	retl
+	 mov	%o5, %o0		! return (dst)
+	/* NOTREACHED */
+
+	! lowest bit matches, so we can copy by halfwords, if nothing else
+1:
+	be,a	1f		! if (src & 1) {
+	 btst	2, %o3		! [delay slot: if (t & 2)]
+
+	! although low bits match, both are 1: must copy 1 byte to align
+	ldsb	[%o0], %o4	!	*dst++ = *src++;
+	stb	%o4, [%o1]
+	inc	%o0
+	inc	%o1
+	dec	%o2		!	len--;
+	btst	2, %o3		! } [if (t & 2)]
+1:
+	be,a	1f		! if (t & 2) {
+	 btst	2, %o0		! [delay slot: if (src & 2)]
+	dec	2, %o2		!	len -= 2;
+0:
+	ldsh	[%o0], %o4	!	do {
+	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
+	inc	2, %o0		!		dst += 2, src += 2;
+	deccc	2, %o2		!	} while ((len -= 2) >= 0);
+	bge	0b
+	 inc	2, %o1
+	b	Lbcopy_mopb	!	goto mop_up_byte;
+	 btst	1, %o2		! } [delay slot: if (len & 1)]
+	/* NOTREACHED */
+
+	! low two bits match, so we can copy by longwords
+1:
+	be,a	1f		! if (src & 2) {
+	 btst	4, %o3		! [delay slot: if (t & 4)]
+
+	! although low 2 bits match, they are 10: must copy one short to align
+	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
+	sth	%o4, [%o1]
+	inc	2, %o0		!	src += 2;
+	inc	2, %o1		!	dst += 2;
+	dec	2, %o2		!	len -= 2;
+	btst	4, %o3		! } [if (t & 4)]
+1:
+	be,a	1f		! if (t & 4) {
+	 btst	4, %o0		! [delay slot: if (src & 4)]
+	dec	4, %o2		!	len -= 4;
+0:
+	ld	[%o0], %o4	!	do {
+	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
+	inc	4, %o0		!		dst += 4, src += 4;
+	deccc	4, %o2		!	} while ((len -= 4) >= 0);
+	bge	0b
+	 inc	4, %o1
+	b	Lbcopy_mopw	!	goto mop_up_word_and_byte;
+	 btst	2, %o2		! } [delay slot: if (len & 2)]
+	/* NOTREACHED */
+
+	! low three bits match, so we can copy by doublewords
+1:
+	be	1f		! if (src & 4) {
+	 dec	8, %o2		! [delay slot: len -= 8]
+	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
+	st	%o4, [%o1]
+	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
+	inc	4, %o1
+	dec	4, %o2		! }
+1:
+Lbcopy_doubles:
+	! ldd/std below operate on the even/odd register pair %o4:%o5, so
+	! the loop would destroy the dst pointer saved in %o5.  t (%o3) is
+	! not read again past this point, so park the return value there
+	! while the doubleword loop runs.
+	mov	%o5, %o3
+Lbcopy_dloop:
+	ldd	[%o0], %o4	! do {
+	std	%o4, [%o1]	!	*(double *)dst = *(double *)src;
+	inc	8, %o0		!	dst += 8, src += 8;
+	deccc	8, %o2		! } while ((len -= 8) >= 0);
+	bge	Lbcopy_dloop
+	 inc	8, %o1
+	mov	%o3, %o5		! restore saved dst for the return paths
+
+	! check for a usual case again (save work)
+	btst	7, %o2		! if ((len & 7) == 0)
+	be	Lbcopy_done	!	goto bcopy_done;
+
+	 btst	4, %o2		! if ((len & 4) == 0)
+	be,a	Lbcopy_mopw	!	goto mop_up_word_and_byte;
+	 btst	2, %o2		! [delay slot: if (len & 2)]
+	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
+	st	%o4, [%o1]
+	inc	4, %o0		!	src += 4;
+	inc	4, %o1		!	dst += 4;
+	btst	2, %o2		! } [if (len & 2)]
+
+1:
+	! mop up trailing halfword (if present) and byte (if present).
+Lbcopy_mopw:
+	be	Lbcopy_mopb	! no halfword, go mop up byte
+	 btst	1, %o2		! [delay slot: if (len & 1)]
+	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
+	be	Lbcopy_done	! if ((len & 1) == 0) goto done;
+	 sth	%o4, [%o1]
+	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
+	stb	%o4, [%o1 + 2]
+	retl
+	 mov	%o5, %o0		! return (dst)
+	/* NOTREACHED */
+
+	! mop up trailing byte (if present).
+Lbcopy_mopb:
+	bne,a	1f
+	 ldsb	[%o0], %o4
+
+Lbcopy_done:
+	retl
+	 mov	%o5, %o0		! return (dst)
+
+1:
+	stb	%o4,[%o1]
+	retl
+	 mov	%o5, %o0		! return (dst)
+
+/*
+ * memmove(dst, src, len).  Handles overlap; returns dst.
+ */
+ENTRY(memmove)
+	/*
+	 * Swap args and continue to bcopy.
+	 */
+	mov	%o0, %o5		! save dst
+	mov	%o1, %o0
+	mov	%o5, %o1
+/*
+ * bcopy(src, dst, len): regions may overlap.
+ * When entered directly, %o5 has not been set up, so the value left in
+ * %o0 on return is not meaningful to bcopy() callers.
+ */
+ENTRY(bcopy)
+	cmp	%o0, %o1	! src < dst?
+	bgeu	Lbcopy_start	! no, go copy forwards as via old bcopy
+	 cmp	%o2, BCOPY_SMALL! (check length for doublecopy first)
+
+	/*
+	 * Since src comes before dst, and the regions might overlap,
+	 * we have to do the copy starting at the end and working backwards.
+	 */
+	add	%o2, %o0, %o0	! src += len
+	add	%o2, %o1, %o1	! dst += len
+	bge,a	Lback_fancy	! if len >= BCOPY_SMALL, go be fancy
+	 btst	3, %o0
+
+	/*
+	 * Not much to copy, just do it a byte at a time.
+	 */
+	deccc	%o2		! while (--len >= 0)
+	bl	1f
+	EMPTY
+0:
+	dec	%o0		!	*--dst = *--src;
+	ldsb	[%o0], %o4
+	dec	%o1
+	deccc	%o2
+	bge	0b
+	 stb	%o4, [%o1]
+1:
+	retl
+	 mov	%o5, %o0		! return (dst)
+
+	/*
+	 * Plenty to copy, try to be optimal.
+	 * We only bother with word/halfword/byte copies here.
+	 */
+Lback_fancy:
+!	btst	3, %o0		! done already
+	bnz	1f		! if ((src & 3) == 0 &&
+	 btst	3, %o1		!     (dst & 3) == 0)
+	bz,a	Lback_words	!	goto words;
+	 dec	4, %o2		! (done early for word copy)
+
+1:
+	/*
+	 * See if the low bits match.
+	 */
+	xor	%o0, %o1, %o3	! t = src ^ dst;
+	btst	1, %o3
+	bz,a	3f		! if (t & 1) == 0, can do better
+	 btst	1, %o0
+
+	/*
+	 * Nope; gotta do byte copy.
+	 */
+2:
+	dec	%o0		! do {
+	ldsb	[%o0], %o4	!	*--dst = *--src;
+	dec	%o1
+	deccc	%o2		! } while (--len != 0);
+	bnz	2b
+	 stb	%o4, [%o1]
+	retl
+	 mov	%o5, %o0		! return (dst)
+
+3:
+	/*
+	 * Can do halfword or word copy, but might have to copy 1 byte first.
+	 */
+!	btst	1, %o0		! done earlier
+	bz,a	4f		! if (src & 1) {	/* copy 1 byte */
+	 btst	2, %o3		! (done early)
+	dec	%o0		!	*--dst = *--src;
+	ldsb	[%o0], %o4
+	dec	%o1
+	stb	%o4, [%o1]
+	dec	%o2		!	len--;
+	btst	2, %o3		! }
+
+4:
+	/*
+	 * See if we can do a word copy ((t&2) == 0).
+	 */
+!	btst	2, %o3		! done earlier
+	bz,a	6f		! if (t & 2) == 0, can do word copy
+	 btst	2, %o0		! (src&2, done early)
+
+	/*
+	 * Gotta do halfword copy.
+	 */
+	dec	2, %o2		! len -= 2;
+5:
+	dec	2, %o0		! do {
+	ldsh	[%o0], %o4	!	src -= 2;
+	dec	2, %o1		!	dst -= 2;
+	deccc	2, %o2		!	*(short *)dst = *(short *)src;
+	bge	5b		! } while ((len -= 2) >= 0);
+	 sth	%o4, [%o1]
+	b	Lback_mopb	! goto mop_up_byte;
+	 btst	1, %o2		! (len&1, done early)
+
+6:
+	/*
+	 * We can do word copies, but we might have to copy
+	 * one halfword first.
+	 */
+!	btst	2, %o0		! done already
+	bz	7f		! if (src & 2) {
+	 dec	4, %o2		! (len -= 4, done early)
+	dec	2, %o0		!	src -= 2, dst -= 2;
+	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
+	dec	2, %o1
+	sth	%o4, [%o1]
+	dec	2, %o2		!	len -= 2;
+				! }
+
+7:
+Lback_words:
+	/*
+	 * Do word copies (backwards), then mop up trailing halfword
+	 * and byte if any.
+	 */
+!	dec	4, %o2		! len -= 4, done already
+0:				! do {
+	dec	4, %o0		!	src -= 4;
+	dec	4, %o1		!	dst -= 4;
+	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
+	deccc	4, %o2		! } while ((len -= 4) >= 0);
+	bge	0b
+	 st	%o4, [%o1]
+
+	/*
+	 * Check for trailing shortword.
+	 */
+	btst	2, %o2		! if (len & 2) {
+	bz,a	1f
+	 btst	1, %o2		! (len&1, done early)
+	dec	2, %o0		!	src -= 2, dst -= 2;
+	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src;
+	dec	2, %o1
+	sth	%o4, [%o1]	! }
+	btst	1, %o2
+
+	/*
+	 * Check for trailing byte.
+	 */
+1:
+Lback_mopb:
+!	btst	1, %o2		! (done already)
+	bnz,a	1f		! if (len & 1) {
+	 ldsb	[%o0 - 1], %o4	!	b = src[-1];
+	retl
+	 mov	%o5, %o0		! return (dst)
+
+1:
+	stb	%o4, [%o1 - 1]	!	dst[-1] = b;
+	retl			! }
+	 mov	%o5, %o0		! return (dst)
+
+
|