author     Theo de Raadt <deraadt@cvs.openbsd.org>    2013-06-13 02:25:20 +0000
committer  Theo de Raadt <deraadt@cvs.openbsd.org>    2013-06-13 02:25:20 +0000
commit     b8bc5de226e3b46e1590faaa62f38de46a76481a
tree       191df082ff77a357a638d2347a1258330745c44e /sys/lib
parent     656ebbb89624d21d788e9d81d841c8bad09504c5
merge memcpy/memmove/bcopy into one file, with sub-jumps.
ok .... I guess noone, because it is summer
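
[Editor's note: for readers who do not speak Alpha assembly, the shape of the merged file is easy to state in C. memcpy and memmove swap their (dst, src) arguments into bcopy's (src, dst) order (the three cmoveq instructions in the diff below), then all three entry points fall into one shared copy body that picks a forward or backward loop based on a single unsigned overlap test. This is a minimal sketch of that control flow, not the committed code; the *_sketch and copy_common names are illustrative only.]

#include <stddef.h>
#include <stdint.h>

/* Shared body in bcopy argument order (src, dst, len), like the code
 * following the XLEAF(bcopy,3) entry point below. */
static void
copy_common(const char *src, char *dst, size_t len)
{
	/* "Check for zero length" */
	if (len == 0)
		return;
	/* "Check for overlap": subq a1,a0,t5; cmpult t5,a2,t5.
	 * If dst lies inside [src, src+len), the unsigned difference
	 * dst - src is < len and a forward copy would clobber source
	 * bytes before they are read. */
	if ((uintptr_t)dst - (uintptr_t)src < len) {
		while (len--)			/* bcopy_overlap: backwards */
			dst[len] = src[len];
	} else {
		for (size_t i = 0; i < len; i++)	/* bcopy_forward */
			dst[i] = src[i];
	}
}

/* bcopy keeps its historical (from, to) argument order. */
void
bcopy_sketch(const void *src, void *dst, size_t len)
{
	copy_common(src, dst, len);
}

/* memmove swaps (dst, src) into bcopy order, as the cmoveq triple does,
 * then falls into the shared body. */
void *
memmove_sketch(void *dst, const void *src, size_t len)
{
	copy_common(src, dst, len);
	return dst;
}

/* memcpy does the same swap but branches straight to bcopy_forward,
 * skipping the overlap test; this sketch keeps the test for safety. */
void *
memcpy_sketch(void *dst, const void *src, size_t len)
{
	copy_common(src, dst, len);
	return dst;
}

The one unsigned compare covers both directions: when dst precedes src, dst - src wraps to a huge unsigned value, fails the < len test, and the forward path is taken.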
Diffstat (limited to 'sys/lib')
-rw-r--r--  sys/lib/libkern/arch/alpha/bcopy.S   |   2
-rw-r--r--  sys/lib/libkern/arch/alpha/memcpy.S  |   2
-rw-r--r--  sys/lib/libkern/arch/alpha/memmove.S | 270
3 files changed, 271 insertions, 3 deletions
diff --git a/sys/lib/libkern/arch/alpha/bcopy.S b/sys/lib/libkern/arch/alpha/bcopy.S
index b34338a26e6..c9361568da4 100644
--- a/sys/lib/libkern/arch/alpha/bcopy.S
+++ b/sys/lib/libkern/arch/alpha/bcopy.S
@@ -1 +1 @@
-/* No code here since kernel implements this itself */
+/* This code is contained in memmove.S */
diff --git a/sys/lib/libkern/arch/alpha/memcpy.S b/sys/lib/libkern/arch/alpha/memcpy.S
index b34338a26e6..c9361568da4 100644
--- a/sys/lib/libkern/arch/alpha/memcpy.S
+++ b/sys/lib/libkern/arch/alpha/memcpy.S
@@ -1 +1 @@
-/* No code here since kernel implements this itself */
+/* This code is contained in memmove.S */
diff --git a/sys/lib/libkern/arch/alpha/memmove.S b/sys/lib/libkern/arch/alpha/memmove.S
index b34338a26e6..2a994c8ab1a 100644
--- a/sys/lib/libkern/arch/alpha/memmove.S
+++ b/sys/lib/libkern/arch/alpha/memmove.S
@@ -1 +1,269 @@
-/* No code here since kernel implements this itself */
+/*
+ * Copyright (c) 1994, 1995, 1996 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Chris G. Demetriou
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#include <machine/asm.h>
+
+/*
+ * Copy a bytes within the kernel's address space.  The bcopy and memmove
+ * variants handle overlapping regions, the memcpy variant does not.
+ *
+ *	void memcpy(char *to, char *from, size_t len);
+ *	void memmove(char *to, char *from, size_t len);
+ *	void bcopy(char *from, char *to, size_t len);
+ */
+LEAF(memcpy,3)
+	cmoveq	zero,a0,t5
+	cmoveq	zero,a1,a0
+	cmoveq	zero,t5,a1
+	br	bcopy_forward
+
+XLEAF(memmove,3)
+	cmoveq	zero,a0,t5
+	cmoveq	zero,a1,a0
+	cmoveq	zero,t5,a1
+XLEAF(bcopy,3)
+	/* Check for zero length */
+	beq	a2,bcopy_done
+
+	/* Check for overlap */
+	subq	a1,a0,t5
+	cmpult	t5,a2,t5
+	bne	t5,bcopy_overlap
+
+bcopy_forward:
+	/* a3 = end address */
+	addq	a0,a2,a3
+
+	/* Get the first word */
+	ldq_u	t2,0(a0)
+
+	/* Do they have the same alignment? */
+	xor	a0,a1,t0
+	and	t0,7,t0
+	and	a1,7,t1
+	bne	t0,bcopy_different_alignment
+
+	/* src & dst have same alignment */
+	beq	t1,bcopy_all_aligned
+
+	ldq_u	t3,0(a1)
+	addq	a2,t1,a2
+	mskqh	t2,a0,t2
+	mskql	t3,a0,t3
+	or	t2,t3,t2
+
+	/* Dst is 8-byte aligned */
+
+bcopy_all_aligned:
+	/* If less than 8 bytes,skip loop */
+	subq	a2,1,t0
+	and	a2,7,a2
+	bic	t0,7,t0
+	beq	t0,bcopy_samealign_lp_end
+
+bcopy_samealign_lp:
+	stq_u	t2,0(a1)
+	addq	a1,8,a1
+	ldq_u	t2,8(a0)
+	subq	t0,8,t0
+	addq	a0,8,a0
+	bne	t0,bcopy_samealign_lp
+
+bcopy_samealign_lp_end:
+	/* If we're done, exit */
+	bne	a2,bcopy_small_left
+	stq_u	t2,0(a1)
+	RET
+
+bcopy_small_left:
+	mskql	t2,a2,t4
+	ldq_u	t3,0(a1)
+	mskqh	t3,a2,t3
+	or	t4,t3,t4
+	stq_u	t4,0(a1)
+	RET
+
+bcopy_different_alignment:
+	/*
+	 * this is the fun part
+	 */
+	addq	a0,a2,a3
+	cmpule	a2,8,t0
+	bne	t0,bcopy_da_finish
+
+	beq	t1,bcopy_da_noentry
+
+	/* Do the initial partial word */
+	subq	zero,a1,t0
+	and	t0,7,t0
+	ldq_u	t3,7(a0)
+	extql	t2,a0,t2
+	extqh	t3,a0,t3
+	or	t2,t3,t5
+	insql	t5,a1,t5
+	ldq_u	t6,0(a1)
+	mskql	t6,a1,t6
+	or	t5,t6,t5
+	stq_u	t5,0(a1)
+	addq	a0,t0,a0
+	addq	a1,t0,a1
+	subq	a2,t0,a2
+	ldq_u	t2,0(a0)
+
+bcopy_da_noentry:
+	subq	a2,1,t0
+	bic	t0,7,t0
+	and	a2,7,a2
+	beq	t0,bcopy_da_finish2
+
+bcopy_da_lp:
+	ldq_u	t3,7(a0)
+	addq	a0,8,a0
+	extql	t2,a0,t4
+	extqh	t3,a0,t5
+	subq	t0,8,t0
+	or	t4,t5,t5
+	stq	t5,0(a1)
+	addq	a1,8,a1
+	beq	t0,bcopy_da_finish1
+	ldq_u	t2,7(a0)
+	addq	a0,8,a0
+	extql	t3,a0,t4
+	extqh	t2,a0,t5
+	subq	t0,8,t0
+	or	t4,t5,t5
+	stq	t5,0(a1)
+	addq	a1,8,a1
+	bne	t0,bcopy_da_lp
+
+bcopy_da_finish2:
+	/* Do the last new word */
+	mov	t2,t3
+
+bcopy_da_finish1:
+	/* Do the last partial word */
+	ldq_u	t2,-1(a3)
+	extql	t3,a0,t3
+	extqh	t2,a0,t2
+	or	t2,t3,t2
+	br	zero,bcopy_samealign_lp_end
+
+bcopy_da_finish:
+	/* Do the last word in the next source word */
+	ldq_u	t3,-1(a3)
+	extql	t2,a0,t2
+	extqh	t3,a0,t3
+	or	t2,t3,t2
+	insqh	t2,a1,t3
+	insql	t2,a1,t2
+	lda	t4,-1(zero)
+	mskql	t4,a2,t5
+	cmovne	t5,t5,t4
+	insqh	t4,a1,t5
+	insql	t4,a1,t4
+	addq	a1,a2,a4
+	ldq_u	t6,0(a1)
+	ldq_u	t7,-1(a4)
+	bic	t6,t4,t6
+	bic	t7,t5,t7
+	and	t2,t4,t2
+	and	t3,t5,t3
+	or	t2,t6,t2
+	or	t3,t7,t3
+	stq_u	t3,-1(a4)
+	stq_u	t2,0(a1)
+	RET
+
+bcopy_overlap:
+	/*
+	 * Basically equivalent to previous case, only backwards.
+	 * Not quite as highly optimized
+	 */
+	addq	a0,a2,a3
+	addq	a1,a2,a4
+
+	/* less than 8 bytes - don't worry about overlap */
+	cmpule	a2,8,t0
+	bne	t0,bcopy_ov_short
+
+	/* Possibly do a partial first word */
+	and	a4,7,t4
+	beq	t4,bcopy_ov_nostart2
+	subq	a3,t4,a3
+	subq	a4,t4,a4
+	ldq_u	t1,0(a3)
+	subq	a2,t4,a2
+	ldq_u	t2,7(a3)
+	ldq	t3,0(a4)
+	extql	t1,a3,t1
+	extqh	t2,a3,t2
+	or	t1,t2,t1
+	mskqh	t3,t4,t3
+	mskql	t1,t4,t1
+	or	t1,t3,t1
+	stq	t1,0(a4)
+
+bcopy_ov_nostart2:
+	bic	a2,7,t4
+	and	a2,7,a2
+	beq	t4,bcopy_ov_lp_end
+
+bcopy_ov_lp:
+	/* This could be more pipelined, but it doesn't seem worth it */
+	ldq_u	t0,-8(a3)
+	subq	a4,8,a4
+	ldq_u	t1,-1(a3)
+	subq	a3,8,a3
+	extql	t0,a3,t0
+	extqh	t1,a3,t1
+	subq	t4,8,t4
+	or	t0,t1,t0
+	stq	t0,0(a4)
+	bne	t4,bcopy_ov_lp
+
+bcopy_ov_lp_end:
+	beq	a2,bcopy_done
+
+	ldq_u	t0,0(a0)
+	ldq_u	t1,7(a0)
+	ldq_u	t2,0(a1)
+	extql	t0,a0,t0
+	extqh	t1,a0,t1
+	or	t0,t1,t0
+	insql	t0,a1,t0
+	mskql	t2,a1,t2
+	or	t2,t0,t2
+	stq_u	t2,0(a1)
+
+bcopy_done:
+	RET
+
+bcopy_ov_short:
+	ldq_u	t2,0(a0)
+	br	zero,bcopy_da_finish
+
+END(memcpy)
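
[Editor's note: most of the bulk above is the bcopy_different_alignment path, which never issues an unaligned load. Alpha's ldq_u fetches the aligned quadword containing an address, and extql/extqh shift the two quadwords straddling an unaligned address so their bytes can be OR-ed together. A rough C rendering of that load-side idiom follows; it assumes a little-endian machine with 8-byte words, and the function name is illustrative, not from the commit.]

#include <stdint.h>

/* Build the 8-byte value at an arbitrary address from the two aligned
 * quadwords that straddle it, the way ldq_u/extql/extqh/or do above. */
static uint64_t
load_quad_unaligned(uintptr_t addr)
{
	/* ldq_u t2,0(a0): aligned quadword containing addr */
	uint64_t lo = *(const uint64_t *)(addr & ~(uintptr_t)7);
	unsigned shift = (unsigned)(addr & 7) * 8;

	if (shift == 0)		/* already aligned; extqh would yield 0 */
		return lo;

	/* ldq_u t3,7(a0): aligned quadword containing the last byte */
	uint64_t hi = *(const uint64_t *)((addr + 7) & ~(uintptr_t)7);

	/* extql shifts the low part down, extqh shifts the high part up */
	return (lo >> shift) | (hi << (64 - shift));
}

The store side mirrors this with insql/insqh and the mskql/mskqh byte masks, merging partial first and last words into existing destination memory with full-width stores only; the original Alpha implementations (pre-BWX) had no byte load or store instructions, so this merging is mandatory, not merely an optimization.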