Diffstat (limited to 'sys/lib/libkern/arch/sh/memset.S')
-rw-r--r--	sys/lib/libkern/arch/sh/memset.S	298
1 file changed, 298 insertions(+), 0 deletions(-)
diff --git a/sys/lib/libkern/arch/sh/memset.S b/sys/lib/libkern/arch/sh/memset.S
new file mode 100644
index 00000000000..6611daba3ac
--- /dev/null
+++ b/sys/lib/libkern/arch/sh/memset.S
@@ -0,0 +1,298 @@
+/* $NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $ */
+
+/*-
+ * Copyright (c) 2002 SHIMIZU Ryo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+
+#if defined(LIBC_SCCS) && !defined(lint)
+ RCSID("$NetBSD: memset.S,v 1.1 2005/12/20 19:28:50 christos Exp $")
+#endif
+
+#define REG_PTR r0
+#define REG_TMP1 r1
+
+#ifdef BZERO
+# define REG_C r2
+# define REG_DST r4
+# define REG_LEN r5
+#else
+# define REG_DST0 r3
+# define REG_DST r4
+# define REG_C r5
+# define REG_LEN r6
+#endif
+
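+/*
+ * One source builds both routines: with -DBZERO this assembles to
+ * bzero(void *dst, size_t len); otherwise to
+ * memset(void *dst, int c, size_t len).  The register names above
+ * follow the SH calling convention: arguments arrive in r4-r6.
+ */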
+#ifdef BZERO
+ENTRY(bzero)
+#else
+ENTRY(memset)
+ mov REG_DST,REG_DST0 /* for return value */
+#endif
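+	/*
+	 * Dispatch on length:
+	 *    len >= 28: "large"  - replicate c, fill with 4-byte stores
+	 *    len >= 12: "small"  - computed jump into a table of stores
+	 *    len <  12: fall through to a short byte loop
+	 */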
+	/* small amount to fill? */
+ mov #28,REG_TMP1
+ cmp/hs REG_TMP1,REG_LEN /* if (len >= 28) goto large; */
+ bt/s large
+ mov #12,REG_TMP1 /* if (len >= 12) goto small; */
+ cmp/hs REG_TMP1,REG_LEN
+ bt/s small
+#ifdef BZERO
+ mov #0,REG_C
+#endif
+	/* very short fill (0-11 bytes) */
+ tst REG_LEN,REG_LEN
+ add REG_DST,REG_LEN
+ bt/s done
+ add #1,REG_DST
+
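+	/*
+	 * REG_LEN now points one past the end and REG_DST = dst + 1 is
+	 * the stop mark: store backwards with pre-decrement until the
+	 * byte at dst itself has been written.
+	 */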
+	/* byte loop, unrolled 4x */
+ cmp/eq REG_DST,REG_LEN
+1: mov.b REG_C,@-REG_LEN
+ bt/s done
+ cmp/eq REG_DST,REG_LEN
+ mov.b REG_C,@-REG_LEN
+ bt/s done
+ cmp/eq REG_DST,REG_LEN
+ mov.b REG_C,@-REG_LEN
+ bt/s done
+ cmp/eq REG_DST,REG_LEN
+ mov.b REG_C,@-REG_LEN
+ bf/s 1b
+ cmp/eq REG_DST,REG_LEN
+done:
+#ifdef BZERO
+ rts
+ nop
+#else
+ rts
+ mov REG_DST0,r0
+#endif
+
+
+small:
+ mov REG_DST,r0
+ tst #1,r0
+ bt/s small_aligned
+ mov REG_DST,REG_TMP1
+ shll REG_LEN
+	mova	1f,r0			/* 1f must be 4-byte aligned! */
+ add #16,REG_TMP1 /* REG_TMP1 = dst+16; */
+ sub REG_LEN,r0
+ jmp @r0
+ mov REG_C,r0
+
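+	/*
+	 * Computed jump: every mov.b below is a 2-byte instruction, so
+	 * jumping to (1f - len * 2) executes exactly `len' stores and
+	 * fills dst[0] .. dst[len-1].  mov.b r0,@(disp,Rn) only has a
+	 * 4-bit displacement, hence the second base REG_TMP1 = dst + 16
+	 * for offsets 16..31.  The rts is woven into the table so the
+	 * final store executes in its delay slot (BZERO case).
+	 */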
+ .align 2
+ mov.b r0,@(15,REG_TMP1)
+ mov.b r0,@(14,REG_TMP1)
+ mov.b r0,@(13,REG_TMP1)
+ mov.b r0,@(12,REG_TMP1)
+ mov.b r0,@(11,REG_TMP1)
+ mov.b r0,@(10,REG_TMP1)
+ mov.b r0,@(9,REG_TMP1)
+ mov.b r0,@(8,REG_TMP1)
+ mov.b r0,@(7,REG_TMP1)
+ mov.b r0,@(6,REG_TMP1)
+ mov.b r0,@(5,REG_TMP1)
+ mov.b r0,@(4,REG_TMP1)
+ mov.b r0,@(3,REG_TMP1)
+ mov.b r0,@(2,REG_TMP1)
+ mov.b r0,@(1,REG_TMP1)
+ mov.b r0,@REG_TMP1
+ mov.b r0,@(15,REG_DST)
+ mov.b r0,@(14,REG_DST)
+ mov.b r0,@(13,REG_DST)
+ mov.b r0,@(12,REG_DST)
+ mov.b r0,@(11,REG_DST)
+ mov.b r0,@(10,REG_DST)
+ mov.b r0,@(9,REG_DST)
+ mov.b r0,@(8,REG_DST)
+ mov.b r0,@(7,REG_DST)
+ mov.b r0,@(6,REG_DST)
+ mov.b r0,@(5,REG_DST)
+ mov.b r0,@(4,REG_DST)
+ mov.b r0,@(3,REG_DST)
+ mov.b r0,@(2,REG_DST)
+ mov.b r0,@(1,REG_DST)
+#ifdef BZERO
+ rts
+1: mov.b r0,@REG_DST
+#else
+ mov.b r0,@REG_DST
+1: rts
+ mov REG_DST0,r0
+#endif
+
+
+/* 2-byte-aligned small fill */
+small_aligned:
+#ifndef BZERO
+ extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
+ shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
+ or REG_TMP1,REG_C /* REG_C = ????xxxx */
+#endif
+
+ mov REG_LEN,r0
+ tst #1,r0 /* len is aligned? */
+ bt/s 1f
+ add #-1,r0
+	mov.b	REG_C,@(r0,REG_DST)	/* fill the last (odd) byte */
+ mov r0,REG_LEN
+1:
+
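+	/*
+	 * Same computed-jump trick with halfwords: len is even now and
+	 * each 2-byte mov.w fills 2 bytes, so the entry point is simply
+	 * (1f - len).
+	 */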
+	mova	1f,r0			/* 1f must be 4-byte aligned! */
+ sub REG_LEN,r0
+ jmp @r0
+ mov REG_C,r0
+
+ .align 2
+ mov.w r0,@(30,REG_DST)
+ mov.w r0,@(28,REG_DST)
+ mov.w r0,@(26,REG_DST)
+ mov.w r0,@(24,REG_DST)
+ mov.w r0,@(22,REG_DST)
+ mov.w r0,@(20,REG_DST)
+ mov.w r0,@(18,REG_DST)
+ mov.w r0,@(16,REG_DST)
+ mov.w r0,@(14,REG_DST)
+ mov.w r0,@(12,REG_DST)
+ mov.w r0,@(10,REG_DST)
+ mov.w r0,@(8,REG_DST)
+ mov.w r0,@(6,REG_DST)
+ mov.w r0,@(4,REG_DST)
+ mov.w r0,@(2,REG_DST)
+#ifdef BZERO
+ rts
+1: mov.w r0,@REG_DST
+#else
+ mov.w r0,@REG_DST
+1: rts
+ mov REG_DST0,r0
+#endif
+
+
+
+ .align 2
+large:
+#ifdef BZERO
+ mov #0,REG_C
+#else
+ extu.b REG_C,REG_TMP1 /* REG_C = ??????xx, REG_TMP1 = ????00xx */
+ shll8 REG_C /* REG_C = ????xx00, REG_TMP1 = ????00xx */
+ or REG_C,REG_TMP1 /* REG_C = ????xx00, REG_TMP1 = ????xxxx */
+ swap.w REG_TMP1,REG_C /* REG_C = xxxx????, REG_TMP1 = ????xxxx */
+ xtrct REG_TMP1,REG_C /* REG_C = xxxxxxxx */
+#endif
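+	/*
+	 * REG_C now holds the fill value in all four byte lanes.  Align
+	 * dst up and ptr (= dst + len) down to 4-byte boundaries, then
+	 * fill between them with 4-byte stores.
+	 */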
+
+	mov	#3,REG_TMP1
+	tst	REG_TMP1,REG_DST	/* if (dst & 3) goto unaligned_dst; */
+	mov	REG_DST,REG_PTR		/* ptr = dst; */
+	bf/s	unaligned_dst
+	add	REG_LEN,REG_PTR		/* REG_PTR = dst + len; */
+	tst	REG_TMP1,REG_LEN	/* if (len & 3) goto unaligned_len; */
+	bf/s	unaligned_len		/* (delay slot: the mov #32 below) */
+
+aligned:
+ /* fill 32*n bytes */
+ mov #32,REG_TMP1
+ cmp/hi REG_LEN,REG_TMP1
+ bt 9f
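+	/*
+	 * Main loop: step REG_PTR back 32 bytes per iteration and store
+	 * eight words; the pointer and length updates are interleaved
+	 * with the stores, presumably to keep the pipeline busy.
+	 */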
+ .align 2
+1: sub REG_TMP1,REG_PTR
+ mov.l REG_C,@REG_PTR
+ sub REG_TMP1,REG_LEN
+ mov.l REG_C,@(4,REG_PTR)
+ cmp/hi REG_LEN,REG_TMP1
+ mov.l REG_C,@(8,REG_PTR)
+ mov.l REG_C,@(12,REG_PTR)
+ mov.l REG_C,@(16,REG_PTR)
+ mov.l REG_C,@(20,REG_PTR)
+ mov.l REG_C,@(24,REG_PTR)
+ bf/s 1b
+ mov.l REG_C,@(28,REG_PTR)
+9:
+
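+	/*
+	 * At most seven words remain.  REG_DST + 4 serves as the stop
+	 * mark for the same pre-decrement pattern as the byte loop.
+	 */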
+	/* fill remaining 4*n bytes */
+ cmp/eq REG_DST,REG_PTR
+ bt 9f
+ add #4,REG_DST
+ cmp/eq REG_DST,REG_PTR
+1: mov.l REG_C,@-REG_PTR
+ bt/s 9f
+ cmp/eq REG_DST,REG_PTR
+ mov.l REG_C,@-REG_PTR
+ bt/s 9f
+ cmp/eq REG_DST,REG_PTR
+ mov.l REG_C,@-REG_PTR
+ bt/s 9f
+ cmp/eq REG_DST,REG_PTR
+ mov.l REG_C,@-REG_PTR
+ bf/s 1b
+ cmp/eq REG_DST,REG_PTR
+9:
+#ifdef BZERO
+ rts
+ nop
+#else
+ rts
+ mov REG_DST0,r0
+#endif
+
+
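+	/*
+	 * Align dst upward: store one byte if dst is odd, then one
+	 * halfword if dst is still not 4-byte aligned (the "dst & 2"
+	 * test for the taken branch sits in the delay slot).
+	 */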
+unaligned_dst:
+ mov #1,REG_TMP1
+ tst REG_TMP1,REG_DST /* if (dst & 1) { */
+ add #1,REG_TMP1
+ bt/s 2f
+ tst REG_TMP1,REG_DST
+ mov.b REG_C,@REG_DST /* *dst++ = c; */
+ add #1,REG_DST
+ tst REG_TMP1,REG_DST
+2: /* } */
+ /* if (dst & 2) { */
+ bt 4f
+	mov.w	REG_C,@REG_DST		/* *(u_int16_t *)dst = c; */
+	add	#2,REG_DST		/* dst += 2; */
+4: /* } */
+
+
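+	/*
+	 * Trim ptr (= dst + len) down to a 4-byte boundary: one byte if
+	 * ptr is odd, one halfword if bit 1 is still set.  Falling in
+	 * from above, the bt/s delay slot has already run the first tst
+	 * of unaligned_len.
+	 */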
+ tst #3,REG_PTR /* if (ptr & 3) { */
+ bt/s 4f /* */
+unaligned_len:
+ tst #1,REG_PTR /* if (ptr & 1) { */
+ bt/s 2f
+ tst #2,REG_PTR
+	mov.b	REG_C,@-REG_PTR		/* *--ptr = c; */
+2: /* } */
+ /* if (ptr & 2) { */
+ bt 4f
+	mov.w	REG_C,@-REG_PTR		/* ptr -= 2; *(u_int16_t *)ptr = c; */
+4: /* } */
+ /* } */
+
+	mov	REG_PTR,REG_LEN
+	bra	aligned
+	sub	REG_DST,REG_LEN		/* len = ptr - dst; both now 4-byte aligned */
+