summaryrefslogtreecommitdiff
path: root/sys/arch/arm64
diff options
context:
space:
mode:
authorMark Kettenis <kettenis@cvs.openbsd.org>2020-10-09 17:36:48 +0000
committerMark Kettenis <kettenis@cvs.openbsd.org>2020-10-09 17:36:48 +0000
commit967365ce097b36c5276c49b77ee3d8b9fb1b8f1c (patch)
tree3d0dd9b8eded0436a5f7e46986492665d17b5225 /sys/arch/arm64
parent7aa42f974b2ff58ece3fbf84e22fb69c5f501293 (diff)
Optimize copyin(9), copyout(9) and kcopy(9) by doing 16-byte copies if
possible. No doubt further optimizations are possible, but this change results in a nice balance between code size and speed and is still easy to understand. ok patrick@
Diffstat (limited to 'sys/arch/arm64')
-rw-r--r--sys/arch/arm64/arm64/copy.S54
1 files changed, 42 insertions, 12 deletions
diff --git a/sys/arch/arm64/arm64/copy.S b/sys/arch/arm64/arm64/copy.S
index db2a6c393a2..1a4ed25b080 100644
--- a/sys/arch/arm64/arm64/copy.S
+++ b/sys/arch/arm64/arm64/copy.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: copy.S,v 1.8 2020/10/04 20:03:57 kettenis Exp $ */
+/* $OpenBSD: copy.S,v 1.9 2020/10/09 17:36:47 kettenis Exp $ */
/*
* Copyright (c) 2015 Dale Rahn <drahn@dalerahn.com>
* Copyright (c) 2014 Patrick Wildt <patrick@blueri.se>
@@ -52,13 +52,25 @@ ENTRY(copyin)
adr x5, .Lcopyfault
str x5, [x3, #(PCB_ONFAULT)] // set handler
-// This probably should be optimized
-2: ldtrb w6, [x0]
+ cmp x2, #16
+ b.lo .Lcopyin1
+2: ldtr x6, [x0]
+ ldtr x7, [x0, #8]
+ stp x6, x7, [x1], #16
+ add x0, x0, #16
+ sub x2, x2, #16
+ cmp x2, #16
+ b.hs 2b
+
+.Lcopyin1:
+ cbz x2, .Lcopyin0
+3: ldtrb w6, [x0]
strb w6, [x1], #1
add x0, x0, #1
sub x2, x2, #1
- cbnz x2, 2b
+ cbnz x2, 3b
+.Lcopyin0:
str x4, [x3, #(PCB_ONFAULT)] // clear handler
mov x0, xzr
RETGUARD_CHECK(copy, x15)
@@ -130,13 +142,25 @@ ENTRY(copyout)
adr x5, .Lcopyfault
str x5, [x3, #(PCB_ONFAULT)] // set handler
-// This probably should be optimized
-2: ldrb w6, [x0], #1
+ cmp x2, #16
+ b.lo .Lcopyout1
+2: ldp x6, x7, [x0], #16
+ sttr x6, [x1]
+ sttr x7, [x1, #8]
+ add x1, x1, #16
+ sub x2, x2, #16
+ cmp x2, #16
+ b.hs 2b
+
+.Lcopyout1:
+ cbz x2, .Lcopyout0
+3: ldrb w6, [x0], #1
sttrb w6, [x1]
add x1, x1, #1
sub x2, x2, #1
- cbnz x2, 2b
+ cbnz x2, 3b
+.Lcopyout0:
str x4, [x3, #(PCB_ONFAULT)] // clear handler
mov x0, xzr
RETGUARD_CHECK(copy, x15)
@@ -163,13 +187,19 @@ ENTRY(kcopy)
adr x5, .Lcopyfault
str x5, [x3, #(PCB_ONFAULT)] // set handler
- cmp x2, #8
- b.lo .Lkcopy4
-2: ldr x6, [x0], #8
+ cmp x2, #16
+ b.lo .Lkcopy8
+2: ldp x6, x7, [x0], #16
+ stp x6, x7, [x1], #16
+ sub x2, x2, #16
+ cmp x2, #16
+ b.hs 2b
+
+.Lkcopy8:
+ tbz x2, #3, .Lkcopy4
+ ldr x6, [x0], #8
str x6, [x1], #8
sub x2, x2, #8
- cmp x2, #8
- b.hs 2b
.Lkcopy4:
tbz x2, #2, .Lkcopy1