author     Jeremie Courreges-Anglas <jca@cvs.openbsd.org>  2021-07-23 15:31:15 +0000
committer  Jeremie Courreges-Anglas <jca@cvs.openbsd.org>  2021-07-23 15:31:15 +0000
commit     82db2a413d23cb793c66deead444d6b2d2784b6e (patch)
tree       796adeb8be186a40653b4e4fbdb9e637a1aad68a /sys/arch/riscv64
parent     8ad6ef52ef3cbc6f4f85d1ce17e802c3f56d763f (diff)
Use 8/4/1-byte loads/stores for copyin/copyout/kcopy
Only use multi-byte operations on properly aligned addresses, as I have observed a 40x penalty for unaligned 8-byte operations compared to equivalent 1-byte loops on this SiFive Unmatched. The speed gain is small but significant. Input & ok kettenis@
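In C terms, the new code path amounts to the following. This is an illustrative sketch only, not OpenBSD source: the function name copy_sketch is made up, and the real copyin/copyout/kcopy additionally swap fault handlers and enter/exit user access around the loops, as the assembly below shows.

#include <stddef.h>
#include <stdint.h>

/*
 * Sketch of the 8/4/1-byte copy strategy: use a wider access only
 * when both pointers are aligned for it, otherwise fall back to the
 * next narrower loop.
 */
static void
copy_sketch(const unsigned char *src, unsigned char *dst, size_t len)
{
	/* 8-byte chunks, only when both pointers are 8-byte aligned. */
	if (len >= 8 && (((uintptr_t)src | (uintptr_t)dst) & 7) == 0) {
		while (len >= 8) {
			*(uint64_t *)dst = *(const uint64_t *)src;
			src += 8;
			dst += 8;
			len -= 8;
		}
	}
	/* 4-byte chunks, only when both pointers are 4-byte aligned. */
	if (len >= 4 && (((uintptr_t)src | (uintptr_t)dst) & 3) == 0) {
		while (len >= 4) {
			*(uint32_t *)dst = *(const uint32_t *)src;
			src += 4;
			dst += 4;
			len -= 4;
		}
	}
	/* Copy whatever remains one byte at a time. */
	while (len > 0) {
		*dst++ = *src++;
		len--;
	}
}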
Diffstat (limited to 'sys/arch/riscv64')
-rw-r--r--  sys/arch/riscv64/riscv64/copy.S  35
1 file changed, 33 insertions, 2 deletions
diff --git a/sys/arch/riscv64/riscv64/copy.S b/sys/arch/riscv64/riscv64/copy.S
index 075159f8bf1..e7eb3f11d1f 100644
--- a/sys/arch/riscv64/riscv64/copy.S
+++ b/sys/arch/riscv64/riscv64/copy.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: copy.S,v 1.6 2021/06/28 18:53:10 deraadt Exp $ */
+/* $OpenBSD: copy.S,v 1.7 2021/07/23 15:31:14 jca Exp $ */
/*
* Copyright (c) 2020 Brian Bamsch <bbamsch@google.com>
@@ -49,8 +49,38 @@ ENTRY(copyin)
SWAP_FAULT_HANDLER(a3, a4, a5)
ENTER_USER_ACCESS(a4)
-// XXX optimize?
.Lcopyio:
+.Lcopy8:
+ li a5, 8
+ bltu a2, a5, .Lcopy4
+
+ or a7, a0, a1
+ andi a7, a7, 7
+ bnez a7, .Lcopy4
+
+1: ld a4, 0(a0)
+ addi a0, a0, 8
+ sd a4, 0(a1)
+ addi a1, a1, 8
+ addi a2, a2, -8
+ bgeu a2, a5, 1b
+
+.Lcopy4:
+ li a5, 4
+ bltu a2, a5, .Lcopy1
+
+ andi a7, a7, 3
+ bnez a7, .Lcopy1
+
+1: lw a4, 0(a0)
+ addi a0, a0, 4
+ sw a4, 0(a1)
+ addi a1, a1, 4
+ addi a2, a2, -4
+ bgeu a2, a5, 1b
+
+.Lcopy1:
+ beqz a2, .Lcopy0
1: lb a4, 0(a0)
addi a0, a0, 1
sb a4, 0(a1)
@@ -58,6 +88,7 @@ ENTRY(copyin)
addi a2, a2, -1
bnez a2, 1b
+.Lcopy0:
EXIT_USER_ACCESS(a4)
SET_FAULT_HANDLER(a3, a4)
.Lcopyiodone: