diff options
author | Jeremie Courreges-Anglas <jca@cvs.openbsd.org> | 2021-07-23 15:31:15 +0000 |
---|---|---|
committer | Jeremie Courreges-Anglas <jca@cvs.openbsd.org> | 2021-07-23 15:31:15 +0000 |
commit | 82db2a413d23cb793c66deead444d6b2d2784b6e (patch) | |
tree | 796adeb8be186a40653b4e4fbdb9e637a1aad68a /sys/arch/riscv64 | |
parent | 8ad6ef52ef3cbc6f4f85d1ce17e802c3f56d763f (diff) |
Use 8/4/1-byte loads/stores for copyin/copyout/kcopy
Only use multi-byte operations on properly aligned addresses, as
I have observed a 40x penalty for unaligned 8-byte operations compared
to equivalent 1-byte loops on this SiFive Unmatched. The speed gain is
small but significant.
Input & ok kettenis@
Diffstat (limited to 'sys/arch/riscv64')
-rw-r--r-- | sys/arch/riscv64/riscv64/copy.S | 35 |
1 files changed, 33 insertions, 2 deletions
diff --git a/sys/arch/riscv64/riscv64/copy.S b/sys/arch/riscv64/riscv64/copy.S index 075159f8bf1..e7eb3f11d1f 100644 --- a/sys/arch/riscv64/riscv64/copy.S +++ b/sys/arch/riscv64/riscv64/copy.S @@ -1,4 +1,4 @@ -/* $OpenBSD: copy.S,v 1.6 2021/06/28 18:53:10 deraadt Exp $ */ +/* $OpenBSD: copy.S,v 1.7 2021/07/23 15:31:14 jca Exp $ */ /* * Copyright (c) 2020 Brian Bamsch <bbamsch@google.com> @@ -49,8 +49,38 @@ ENTRY(copyin) SWAP_FAULT_HANDLER(a3, a4, a5) ENTER_USER_ACCESS(a4) -// XXX optimize? .Lcopyio: +.Lcopy8: + li a5, 8 + bltu a2, a5, .Lcopy4 + + or a7, a0, a1 + andi a7, a7, 7 + bnez a7, .Lcopy4 + +1: ld a4, 0(a0) + addi a0, a0, 8 + sd a4, 0(a1) + addi a1, a1, 8 + addi a2, a2, -8 + bgeu a2, a5, 1b + +.Lcopy4: + li a5, 4 + bltu a2, a5, .Lcopy1 + + andi a7, a7, 3 + bnez a7, .Lcopy1 + +1: lw a4, 0(a0) + addi a0, a0, 4 + sw a4, 0(a1) + addi a1, a1, 4 + addi a2, a2, -4 + bgeu a2, a5, 1b + +.Lcopy1: + beqz a2, .Lcopy0 1: lb a4, 0(a0) addi a0, a0, 1 sb a4, 0(a1) @@ -58,6 +88,7 @@ ENTRY(copyin) addi a2, a2, -1 bnez a2, 1b +.Lcopy0: EXIT_USER_ACCESS(a4) SET_FAULT_HANDLER(a3, a4) .Lcopyiodone: |