author		Mark Kettenis <kettenis@cvs.openbsd.org>	2024-07-02 10:25:17 +0000
committer	Mark Kettenis <kettenis@cvs.openbsd.org>	2024-07-02 10:25:17 +0000
commit		e70502701168f8fb7ebf0078121677efd2309163 (patch)
tree		23bc2021d90852c6af1cfc2751d3ea68ac575e74 /sys/arch
parent		592939e90a889760e9eba2b53b61ad8c967fbc4b (diff)
The traditional LL/SC atomics perform poorly on modern arm64 systems
with many CPU cores.  With the recent conversion of the sched lock to a
mutex, some systems appear to hang if the sched lock is contended.
ARMv8.1 introduced the LSE feature, which provides atomic instructions
such as CAS that perform much better.  Unfortunately these can't be
used on older ARMv8.0 systems.  Use -moutline-atomics to make the
compiler generate function calls for atomic operations, and provide an
implementation for the functions we use in the kernel that uses LSE
when available and falls back on LL/SC.

Fixes regressions seen on Ampere Altra and Apple M2 Pro/Max/Ultra since
the conversion of the sched lock to a mutex.

tested by claudio@, phessler@, mpi@
ok patrick@
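[For context: with -moutline-atomics the compiler no longer inlines an
LL/SC sequence for an __atomic builtin, but emits a call to an outline
helper such as __aarch64_cas4_acq_rel().  A minimal C sketch of the kind
of code affected -- an illustration, not part of the commit;
mtx_cas_enter() is a hypothetical caller:

#include <stdint.h>

int
mtx_cas_enter(volatile uint32_t *lock, uint32_t self)
{
	uint32_t expected = 0;

	/*
	 * With -moutline-atomics, this builtin compiles to a call to
	 * __aarch64_cas4_acq_rel() instead of an inline LDAXR/STLXR
	 * loop, so the LSE-vs-LL/SC choice can be made at run time.
	 */
	return __atomic_compare_exchange_n(lock, &expected, self, 0,
	    __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE);
}
]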
Diffstat (limited to 'sys/arch')
-rw-r--r--	sys/arch/arm64/arm64/cpu.c	4
-rw-r--r--	sys/arch/arm64/arm64/lse.S	170
-rw-r--r--	sys/arch/arm64/conf/Makefile.arm64	3
-rw-r--r--	sys/arch/arm64/conf/files.arm64	3

4 files changed, 177 insertions(+), 3 deletions(-)
diff --git a/sys/arch/arm64/arm64/cpu.c b/sys/arch/arm64/arm64/cpu.c
index a350d11a843..925ccfb4486 100644
--- a/sys/arch/arm64/arm64/cpu.c
+++ b/sys/arch/arm64/arm64/cpu.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpu.c,v 1.121 2024/06/23 10:17:16 kettenis Exp $ */
+/* $OpenBSD: cpu.c,v 1.122 2024/07/02 10:25:16 kettenis Exp $ */
/*
* Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
@@ -244,6 +244,7 @@ uint64_t cpu_id_aa64isar2;
uint64_t cpu_id_aa64pfr0;
uint64_t cpu_id_aa64pfr1;
+int arm64_has_lse;
#ifdef CRYPTO
int arm64_has_aes;
#endif
@@ -714,6 +715,7 @@ cpu_identify(struct cpu_info *ci)
if (ID_AA64ISAR0_ATOMIC(id) >= ID_AA64ISAR0_ATOMIC_IMPL) {
printf("%sAtomic", sep);
sep = ",";
+ arm64_has_lse = 1;
}
if (ID_AA64ISAR0_CRC32(id) >= ID_AA64ISAR0_CRC32_BASE) {
diff --git a/sys/arch/arm64/arm64/lse.S b/sys/arch/arm64/arm64/lse.S
new file mode 100644
index 00000000000..6c5727459c0
--- /dev/null
+++ b/sys/arch/arm64/arm64/lse.S
@@ -0,0 +1,170 @@
+/* $OpenBSD: lse.S,v 1.1 2024/07/02 10:25:16 kettenis Exp $ */
+/*
+ * Copyright (c) 2024 Mark Kettenis <kettenis@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <machine/asm.h>
+
+/*
+ * Out-of-line LSE atomics helpers
+ */
+
+.arch armv8-a+lse
+
+ENTRY(__aarch64_cas4_acq_rel)
+ RETGUARD_SETUP(__aarch64_cas4_acq_rel, x15)
+ adrp x9, arm64_has_lse
+ ldr w9, [x9, :lo12:arm64_has_lse]
+ cbz w9, 1f
+ casal w0, w1, [x2]
+ RETGUARD_CHECK(__aarch64_cas4_acq_rel, x15)
+ ret
+1:
+ ldaxr w9, [x2]
+ cmp w9, w0
+ b.ne 2f
+ stlxr w10, w1, [x2]
+ cbnz w10, 1b
+2:
+ mov w0, w9
+ RETGUARD_CHECK(__aarch64_cas4_acq_rel, x15)
+ ret
+END(__aarch64_cas4_acq_rel)
+
+ENTRY(__aarch64_cas8_acq_rel)
+ RETGUARD_SETUP(__aarch64_cas8_acq_rel, x15)
+ adrp x9, arm64_has_lse
+ ldr w9, [x9, :lo12:arm64_has_lse]
+ cbz w9, 1f
+ casal x0, x1, [x2]
+ RETGUARD_CHECK(__aarch64_cas8_acq_rel, x15)
+ ret
+1:
+ ldaxr x9, [x2]
+ cmp x9, x0
+ b.ne 2f
+ stlxr w10, x1, [x2]
+ cbnz w10, 1b
+2:
+ mov x0, x9
+ RETGUARD_CHECK(__aarch64_cas8_acq_rel, x15)
+ ret
+END(__aarch64_cas8_acq_rel)
+
+ENTRY(__aarch64_ldadd4_acq_rel)
+ RETGUARD_SETUP(__aarch64_ldadd4_acq_rel, x15)
+ adrp x9, arm64_has_lse
+ ldr w9, [x9, :lo12:arm64_has_lse]
+ cbz w9, 1f
+ ldaddal w0, w0, [x1]
+ RETGUARD_CHECK(__aarch64_ldadd4_acq_rel, x15)
+ ret
+1:
+ ldaxr w9, [x1]
+ add w11, w9, w0
+ stlxr w10, w11, [x1]
+ cbnz w10, 1b
+ mov w0, w9
+ RETGUARD_CHECK(__aarch64_ldadd4_acq_rel, x15)
+ ret
+END(__aarch64_ldadd4_acq_rel)
+
+ENTRY(__aarch64_ldadd8_acq_rel)
+ RETGUARD_SETUP(__aarch64_ldadd8_acq_rel, x15)
+ adrp x9, arm64_has_lse
+ ldr w9, [x9, :lo12:arm64_has_lse]
+ cbz w9, 1f
+ ldaddal x0, x0, [x1]
+ RETGUARD_CHECK(__aarch64_ldadd8_acq_rel, x15)
+ ret
+1:
+ ldaxr x9, [x1]
+ add x11, x9, x0
+ stlxr w10, x11, [x1]
+ cbnz w10, 1b
+ mov x0, x9
+ RETGUARD_CHECK(__aarch64_ldadd8_acq_rel, x15)
+ ret
+END(__aarch64_ldadd8_acq_rel)
+
+ENTRY(__aarch64_ldclr4_acq_rel)
+ RETGUARD_SETUP(__aarch64_ldclr4_acq_rel, x15)
+ adrp x9, arm64_has_lse
+ ldr w9, [x9, :lo12:arm64_has_lse]
+ cbz w9, 1f
+ ldclral w0, w0, [x1]
+ RETGUARD_CHECK(__aarch64_ldclr4_acq_rel, x15)
+ ret
+1:
+ ldaxr w9, [x1]
+ bic w11, w9, w0
+ stlxr w10, w11, [x1]
+ cbnz w10, 1b
+ mov w0, w9
+ RETGUARD_CHECK(__aarch64_ldclr4_acq_rel, x15)
+ ret
+END(__aarch64_ldclr4_acq_rel)
+
+ENTRY(__aarch64_ldset4_acq_rel)
+ RETGUARD_SETUP(__aarch64_ldset4_acq_rel, x15)
+ adrp x9, arm64_has_lse
+ ldr w9, [x9, :lo12:arm64_has_lse]
+ cbz w9, 1f
+ ldsetal w0, w0, [x1]
+ RETGUARD_CHECK(__aarch64_ldset4_acq_rel, x15)
+ ret
+1:
+ ldaxr w9, [x1]
+ orr w11, w9, w0
+ stlxr w10, w11, [x1]
+ cbnz w10, 1b
+ mov w0, w9
+ RETGUARD_CHECK(__aarch64_ldset4_acq_rel, x15)
+ ret
+END(__aarch64_ldset4_acq_rel)
+
+ENTRY(__aarch64_swp4_acq_rel)
+ RETGUARD_SETUP(__aarch64_swp4_acq_rel, x15)
+ adrp x9, arm64_has_lse
+ ldr w9, [x9, :lo12:arm64_has_lse]
+ cbz w9, 1f
+ swpal w0, w0, [x1]
+ RETGUARD_CHECK(__aarch64_swp4_acq_rel, x15)
+ ret
+1:
+ ldaxr w9, [x1]
+ stlxr w10, w0, [x1]
+ cbnz w10, 1b
+ mov w0, w9
+ RETGUARD_CHECK(__aarch64_swp4_acq_rel, x15)
+ ret
+END(__aarch64_swp4_acq_rel)
+
+ENTRY(__aarch64_swp8_acq_rel)
+ RETGUARD_SETUP(__aarch64_swp8_acq_rel, x15)
+ adrp x9, arm64_has_lse
+ ldr w9, [x9, :lo12:arm64_has_lse]
+ cbz w9, 1f
+ swpal x0, x0, [x1]
+ RETGUARD_CHECK(__aarch64_swp8_acq_rel, x15)
+ ret
+1:
+ ldaxr x9, [x1]
+ stlxr w10, x0, [x1]
+ cbnz w10, 1b
+ mov x0, x9
+ RETGUARD_CHECK(__aarch64_swp8_acq_rel, x15)
+ ret
+END(__aarch64_swp8_acq_rel)
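[Each helper above has the same shape: test the arm64_has_lse flag set
up in cpu_identify() when FEAT_LSE is present, execute a single LSE
instruction when it is set, and otherwise fall back on an LL/SC retry
loop.  A rough C rendering of that shape for the 32-bit CAS -- a sketch
only; lse_casal() and llsc_cas() are hypothetical stand-ins for the two
instruction sequences in the assembly:

extern int arm64_has_lse;	/* set in cpu_identify() */

uint32_t
cas4_acq_rel(uint32_t cmp, uint32_t newval, volatile uint32_t *p)
{
	if (arm64_has_lse)
		return lse_casal(cmp, newval, p); /* casal w0, w1, [x2] */
	return llsc_cas(cmp, newval, p);	/* ldaxr/stlxr retry loop */
}

Every helper returns the previous contents of the memory word in w0/x0,
matching the return-old-value contract of the compiler's outline-atomics
calls.
]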
diff --git a/sys/arch/arm64/conf/Makefile.arm64 b/sys/arch/arm64/conf/Makefile.arm64
index 636b7b4043b..837b1fe656d 100644
--- a/sys/arch/arm64/conf/Makefile.arm64
+++ b/sys/arch/arm64/conf/Makefile.arm64
@@ -1,4 +1,4 @@
-# $OpenBSD: Makefile.arm64,v 1.47 2023/09/06 01:47:36 jsg Exp $
+# $OpenBSD: Makefile.arm64,v 1.48 2024/07/02 10:25:16 kettenis Exp $
# For instructions on building kernels consult the config(8) and options(4)
# manual pages.
@@ -60,6 +60,7 @@ CMACHFLAGS= -march=armv8-a+nofp+nosimd \
-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer \
-ffixed-x18
CMACHFLAGS+= -ffreestanding ${NOPIE_FLAGS}
+CMACHFLAGS+= -moutline-atomics
SORTR= sort -R
.if ${IDENT:M-DNO_PROPOLICE}
CMACHFLAGS+= -fno-stack-protector
diff --git a/sys/arch/arm64/conf/files.arm64 b/sys/arch/arm64/conf/files.arm64
index 833ea3f050f..c8f9acac72a 100644
--- a/sys/arch/arm64/conf/files.arm64
+++ b/sys/arch/arm64/conf/files.arm64
@@ -1,4 +1,4 @@
-# $OpenBSD: files.arm64,v 1.69 2024/03/25 17:24:03 patrick Exp $
+# $OpenBSD: files.arm64,v 1.70 2024/07/02 10:25:16 kettenis Exp $
maxpartitions 16
maxusers 2 8 128
@@ -34,6 +34,7 @@ file arch/arm64/arm64/trap.c
file arch/arm64/arm64/ast.c
file arch/arm64/arm64/cpufunc_asm.S
+file arch/arm64/arm64/lse.S
file arch/arm64/arm64/support.S
file arch/arm64/arm64/bus_dma.c