commit dca69f4c0deda3c9277bed027b2481f69268289d from: kettenis date: Tue Jul 02 10:25:16 2024 UTC The traditional LL/SC atomics perform poorly on modern arm64 systems with many CPU cores. With the recent conversion of the sched lock to a mutex some systems appear to hang if the sched lock is contended. ARMv8.1 introduced an LSE feature that provides atomic instructions such as CAS that perform much better. Unfortunately these can't be used on older ARMv8.0 systems. Use -moutline-atomics to make the compiler generate function calls for atomic operations and provide an implementation for the functions we use in the kernel that use LSE when available and fall back on LL/SC. Fixes regressions seen on Ampere Altra and Apple M2 Pro/Max/Ultra since the conversion of the sched lock to a mutex. tested by claudio@, phessler@, mpi@ ok patrick@ commit - 26a40d9e37747a58e21e4c9ff3865c199d9d9c6f commit + dca69f4c0deda3c9277bed027b2481f69268289d blob - a350d11a8432bb7ba8ed70d0fc9d4f70a0093d8e blob + 925ccfb4486e47f53944cce840570c8a37ab8dcc --- sys/arch/arm64/arm64/cpu.c +++ sys/arch/arm64/arm64/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.121 2024/06/23 10:17:16 kettenis Exp $ */ +/* $OpenBSD: cpu.c,v 1.122 2024/07/02 10:25:16 kettenis Exp $ */ /* * Copyright (c) 2016 Dale Rahn @@ -244,6 +244,7 @@ uint64_t cpu_id_aa64isar2; uint64_t cpu_id_aa64pfr0; uint64_t cpu_id_aa64pfr1; +int arm64_has_lse; #ifdef CRYPTO int arm64_has_aes; #endif @@ -714,6 +715,7 @@ cpu_identify(struct cpu_info *ci) if (ID_AA64ISAR0_ATOMIC(id) >= ID_AA64ISAR0_ATOMIC_IMPL) { printf("%sAtomic", sep); sep = ","; + arm64_has_lse = 1; } if (ID_AA64ISAR0_CRC32(id) >= ID_AA64ISAR0_CRC32_BASE) { blob - /dev/null blob + 6c5727459c07fe160633ca8fa91eb289570b6fcc (mode 644) --- /dev/null +++ sys/arch/arm64/arm64/lse.S @@ -0,0 +1,170 @@ +/* $OpenBSD: lse.S,v 1.1 2024/07/02 10:25:16 kettenis Exp $ */ +/* + * Copyright (c) 2024 Mark Kettenis + * + * Permission to use, copy, modify, and distribute this software for any + * 
purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <machine/asm.h> + +/* + * Out-of-line LSE atomics helpers + */ + +.arch armv8-a+lse + +ENTRY(__aarch64_cas4_acq_rel) + RETGUARD_SETUP(__aarch64_cas4_acq_rel, x15) + adrp x9, arm64_has_lse + ldr w9, [x9, :lo12:arm64_has_lse] + cbz w9, 1f + casal w0, w1, [x2] + RETGUARD_CHECK(__aarch64_cas4_acq_rel, x15) + ret +1: + ldaxr w9, [x2] + cmp w9, w0 + b.ne 2f + stlxr w10, w1, [x2] + cbnz w10, 1b +2: + mov w0, w9 + RETGUARD_CHECK(__aarch64_cas4_acq_rel, x15) + ret +END(__aarch64_cas4_acq_rel) + +ENTRY(__aarch64_cas8_acq_rel) + RETGUARD_SETUP(__aarch64_cas8_acq_rel, x15) + adrp x9, arm64_has_lse + ldr w9, [x9, :lo12:arm64_has_lse] + cbz w9, 1f + casal x0, x1, [x2] + RETGUARD_CHECK(__aarch64_cas8_acq_rel, x15) + ret +1: + ldaxr x9, [x2] + cmp x9, x0 + b.ne 2f + stlxr w10, x1, [x2] + cbnz w10, 1b +2: + mov x0, x9 + RETGUARD_CHECK(__aarch64_cas8_acq_rel, x15) + ret +END(__aarch64_cas8_acq_rel) + +ENTRY(__aarch64_ldadd4_acq_rel) + RETGUARD_SETUP(__aarch64_ldadd4_acq_rel, x15) + adrp x9, arm64_has_lse + ldr w9, [x9, :lo12:arm64_has_lse] + cbz w9, 1f + ldaddal w0, w0, [x1] + RETGUARD_CHECK(__aarch64_ldadd4_acq_rel, x15) + ret +1: + ldaxr w9, [x1] + add w11, w9, w0 + stlxr w10, w11, [x1] + cbnz w10, 1b + mov w0, w9 + RETGUARD_CHECK(__aarch64_ldadd4_acq_rel, x15) + ret +END(__aarch64_ldadd4_acq_rel) + +ENTRY(__aarch64_ldadd8_acq_rel) + 
RETGUARD_SETUP(__aarch64_ldadd8_acq_rel, x15) + adrp x9, arm64_has_lse + ldr w9, [x9, :lo12:arm64_has_lse] + cbz w9, 1f + ldaddal x0, x0, [x1] + RETGUARD_CHECK(__aarch64_ldadd8_acq_rel, x15) + ret +1: + ldaxr x9, [x1] + add x11, x9, x0 + stlxr w10, x11, [x1] + cbnz w10, 1b + mov x0, x9 + RETGUARD_CHECK(__aarch64_ldadd8_acq_rel, x15) + ret +END(__aarch64_ldadd8_acq_rel) + +ENTRY(__aarch64_ldclr4_acq_rel) + RETGUARD_SETUP(__aarch64_ldclr4_acq_rel, x15) + adrp x9, arm64_has_lse + ldr w9, [x9, :lo12:arm64_has_lse] + cbz w9, 1f + ldclral w0, w0, [x1] + RETGUARD_CHECK(__aarch64_ldclr4_acq_rel, x15) + ret +1: + ldaxr w9, [x1] + bic w11, w9, w0 + stlxr w10, w11, [x1] + cbnz w10, 1b + mov w0, w9 + RETGUARD_CHECK(__aarch64_ldclr4_acq_rel, x15) + ret +END(__aarch64_ldclr4_acq_rel) + +ENTRY(__aarch64_ldset4_acq_rel) + RETGUARD_SETUP(__aarch64_ldset4_acq_rel, x15) + adrp x9, arm64_has_lse + ldr w9, [x9, :lo12:arm64_has_lse] + cbz w9, 1f + ldsetal w0, w0, [x1] + RETGUARD_CHECK(__aarch64_ldset4_acq_rel, x15) + ret +1: + ldaxr w9, [x1] + orr w11, w9, w0 + stlxr w10, w11, [x1] + cbnz w10, 1b + mov w0, w9 + RETGUARD_CHECK(__aarch64_ldset4_acq_rel, x15) + ret +END(__aarch64_ldset4_acq_rel) + +ENTRY(__aarch64_swp4_acq_rel) + RETGUARD_SETUP(__aarch64_swp4_acq_rel, x15) + adrp x9, arm64_has_lse + ldr w9, [x9, :lo12:arm64_has_lse] + cbz w9, 1f + swpal w0, w0, [x1] + RETGUARD_CHECK(__aarch64_swp4_acq_rel, x15) + ret +1: + ldaxr w9, [x1] + stlxr w10, w0, [x1] + cbnz w10, 1b + mov w0, w9 + RETGUARD_CHECK(__aarch64_swp4_acq_rel, x15) + ret +END(__aarch64_swp4_acq_rel) + +ENTRY(__aarch64_swp8_acq_rel) + RETGUARD_SETUP(__aarch64_swp8_acq_rel, x15) + adrp x9, arm64_has_lse + ldr w9, [x9, :lo12:arm64_has_lse] + cbz w9, 1f + swpal x0, x0, [x1] + RETGUARD_CHECK(__aarch64_swp8_acq_rel, x15) + ret +1: + ldaxr x9, [x1] + stlxr w10, x0, [x1] + cbnz w10, 1b + mov x0, x9 + RETGUARD_CHECK(__aarch64_swp8_acq_rel, x15) + ret +END(__aarch64_swp8_acq_rel) blob - 636b7b4043b342495732cc6bc2d7f405decbe790 blob 
+ 837b1fe656de93e30ad7bccac64b135813dfb959 --- sys/arch/arm64/conf/Makefile.arm64 +++ sys/arch/arm64/conf/Makefile.arm64 @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile.arm64,v 1.47 2023/09/06 01:47:36 jsg Exp $ +# $OpenBSD: Makefile.arm64,v 1.48 2024/07/02 10:25:16 kettenis Exp $ # For instructions on building kernels consult the config(8) and options(4) # manual pages. @@ -60,6 +60,7 @@ CMACHFLAGS= -march=armv8-a+nofp+nosimd \ -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer \ -ffixed-x18 CMACHFLAGS+= -ffreestanding ${NOPIE_FLAGS} +CMACHFLAGS+= -moutline-atomics SORTR= sort -R .if ${IDENT:M-DNO_PROPOLICE} CMACHFLAGS+= -fno-stack-protector blob - 833ea3f050f2d0b9a197dc5f7049ad6a84be88c3 blob + c8f9acac72a11843c6d4c5ef67f446537c991884 --- sys/arch/arm64/conf/files.arm64 +++ sys/arch/arm64/conf/files.arm64 @@ -1,4 +1,4 @@ -# $OpenBSD: files.arm64,v 1.69 2024/03/25 17:24:03 patrick Exp $ +# $OpenBSD: files.arm64,v 1.70 2024/07/02 10:25:16 kettenis Exp $ maxpartitions 16 maxusers 2 8 128 @@ -34,6 +34,7 @@ file arch/arm64/arm64/trap.c file arch/arm64/arm64/ast.c file arch/arm64/arm64/cpufunc_asm.S +file arch/arm64/arm64/lse.S file arch/arm64/arm64/support.S file arch/arm64/arm64/bus_dma.c