[llvm-branch-commits] [compiler-rt] a4ac434 - [AArch64] Compiler-rt interface for out-of-line atomics.
Pavel Iliin via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Dec 2 12:11:56 PST 2020
Author: Pavel Iliin
Date: 2020-12-02T20:07:12Z
New Revision: a4ac434c47434d80bca54bab96f295ed4e972cc6
URL: https://github.com/llvm/llvm-project/commit/a4ac434c47434d80bca54bab96f295ed4e972cc6
DIFF: https://github.com/llvm/llvm-project/commit/a4ac434c47434d80bca54bab96f295ed4e972cc6.diff
LOG: [AArch64] Compiler-rt interface for out-of-line atomics.
Out-of-line helper functions to support LSE deployment added.
This is a port of libgcc implementation:
https://gcc.gnu.org/git/?p=gcc.git;h=33befddcb849235353dc263db1c7d07dc15c9faa
Differential Revision: https://reviews.llvm.org/D91156
Added:
compiler-rt/lib/builtins/aarch64/lse.S
Modified:
compiler-rt/cmake/builtin-config-ix.cmake
compiler-rt/lib/builtins/CMakeLists.txt
compiler-rt/lib/builtins/assembly.h
compiler-rt/lib/builtins/cpu_model.c
Removed:
################################################################################
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 16d82b127878..2eeedd49e392 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -23,6 +23,12 @@ int foo(int x, int y) {
")
+builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_LSE
+"
+asm(\".arch armv8-a+lse\");
+asm(\"cas w0, w1, [x2]\");
+")
+
set(ARM64 aarch64)
set(ARM32 arm armhf armv6m armv7m armv7em armv7 armv7s armv7k)
set(HEXAGON hexagon)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 3c29bba612e1..7f3df6ff548d 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -502,9 +502,39 @@ endif()
set(aarch64_SOURCES
${GENERIC_TF_SOURCES}
${GENERIC_SOURCES}
+ cpu_model.c
aarch64/fp_mode.c
)
+# Generate outline atomics helpers from lse.S: one preprocessed .S file per (operation, size, model) combination
+set(CUSTOM_FLAGS ${CMAKE_C_FLAGS})
+if(NOT ANDROID)
+ append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -DVISIBILITY_HIDDEN CUSTOM_FLAGS)
+endif()
+append_list_if(COMPILER_RT_HAS_ASM_LSE -DHAS_ASM_LSE CUSTOM_FLAGS) # with HAS_ASM_LSE lse.S uses LSE mnemonics instead of .inst encodings
+string(REPLACE " " "\t" CUSTOM_FLAGS "${CUSTOM_FLAGS}") # every space becomes a tab; NOTE(review): presumably so the flags are not escaped as one argument in COMMAND below - confirm
+
+foreach(pat cas swp ldadd ldclr ldeor ldset) # operation, passed to lse.S as -DL_<pat>
+ foreach(size 1 2 4 8 16) # passed to lse.S as -DSIZE
+ foreach(model 1 2 3 4) # passed as -DMODEL; lse.S maps 1.._relax, 2.._acq, 3.._rel, 4.._acq_rel
+ if(pat STREQUAL "cas" OR NOT size STREQUAL "16") # size 16 is generated for cas only
+ set(helper_asm outline_atomic_${pat}${size}_${model}.S)
+ add_custom_command(
+ OUTPUT ${helper_asm}
+ COMMAND ${CMAKE_C_COMPILER} -E ${CUSTOM_FLAGS} -DL_${pat} -DSIZE=${size} -DMODEL=${model} # -E: only preprocess lse.S
+ ${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S -o ${helper_asm}
+ DEPENDS aarch64/lse.S assembly.h
+ )
+ set_source_files_properties(${helper_asm} PROPERTIES GENERATED TRUE)
+ set(aarch64_SOURCES # append the generated helper to the AArch64 builtins source list
+ ${aarch64_SOURCES}
+ ${helper_asm}
+ )
+ endif()
+ endforeach(model)
+ endforeach(size)
+endforeach(pat)
+
if (MINGW)
set(aarch64_SOURCES
${aarch64_SOURCES}
diff --git a/compiler-rt/lib/builtins/aarch64/lse.S b/compiler-rt/lib/builtins/aarch64/lse.S
new file mode 100644
index 000000000000..4c75fa524c44
--- /dev/null
+++ b/compiler-rt/lib/builtins/aarch64/lse.S
@@ -0,0 +1,227 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// Out-of-line LSE atomics helpers. Ported from libgcc library.
+// N = {1, 2, 4, 8}
+// M = {1, 2, 4, 8, 16}
+// ORDER = {'relax', 'acq', 'rel', 'acq_rel'}
+// Routines implemented:
+//
+// iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
+// iN __aarch64_swpN_ORDER(iN val, iN *ptr)
+// iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
+// iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
+// iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
+// iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
+//
+// Routines may modify temporary registers tmp0, tmp1, tmp2,
+// return value x0 and the flags only.
+
+#ifdef __aarch64__
+
+#ifdef HAS_ASM_LSE
+.arch armv8-a+lse
+#else
+.arch armv8-a
+#endif
+
+HIDDEN(__aarch64_have_lse_atomics)
+
+// Generate mnemonics for
+// L_cas: SIZE: 1,2,4,8,16 MODEL: 1,2,3,4
+// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8 MODEL: 1,2,3,4
+
+#if SIZE == 1
+#define S b
+#define UXT uxtb
+#define B 0x00000000
+#elif SIZE == 2
+#define S h
+#define UXT uxth
+#define B 0x40000000
+#elif SIZE == 4 || SIZE == 8 || SIZE == 16
+#define S
+#define UXT mov
+#if SIZE == 4
+#define B 0x80000000
+#elif SIZE == 8
+#define B 0xc0000000
+#endif
+#else
+#error
+#endif // SIZE
+
+#if MODEL == 1
+#define SUFF _relax
+#define A
+#define L
+#define M 0x000000
+#define N 0x000000
+#elif MODEL == 2
+#define SUFF _acq
+#define A a
+#define L
+#define M 0x400000
+#define N 0x800000
+#elif MODEL == 3
+#define SUFF _rel
+#define A
+#define L l
+#define M 0x008000
+#define N 0x400000
+#elif MODEL == 4
+#define SUFF _acq_rel
+#define A a
+#define L l
+#define M 0x408000
+#define N 0xc00000
+#else
+#error
+#endif // MODEL
+
+// Define register size.
+#define x(N) GLUE2(x, N)
+#define w(N) GLUE2(w, N)
+#if SIZE < 8
+#define s(N) w(N)
+#else
+#define s(N) x(N)
+#endif
+
+#define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF)
+#define LDXR GLUE4(ld, A, xr, S)
+#define STXR GLUE4(st, L, xr, S)
+
+// Define temporary registers.
+#define tmp0 16
+#define tmp1 17
+#define tmp2 15
+
+// Macro for branch to label if no LSE available
+.macro JUMP_IF_NOT_LSE label
+ adrp x(tmp0), __aarch64_have_lse_atomics
+ ldrb w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
+ cbz w(tmp0), \label
+.endm
+
+#ifdef L_cas
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
+ JUMP_IF_NOT_LSE 8f
+#if SIZE < 16
+#ifdef HAS_ASM_LSE
+#define CAS GLUE4(cas, A, L, S) s(0), s(1), [x2]
+#else
+#define CAS .inst 0x08a07c41 + B + M
+#endif
+ CAS // s(0), s(1), [x2]
+ ret
+8:
+ UXT s(tmp0), s(0)
+0:
+ LDXR s(0), [x2]
+ cmp s(0), s(tmp0)
+ bne 1f
+ STXR w(tmp1), s(1), [x2]
+ cbnz w(tmp1), 0b
+1:
+ ret
+#else
+#define LDXP GLUE3(ld, A, xp)
+#define STXP GLUE3(st, L, xp)
+#ifdef HAS_ASM_LSE
+#define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4]
+#else
+#define CASP .inst 0x48207c82 + M
+#endif
+
+ CASP // x0, x1, x2, x3, [x4]
+ ret
+8:
+ mov x(tmp0), x0
+ mov x(tmp1), x1
+0:
+ LDXP x0, x1, [x4]
+ cmp x0, x(tmp0)
+ ccmp x1, x(tmp1), #0, eq
+ bne 1f
+ STXP w(tmp2), x2, x3, [x4]
+ cbnz w(tmp2), 0b
+1:
+ ret
+#endif
+END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas))
+#endif // L_cas
+
+#ifdef L_swp
+#ifdef HAS_ASM_LSE
+#define SWP GLUE4(swp, A, L, S) s(0), s(0), [x1]
+#else
+#define SWP .inst 0x38208020 + B + N
+#endif
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp))
+ JUMP_IF_NOT_LSE 8f
+ SWP // s(0), s(0), [x1]
+ ret
+8:
+ mov s(tmp0), s(0)
+0:
+ LDXR s(0), [x1]
+ STXR w(tmp1), s(tmp0), [x1]
+ cbnz w(tmp1), 0b
+ ret
+END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp))
+#endif // L_swp
+
+#if defined(L_ldadd) || defined(L_ldclr) || \
+ defined(L_ldeor) || defined(L_ldset)
+
+#ifdef L_ldadd
+#define LDNM ldadd
+#define OP add
+#define OPN 0x0000
+#elif defined(L_ldclr)
+#define LDNM ldclr
+#define OP bic
+#define OPN 0x1000
+#elif defined(L_ldeor)
+#define LDNM ldeor
+#define OP eor
+#define OPN 0x2000
+#elif defined(L_ldset)
+#define LDNM ldset
+#define OP orr
+#define OPN 0x3000
+#else
+#error
+#endif
+
+#ifdef HAS_ASM_LSE
+#define LDOP GLUE4(LDNM, A, L, S) s(0), s(0), [x1]
+#else
+#define LDOP .inst 0x38200020 + OPN + B + N
+#endif
+
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM))
+ JUMP_IF_NOT_LSE 8f
+ LDOP // s(0), s(0), [x1]
+ ret
+8:
+ mov s(tmp0), s(0)
+0:
+ LDXR s(0), [x1]
+ OP s(tmp1), s(0), s(tmp0)
+ STXR w(tmp2), s(tmp1), [x1]
+ cbnz w(tmp2), 0b
+ ret
+END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM))
+#endif // L_ldadd L_ldclr L_ldeor L_ldset
+
+NO_EXEC_STACK_DIRECTIVE
+
+// GNU property note for BTI and PAC
+GNU_PROPERTY_BTI_PAC
+
+#endif // __aarch64__
diff --git a/compiler-rt/lib/builtins/assembly.h b/compiler-rt/lib/builtins/assembly.h
index f437cb87f60a..3b7f592fa95c 100644
--- a/compiler-rt/lib/builtins/assembly.h
+++ b/compiler-rt/lib/builtins/assembly.h
@@ -35,14 +35,18 @@
#define HIDDEN(name) .hidden name
#define LOCAL_LABEL(name) .L_##name
#define FILE_LEVEL_DIRECTIVE
-#if defined(__arm__)
+#if defined(__arm__) || defined(__aarch64__)
#define SYMBOL_IS_FUNC(name) .type name,%function
+#define FUNC_ALIGN \
+ .text SEPARATOR \
+ .balign 16 SEPARATOR
#else
#define SYMBOL_IS_FUNC(name) .type name,@function
+#define FUNC_ALIGN
#endif
#define CONST_SECTION .section .rodata
-#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
+#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
defined(__linux__)
#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
#else
@@ -65,6 +69,58 @@
#endif
+// BTI and PAC gnu property note
+#define NT_GNU_PROPERTY_TYPE_0 5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2
+
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+#define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+#else
+#define BTI_FLAG 0
+#endif
+
+#if __ARM_FEATURE_PAC_DEFAULT & 3
+#define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
+#else
+#define PAC_FLAG 0
+#endif
+
+#define GNU_PROPERTY(type, value) \
+ .pushsection .note.gnu.property, "a" SEPARATOR \
+ .p2align 3 SEPARATOR \
+ .word 4 SEPARATOR \
+ .word 16 SEPARATOR \
+ .word NT_GNU_PROPERTY_TYPE_0 SEPARATOR \
+ .asciz "GNU" SEPARATOR \
+ .word type SEPARATOR \
+ .word 4 SEPARATOR \
+ .word value SEPARATOR \
+ .word 0 SEPARATOR \
+ .popsection
+
+#if BTI_FLAG != 0
+#define BTI_C bti c
+#else
+#define BTI_C
+#endif
+
+#if (BTI_FLAG | PAC_FLAG) != 0
+#define GNU_PROPERTY_BTI_PAC \
+ GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
+#else
+#define GNU_PROPERTY_BTI_PAC
+#endif
+
+#if defined(__clang__) || defined(__GCC_HAVE_DWARF2_CFI_ASM)
+#define CFI_START .cfi_startproc
+#define CFI_END .cfi_endproc
+#else
+#define CFI_START
+#define CFI_END
+#endif
+
#if defined(__arm__)
// Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros:
@@ -131,8 +187,14 @@
#define DEFINE_CODE_STATE
#endif
-#define GLUE2(a, b) a##b
-#define GLUE(a, b) GLUE2(a, b)
+#define GLUE2_(a, b) a##b
+#define GLUE(a, b) GLUE2_(a, b)
+#define GLUE2(a, b) GLUE2_(a, b)
+#define GLUE3_(a, b, c) a##b##c
+#define GLUE3(a, b, c) GLUE3_(a, b, c)
+#define GLUE4_(a, b, c, d) a##b##c##d
+#define GLUE4(a, b, c, d) GLUE4_(a, b, c, d)
+
#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
#ifdef VISIBILITY_HIDDEN
@@ -177,6 +239,16 @@
DECLARE_FUNC_ENCODING \
name:
+#define DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(name) \
+ DEFINE_CODE_STATE \
+ FUNC_ALIGN \
+ .globl name SEPARATOR \
+ SYMBOL_IS_FUNC(name) SEPARATOR \
+ DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \
+ CFI_START SEPARATOR \
+ DECLARE_FUNC_ENCODING \
+ name: SEPARATOR BTI_C
+
#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \
.globl SYMBOL_NAME(name) SEPARATOR \
SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
@@ -193,8 +265,12 @@
#ifdef __ELF__
#define END_COMPILERRT_FUNCTION(name) \
.size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
+#define END_COMPILERRT_OUTLINE_FUNCTION(name) \
+ CFI_END SEPARATOR \
+ .size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
#else
#define END_COMPILERRT_FUNCTION(name)
+#define END_COMPILERRT_OUTLINE_FUNCTION(name)
#endif
#endif // COMPILERRT_ASSEMBLY_H
diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c
index e8b23d5e5381..05ef8492384f 100644
--- a/compiler-rt/lib/builtins/cpu_model.c
+++ b/compiler-rt/lib/builtins/cpu_model.c
@@ -8,10 +8,21 @@
//
// This file is based on LLVM's lib/Support/Host.cpp.
// It implements the operating system Host concept and builtin
-// __cpu_model for the compiler_rt library, for x86 only.
+// __cpu_model for the compiler_rt library for x86 and
+// __aarch64_have_lse_atomics for AArch64.
//
//===----------------------------------------------------------------------===//
+// CONSTRUCTOR_ATTRIBUTE marks a function to be run during program
+// initialization; with HAVE_INIT_PRIORITY it requests priority 101. The
+// priority must be written as a parenthesized argument: __constructor__(101).
+#if defined(HAVE_INIT_PRIORITY)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
+#elif __has_attribute(__constructor__)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
+#else
+// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
+// this runs during initialization.
+#define CONSTRUCTOR_ATTRIBUTE
+#endif
+
#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
defined(_M_X64)) && \
(defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
@@ -665,16 +676,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
#undef setFeature
}
-#if defined(HAVE_INIT_PRIORITY)
-#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101))
-#elif __has_attribute(__constructor__)
-#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
-#else
-// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
-// this runs during initialization.
-#define CONSTRUCTOR_ATTRIBUTE
-#endif
-
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
@@ -749,5 +750,24 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
return 0;
}
-
+#elif defined(__aarch64__)
+// Flag read by the out-of-line atomics helpers (aarch64/lse.S); it is set by
+// init_have_lse_atomics below from the HWCAP_ATOMICS bit of getauxval(AT_HWCAP).
+_Bool __aarch64_have_lse_atomics
+ __attribute__((visibility("hidden"), nocommon));
+#if defined(__has_include)
+#if __has_include(<sys/auxv.h>)
+#include <sys/auxv.h>
+#ifndef AT_HWCAP
+#define AT_HWCAP 16
#endif
+#ifndef HWCAP_ATOMICS
+#define HWCAP_ATOMICS (1 << 8)
+#endif // HWCAP_ATOMICS
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+ unsigned long hwcap = getauxval(AT_HWCAP);
+ __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
+}
+#endif // __has_include(<sys/auxv.h>)
+#endif // defined(__has_include)
+#endif // defined(__aarch64__)
More information about the llvm-branch-commits
mailing list