[compiler-rt] a4ac434 - [AArch64] Compiler-rt interface for out-of-line atomics.

Pavel Iliin via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 2 12:07:48 PST 2020


Author: Pavel Iliin
Date: 2020-12-02T20:07:12Z
New Revision: a4ac434c47434d80bca54bab96f295ed4e972cc6

URL: https://github.com/llvm/llvm-project/commit/a4ac434c47434d80bca54bab96f295ed4e972cc6
DIFF: https://github.com/llvm/llvm-project/commit/a4ac434c47434d80bca54bab96f295ed4e972cc6.diff

LOG: [AArch64] Compiler-rt interface for out-of-line atomics.

Out-of-line helper functions to support LSE deployment added.
This is a port of libgcc implementation:
https://gcc.gnu.org/git/?p=gcc.git;h=33befddcb849235353dc263db1c7d07dc15c9faa

Differential Revision: https://reviews.llvm.org/D91156

Added: 
    compiler-rt/lib/builtins/aarch64/lse.S

Modified: 
    compiler-rt/cmake/builtin-config-ix.cmake
    compiler-rt/lib/builtins/CMakeLists.txt
    compiler-rt/lib/builtins/assembly.h
    compiler-rt/lib/builtins/cpu_model.c

Removed: 
    


################################################################################
diff  --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 16d82b127878..2eeedd49e392 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -23,6 +23,12 @@ int foo(int x, int y) {
 ")
 
 
+builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_LSE
+"
+asm(\".arch armv8-a+lse\");
+asm(\"cas w0, w1, [x2]\");
+")
+
 set(ARM64 aarch64)
 set(ARM32 arm armhf armv6m armv7m armv7em armv7 armv7s armv7k)
 set(HEXAGON hexagon)

diff  --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 3c29bba612e1..7f3df6ff548d 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -502,9 +502,39 @@ endif()
 set(aarch64_SOURCES
   ${GENERIC_TF_SOURCES}
   ${GENERIC_SOURCES}
+  cpu_model.c
   aarch64/fp_mode.c
 )
 
+# Generate outline atomics helpers from lse.S base
+set(CUSTOM_FLAGS ${CMAKE_C_FLAGS})
+if(NOT ANDROID)
+  append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -DVISIBILITY_HIDDEN CUSTOM_FLAGS)
+endif()
+append_list_if(COMPILER_RT_HAS_ASM_LSE -DHAS_ASM_LSE CUSTOM_FLAGS)
+string(REPLACE " " "\t" CUSTOM_FLAGS "${CUSTOM_FLAGS}")
+
+foreach(pat cas swp ldadd ldclr ldeor ldset)
+  foreach(size 1 2 4 8 16)
+    foreach(model 1 2 3 4)
+      if(pat STREQUAL "cas" OR NOT size STREQUAL "16")
+        set(helper_asm outline_atomic_${pat}${size}_${model}.S)
+        add_custom_command(
+          OUTPUT ${helper_asm}
+          COMMAND ${CMAKE_C_COMPILER} -E ${CUSTOM_FLAGS} -DL_${pat} -DSIZE=${size} -DMODEL=${model}
+                  ${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S -o ${helper_asm}
+          DEPENDS aarch64/lse.S assembly.h
+        )
+        set_source_files_properties(${helper_asm} PROPERTIES GENERATED TRUE)
+        set(aarch64_SOURCES
+          ${aarch64_SOURCES}
+          ${helper_asm}
+        )
+      endif()
+    endforeach(model)
+  endforeach(size)
+endforeach(pat)
+
 if (MINGW)
   set(aarch64_SOURCES
     ${aarch64_SOURCES}

diff  --git a/compiler-rt/lib/builtins/aarch64/lse.S b/compiler-rt/lib/builtins/aarch64/lse.S
new file mode 100644
index 000000000000..4c75fa524c44
--- /dev/null
+++ b/compiler-rt/lib/builtins/aarch64/lse.S
@@ -0,0 +1,227 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../assembly.h"
+
+// Out-of-line LSE atomics helpers. Ported from libgcc library.
+// N = {1, 2, 4, 8}
+// M = {1, 2, 4, 8, 16}
+// ORDER = {'relax', 'acq', 'rel', 'acq_rel'}
+// Routines implemented:
+//
+//  iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
+//  iN __aarch64_swpN_ORDER(iN val, iN *ptr)
+//  iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
+//  iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
+//  iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
+//  iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
+//
+// Routines may modify temporary registers tmp0, tmp1, tmp2,
+// return value x0 and the flags only.
+
+#ifdef __aarch64__
+
+#ifdef HAS_ASM_LSE
+.arch armv8-a+lse
+#else
+.arch armv8-a
+#endif
+
+HIDDEN(__aarch64_have_lse_atomics)
+
+// Generate mnemonics for
+// L_cas:                                 SIZE: 1,2,4,8,16 MODEL: 1,2,3,4
+// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8    MODEL: 1,2,3,4
+
+#if SIZE == 1
+#define S b
+#define UXT uxtb
+#define B 0x00000000
+#elif SIZE == 2
+#define S h
+#define UXT uxth
+#define B 0x40000000
+#elif SIZE == 4 || SIZE == 8 || SIZE == 16
+#define S
+#define UXT mov
+#if SIZE == 4
+#define B 0x80000000
+#elif SIZE == 8
+#define B 0xc0000000
+#endif
+#else
+#error
+#endif // SIZE
+
+#if MODEL == 1
+#define SUFF _relax
+#define A
+#define L
+#define M 0x000000
+#define N 0x000000
+#elif MODEL == 2
+#define SUFF _acq
+#define A a
+#define L
+#define M 0x400000
+#define N 0x800000
+#elif MODEL == 3
+#define SUFF _rel
+#define A
+#define L l
+#define M 0x008000
+#define N 0x400000
+#elif MODEL == 4
+#define SUFF _acq_rel
+#define A a
+#define L l
+#define M 0x408000
+#define N 0xc00000
+#else
+#error
+#endif // MODEL
+
+// Define register size.
+#define x(N) GLUE2(x, N)
+#define w(N) GLUE2(w, N)
+#if SIZE < 8
+#define s(N) w(N)
+#else
+#define s(N) x(N)
+#endif
+
+#define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF)
+#define LDXR GLUE4(ld, A, xr, S)
+#define STXR GLUE4(st, L, xr, S)
+
+// Define temporary registers.
+#define tmp0 16
+#define tmp1 17
+#define tmp2 15
+
+// Macro for branch to label if no LSE available
+.macro JUMP_IF_NOT_LSE label
+        adrp    x(tmp0), __aarch64_have_lse_atomics
+        ldrb    w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
+        cbz     w(tmp0), \label
+.endm
+
+#ifdef L_cas
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
+        JUMP_IF_NOT_LSE 8f
+#if SIZE < 16
+#ifdef HAS_ASM_LSE
+#define CAS GLUE4(cas, A, L, S) s(0), s(1), [x2]
+#else
+#define CAS .inst 0x08a07c41 + B + M
+#endif
+        CAS    // s(0), s(1), [x2]
+        ret
+8:
+        UXT    s(tmp0), s(0)
+0:
+        LDXR   s(0), [x2]
+        cmp    s(0), s(tmp0)
+        bne    1f
+        STXR   w(tmp1), s(1), [x2]
+        cbnz   w(tmp1), 0b
+1:
+        ret
+#else
+#define LDXP GLUE3(ld, A, xp)
+#define STXP GLUE3(st, L, xp)
+#ifdef HAS_ASM_LSE
+#define CASP GLUE3(casp, A, L)  x0, x1, x2, x3, [x4]
+#else
+#define CASP .inst 0x48207c82 + M
+#endif
+
+        CASP   // x0, x1, x2, x3, [x4]
+        ret
+8:
+        mov    x(tmp0), x0
+        mov    x(tmp1), x1
+0:
+        LDXP   x0, x1, [x4]
+        cmp    x0, x(tmp0)
+        ccmp   x1, x(tmp1), #0, eq
+        bne    1f
+        STXP   w(tmp2), x2, x3, [x4]
+        cbnz   w(tmp2), 0b
+1:
+        ret
+#endif
+END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas))
+#endif // L_cas
+
+#ifdef L_swp
+#ifdef HAS_ASM_LSE
+#define SWP GLUE4(swp, A, L, S)  s(0), s(0), [x1]
+#else
+#define SWP .inst 0x38208020 + B + N
+#endif
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp))
+        JUMP_IF_NOT_LSE 8f
+        SWP    // s(0), s(0), [x1]
+        ret
+8:
+        mov    s(tmp0), s(0)
+0:
+        LDXR   s(0), [x1]
+        STXR   w(tmp1), s(tmp0), [x1]
+        cbnz   w(tmp1), 0b
+        ret
+END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp))
+#endif // L_swp
+
+#if defined(L_ldadd) || defined(L_ldclr) ||                                    \
+    defined(L_ldeor) || defined(L_ldset)
+
+#ifdef L_ldadd
+#define LDNM ldadd
+#define OP add
+#define OPN 0x0000
+#elif defined(L_ldclr)
+#define LDNM ldclr
+#define OP bic
+#define OPN 0x1000
+#elif defined(L_ldeor)
+#define LDNM ldeor
+#define OP eor
+#define OPN 0x2000
+#elif defined(L_ldset)
+#define LDNM ldset
+#define OP orr
+#define OPN 0x3000
+#else
+#error
+#endif
+
+#ifdef HAS_ASM_LSE
+#define LDOP GLUE4(LDNM, A, L, S) s(0), s(0), [x1]
+#else
+#define LDOP .inst 0x38200020 + OPN + B + N
+#endif
+
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM))
+        JUMP_IF_NOT_LSE 8f
+        LDOP // s(0), s(0), [x1]
+        ret
+8:
+        mov    s(tmp0), s(0)
+0:
+        LDXR   s(0), [x1]
+        OP     s(tmp1), s(0), s(tmp0)
+        STXR   w(tmp2), s(tmp1), [x1]
+        cbnz   w(tmp2), 0b
+        ret
+END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM))
+#endif // L_ldadd L_ldclr L_ldeor L_ldset
+
+NO_EXEC_STACK_DIRECTIVE
+
+// GNU property note for BTI and PAC
+GNU_PROPERTY_BTI_PAC
+
+#endif // __aarch64__

diff  --git a/compiler-rt/lib/builtins/assembly.h b/compiler-rt/lib/builtins/assembly.h
index f437cb87f60a..3b7f592fa95c 100644
--- a/compiler-rt/lib/builtins/assembly.h
+++ b/compiler-rt/lib/builtins/assembly.h
@@ -35,14 +35,18 @@
 #define HIDDEN(name) .hidden name
 #define LOCAL_LABEL(name) .L_##name
 #define FILE_LEVEL_DIRECTIVE
-#if defined(__arm__)
+#if defined(__arm__) || defined(__aarch64__)
 #define SYMBOL_IS_FUNC(name) .type name,%function
+#define FUNC_ALIGN                                                             \
+  .text SEPARATOR                                                              \
+  .balign 16 SEPARATOR
 #else
 #define SYMBOL_IS_FUNC(name) .type name, at function
+#define FUNC_ALIGN
 #endif
 #define CONST_SECTION .section .rodata
 
-#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
+#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) ||        \
     defined(__linux__)
 #define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
 #else
@@ -65,6 +69,58 @@
 
 #endif
 
+// BTI and PAC gnu property note
+#define NT_GNU_PROPERTY_TYPE_0 5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2
+
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+#define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+#else
+#define BTI_FLAG 0
+#endif
+
+#if __ARM_FEATURE_PAC_DEFAULT & 3
+#define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
+#else
+#define PAC_FLAG 0
+#endif
+
+#define GNU_PROPERTY(type, value)                                              \
+  .pushsection .note.gnu.property, "a" SEPARATOR                               \
+  .p2align 3 SEPARATOR                                                         \
+  .word 4 SEPARATOR                                                            \
+  .word 16 SEPARATOR                                                           \
+  .word NT_GNU_PROPERTY_TYPE_0 SEPARATOR                                       \
+  .asciz "GNU" SEPARATOR                                                       \
+  .word type SEPARATOR                                                         \
+  .word 4 SEPARATOR                                                            \
+  .word value SEPARATOR                                                        \
+  .word 0 SEPARATOR                                                            \
+  .popsection
+
+#if BTI_FLAG != 0
+#define BTI_C bti c
+#else
+#define BTI_C
+#endif
+
+#if (BTI_FLAG | PAC_FLAG) != 0
+#define GNU_PROPERTY_BTI_PAC                                                   \
+  GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
+#else
+#define GNU_PROPERTY_BTI_PAC
+#endif
+
+#if defined(__clang__) || defined(__GCC_HAVE_DWARF2_CFI_ASM)
+#define CFI_START .cfi_startproc
+#define CFI_END .cfi_endproc
+#else
+#define CFI_START
+#define CFI_END
+#endif
+
 #if defined(__arm__)
 
 // Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros:
@@ -131,8 +187,14 @@
 #define DEFINE_CODE_STATE
 #endif
 
-#define GLUE2(a, b) a##b
-#define GLUE(a, b) GLUE2(a, b)
+#define GLUE2_(a, b) a##b
+#define GLUE(a, b) GLUE2_(a, b)
+#define GLUE2(a, b) GLUE2_(a, b)
+#define GLUE3_(a, b, c) a##b##c
+#define GLUE3(a, b, c) GLUE3_(a, b, c)
+#define GLUE4_(a, b, c, d) a##b##c##d
+#define GLUE4(a, b, c, d) GLUE4_(a, b, c, d)
+
 #define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
 
 #ifdef VISIBILITY_HIDDEN
@@ -177,6 +239,16 @@
   DECLARE_FUNC_ENCODING                                                        \
   name:
 
+#define DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(name)                     \
+  DEFINE_CODE_STATE                                                            \
+  FUNC_ALIGN                                                                   \
+  .globl name SEPARATOR                                                        \
+  SYMBOL_IS_FUNC(name) SEPARATOR                                               \
+  DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR                                    \
+  CFI_START SEPARATOR                                                          \
+  DECLARE_FUNC_ENCODING                                                        \
+  name: SEPARATOR BTI_C
+
 #define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target)                         \
   .globl SYMBOL_NAME(name) SEPARATOR                                           \
   SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR                                  \
@@ -193,8 +265,12 @@
 #ifdef __ELF__
 #define END_COMPILERRT_FUNCTION(name)                                          \
   .size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
+#define END_COMPILERRT_OUTLINE_FUNCTION(name)                                  \
+  CFI_END SEPARATOR                                                            \
+  .size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
 #else
 #define END_COMPILERRT_FUNCTION(name)
+#define END_COMPILERRT_OUTLINE_FUNCTION(name)
 #endif
 
 #endif // COMPILERRT_ASSEMBLY_H

diff  --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c
index e8b23d5e5381..05ef8492384f 100644
--- a/compiler-rt/lib/builtins/cpu_model.c
+++ b/compiler-rt/lib/builtins/cpu_model.c
@@ -8,10 +8,21 @@
 //
 //  This file is based on LLVM's lib/Support/Host.cpp.
 //  It implements the operating system Host concept and builtin
-//  __cpu_model for the compiler_rt library, for x86 only.
+//  __cpu_model for the compiler_rt library for x86 and
+//  __aarch64_have_lse_atomics for AArch64.
 //
 //===----------------------------------------------------------------------===//
 
+#if defined(HAVE_INIT_PRIORITY)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101))
+#elif __has_attribute(__constructor__)
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
+#else
+// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
+// this runs during initialization.
+#define CONSTRUCTOR_ATTRIBUTE
+#endif
+
 #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||           \
      defined(_M_X64)) &&                                                       \
     (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
@@ -665,16 +676,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
 #undef setFeature
 }
 
-#if defined(HAVE_INIT_PRIORITY)
-#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101))
-#elif __has_attribute(__constructor__)
-#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
-#else
-// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
-// this runs during initialization.
-#define CONSTRUCTOR_ATTRIBUTE
-#endif
-
 #ifndef _WIN32
 __attribute__((visibility("hidden")))
 #endif
@@ -749,5 +750,24 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
 
   return 0;
 }
-
+#elif defined(__aarch64__)
+// LSE support detection for out-of-line atomics
+// using HWCAP and Auxiliary vector
+_Bool __aarch64_have_lse_atomics
+    __attribute__((visibility("hidden"), nocommon));
+#if defined(__has_include)
+#if __has_include(<sys/auxv.h>)
+#include <sys/auxv.h>
+#ifndef AT_HWCAP
+#define AT_HWCAP 16
 #endif
+#ifndef HWCAP_ATOMICS
+#define HWCAP_ATOMICS (1 << 8)
+#endif
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+  unsigned long hwcap = getauxval(AT_HWCAP);
+  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
+}
+#endif // defined(__has_include)
+#endif // __has_include(<sys/auxv.h>)
+#endif // defined(__aarch64__)


        


More information about the llvm-commits mailing list