[llvm] 8705399 - AArch64: support atomics in GISel

Tim Northover via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 26 06:38:12 PDT 2021


Author: Tim Northover
Date: 2021-04-26T14:38:06+01:00
New Revision: 8705399d010558ca65d9bd8571b4eaf099f07b20

URL: https://github.com/llvm/llvm-project/commit/8705399d010558ca65d9bd8571b4eaf099f07b20
DIFF: https://github.com/llvm/llvm-project/commit/8705399d010558ca65d9bd8571b4eaf099f07b20.diff

LOG: AArch64: support atomics in GISel
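
The practical effect: the G_ATOMICRMW_* / G_ATOMIC_CMPXCHG legalizer rules no longer sit behind a hasLSE() check, and relaxed atomic loads and stores go down the ordinary G_LOAD/G_STORE selection path, so the IR in the new test builds with -global-isel-abort=1 both on plain arm64-apple-ios and with -mcpu=apple-a13. As a minimal sketch, the first case from the added test (same flags as its RUN lines):

; taken from the added test; compiled with
;   llc -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 [-mcpu=apple-a13]
define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) {
  %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
  %val = extractvalue { i32, i1 } %pair, 0
  ret i32 %val
}

Without LSE the CHECK lines expect an ldaxr/cmp/b.ne/stxr retry loop; with -mcpu=apple-a13, a single casa.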

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrGISel.td
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index be8d8352d7f01..07ce7739a1fb1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -202,4 +202,18 @@ let Predicates = [HasNEON] in {
   def : Pat<(v2i32 (fp_to_uint v2f64:$src)),
             (XTNv2i32 (FCVTZUv2f64 V128:$src))>;
 
-}
\ No newline at end of file
+}
+
+let Predicates = [HasNoLSE] in {
+def : Pat<(atomic_cmp_swap_8 GPR64:$addr, GPR32:$desired, GPR32:$new),
+          (CMP_SWAP_8 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
+
+def : Pat<(atomic_cmp_swap_16 GPR64:$addr, GPR32:$desired, GPR32:$new),
+          (CMP_SWAP_16 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
+
+def : Pat<(atomic_cmp_swap_32 GPR64:$addr, GPR32:$desired, GPR32:$new),
+          (CMP_SWAP_32 GPR64:$addr, GPR32:$desired, GPR32:$new)>;
+
+def : Pat<(atomic_cmp_swap_64 GPR64:$addr, GPR64:$desired, GPR64:$new),
+          (CMP_SWAP_64 GPR64:$addr, GPR64:$desired, GPR64:$new)>;
+}
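
These patterns sit behind the new HasNoLSE predicate (added to AArch64InstrInfo.td below): on subtargets without LSE they let the selector map G_ATOMIC_CMPXCHG of each width onto the existing CMP_SWAP_8/16/32/64 pseudos, which the existing pseudo expansion later turns into exclusive load/store loops. The narrow widths are not exercised by the new test; a hypothetical i8 case that the first pattern is meant to cover:

; hypothetical example, not part of the added test
define i8 @cmpxchg_i8(i8* %p, i8 %cmp, i8 %new) {
  %pair = cmpxchg i8* %p, i8 %cmp, i8 %new monotonic monotonic
  %val = extractvalue { i8, i1 } %pair, 0
  ret i8 %val
}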

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0fb4f0891688e..beca38f3e598f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -95,6 +95,7 @@ def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                  AssemblerPredicate<(all_of FeatureCRC), "crc">;
 def HasLSE           : Predicate<"Subtarget->hasLSE()">,
                                  AssemblerPredicate<(all_of FeatureLSE), "lse">;
+def HasNoLSE         : Predicate<"!Subtarget->hasLSE()">;
 def HasRAS           : Predicate<"Subtarget->hasRAS()">,
                                  AssemblerPredicate<(all_of FeatureRAS), "ras">;
 def HasRDM           : Predicate<"Subtarget->hasRDM()">,

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 7160432884fee..a0bc543a08359 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2668,18 +2668,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
 
     auto &MemOp = **I.memoperands_begin();
     uint64_t MemSizeInBytes = MemOp.getSize();
-    if (MemOp.isAtomic()) {
-      // For now we just support s8 acquire loads to be able to compile stack
-      // protector code.
-      if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
-          MemSizeInBytes == 1) {
-        I.setDesc(TII.get(AArch64::LDARB));
-        return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-      }
-      LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
-      return false;
-    }
     unsigned MemSizeInBits = MemSizeInBytes * 8;
+    AtomicOrdering Order = MemOp.getOrdering();
+
+    // Need special instructions for atomics that affect ordering.
+    if (Order != AtomicOrdering::NotAtomic &&
+        Order != AtomicOrdering::Unordered &&
+        Order != AtomicOrdering::Monotonic)
+      return false;
 
 #ifndef NDEBUG
     const Register PtrReg = I.getOperand(1).getReg();
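
The old special case (hand-selecting only s8 acquire loads to LDARB) is gone. Instead, any G_LOAD/G_STORE whose ordering is stronger than monotonic is rejected by this hand-written path and left to be matched elsewhere, presumably by the patterns imported from the .td files given the ldar/stlr in the test output, while unordered and monotonic atomics fall through to the normal addressing-mode selection below. Two cases from the added test illustrate the split:

; seq_cst: selected to ldar on both the NOLSE and LSE configurations
define i32 @atomic_load(i32* %p) {
  %r = load atomic i32, i32* %p seq_cst, align 4
  ret i32 %r
}

; monotonic: the offset is folded like a normal load; the test checks for
; ldr {{w[0-9]+}}, [x0, #16380] (simplified here from atomic_load_relaxed_32)
define i32 @atomic_load_relaxed(i32* %p) {
  %q = getelementptr i32, i32* %p, i32 4095
  %r = load atomic i32, i32* %q monotonic, align 4
  ret i32 %r
}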

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 70a9fbe52d9a9..ea690d8020482 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -496,20 +496,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .clampScalar(0, s8, s64)
       .widenScalarToNextPow2(0, /*Min*/ 8);
 
-  if (ST.hasLSE()) {
-    getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
-        .lowerIf(all(
-            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0),
-            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
-
-    getActionDefinitionsBuilder(
-        {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
-         G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
-         G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
-        .legalIf(all(
-            typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0),
-            atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Monotonic)));
-  }
+  getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
+      .lowerIf(
+          all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0)));
+
+  getActionDefinitionsBuilder(
+      {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
+       G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
+       G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG})
+      .legalIf(all(
+          typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0)));
 
   getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0});
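
With the hasLSE() guard removed, G_ATOMIC_CMPXCHG_WITH_SUCCESS is lowered and the G_ATOMICRMW_* / G_ATOMIC_CMPXCHG operations are legal for s8 through s64 on every subtarget; the atomicOrderingAtLeastOrStrongerThan(Monotonic) clause is dropped as well. The read-modify-write case from the test added below shows the two selection outcomes:

; from fetch_and_or in the added test: a single ldsetal with LSE,
; an ldaxr/orr/stlxr retry loop without it
define i32 @fetch_and_or(i32* %p) {
  %val = atomicrmw or i32* %p, i32 5 seq_cst
  ret i32 %val
}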
 

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
new file mode 100644
index 0000000000000..92cf76a7be45b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -0,0 +1,557 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE,CHECK-NOLSE-O1
+; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-NOLSE,CHECK-NOLSE-O0
+; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE,CHECK-LSE-O1
+; RUN: llc < %s -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -mcpu=apple-a13 -O0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-LSE,CHECK-LSE-O0
+
+define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
+; CHECK-NOLSE-LABEL: val_compare_and_swap:
+; CHECK-NOLSE-O1: LBB0_1:
+; CHECK-NOLSE-O1:     ldaxr   [[VAL:w[0-9]+]], [x0]
+; CHECK-NOLSE-O1:     cmp     [[VAL]], w1
+; CHECK-NOLSE-O1:     b.ne    LBB0_4
+; CHECK-NOLSE-O1:     stxr    [[STATUS:w[0-9]+]], w2, [x0]
+; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB0_1
+; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+; CHECK-NOLSE-O1: LBB0_4:
+; CHECK-NOLSE-O1:     clrex
+; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+
+; CHECK-NOLSE-O0:     mov x[[ADDR:[0-9]+]], x0
+; CHECK-NOLSE-O0: LBB0_1:
+; CHECK-NOLSE-O0:     ldaxr   w0, [x[[ADDR]]]
+; CHECK-NOLSE-O0:     cmp     w0, w1
+; CHECK-NOLSE-O0:     b.ne    LBB0_3
+; CHECK-NOLSE-O0:     stlxr    [[STATUS:w[0-9]+]], w2, [x[[ADDR]]]
+; CHECK-NOLSE-O0:     cbnz    [[STATUS]], LBB0_1
+; CHECK-NOLSE-O0: LBB0_3:
+; CHECK-NOLSE-O0:     ret
+
+; CHECK-LSE-LABEL: val_compare_and_swap:
+; CHECK-LSE-O1: casa w1, w2, [x0]
+; CHECK-LSE-O1: mov x0, x1
+
+; CHECK-LSE-O0: mov     x[[ADDR:[0-9]+]], x0
+; CHECK-LSE-O0: mov     x0, x1
+; CHECK-LSE-O0: casa    w0, w2, [x[[ADDR]]]
+
+  %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
+  %val = extractvalue { i32, i1 } %pair, 0
+  ret i32 %val
+}
+
+define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
+; CHECK-NOLSE-LABEL: val_compare_and_swap_from_load:
+; CHECK-NOLSE-O1:     ldr [[NEW:w[0-9]+]], [x2]
+; CHECK-NOLSE-O1: LBB1_1:
+; CHECK-NOLSE-O1:     ldaxr   [[VAL:w[0-9]+]], [x0]
+; CHECK-NOLSE-O1:     cmp     [[VAL]], w1
+; CHECK-NOLSE-O1:     b.ne    LBB1_4
+; CHECK-NOLSE-O1:     stxr    [[STATUS:w[0-9]+]], [[NEW]], [x0]
+; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB1_1
+; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+; CHECK-NOLSE-O1: LBB1_4:
+; CHECK-NOLSE-O1:     clrex
+; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+
+; CHECK-NOLSE-O0:     mov x[[ADDR:[0-9]+]], x0
+; CHECK-NOLSE-O0:     ldr [[NEW:w[0-9]+]], [x2]
+; CHECK-NOLSE-O0: LBB1_1:
+; CHECK-NOLSE-O0:     ldaxr   w0, [x[[ADDR]]]
+; CHECK-NOLSE-O0:     cmp     w0, w1
+; CHECK-NOLSE-O0:     b.ne    LBB1_3
+; CHECK-NOLSE-O0:     stlxr    [[STATUS:w[0-9]+]], [[NEW]], [x[[ADDR]]]
+; CHECK-NOLSE-O0:     cbnz    [[STATUS]], LBB1_1
+; CHECK-NOLSE-O0: LBB1_3:
+; CHECK-NOLSE-O0:     ret
+
+; CHECK-LSE-LABEL: val_compare_and_swap_from_load:
+; CHECK-LSE-O1: ldr [[NEW:w[0-9]+]], [x2]
+; CHECK-LSE-O1: casa w1, [[NEW]], [x0]
+; CHECK-LSE-O1: mov x0, x1
+
+; CHECK-LSE-O0: mov     x[[ADDR:[0-9]+]], x0
+; CHECK-LSE-O0: mov     x0, x1
+; CHECK-LSE-O0: ldr [[NEW:w[0-9]+]], [x2]
+; CHECK-LSE-O0: casa    w0, [[NEW]], [x[[ADDR]]]
+
+  %new = load i32, i32* %pnew
+  %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
+  %val = extractvalue { i32, i1 } %pair, 0
+  ret i32 %val
+}
+
+define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
+; CHECK-NOLSE-LABEL: val_compare_and_swap_rel:
+; CHECK-NOLSE-O1: LBB2_1:
+; CHECK-NOLSE-O1:     ldaxr   [[VAL:w[0-9]+]], [x0]
+; CHECK-NOLSE-O1:     cmp     [[VAL]], w1
+; CHECK-NOLSE-O1:     b.ne    LBB2_4
+; CHECK-NOLSE-O1:     stlxr    [[STATUS:w[0-9]+]], w2, [x0]
+; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB2_1
+; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+; CHECK-NOLSE-O1: LBB2_4:
+; CHECK-NOLSE-O1:     clrex
+; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+
+; CHECK-NOLSE-O0:     mov x[[ADDR:[0-9]+]], x0
+; CHECK-NOLSE-O0: LBB2_1:
+; CHECK-NOLSE-O0:     ldaxr   w0, [x[[ADDR]]]
+; CHECK-NOLSE-O0:     cmp     w0, w1
+; CHECK-NOLSE-O0:     b.ne    LBB2_3
+; CHECK-NOLSE-O0:     stlxr    [[STATUS:w[0-9]+]], w2, [x[[ADDR]]]
+; CHECK-NOLSE-O0:     cbnz    [[STATUS]], LBB2_1
+; CHECK-NOLSE-O0: LBB2_3:
+; CHECK-NOLSE-O0:     ret
+
+; CHECK-LSE-LABEL: val_compare_and_swap_rel:
+; CHECK-LSE-O1: casal w1, w2, [x0]
+; CHECK-LSE-O1: mov x0, x1
+
+; CHECK-LSE-O0: mov     x[[ADDR:[0-9]+]], x0
+; CHECK-LSE-O0: mov     x0, x1
+; CHECK-LSE-O0: casal   w0, w2, [x[[ADDR]]]
+
+  %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic
+  %val = extractvalue { i32, i1 } %pair, 0
+  ret i32 %val
+}
+
+define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
+; CHECK-NOLSE-LABEL: val_compare_and_swap_64:
+; CHECK-NOLSE-O1: LBB3_1:
+; CHECK-NOLSE-O1:     ldxr   [[VAL:x[0-9]+]], [x0]
+; CHECK-NOLSE-O1:     cmp     [[VAL]], x1
+; CHECK-NOLSE-O1:     b.ne    LBB3_4
+; CHECK-NOLSE-O1:     stxr    [[STATUS:w[0-9]+]], x2, [x0]
+; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB3_1
+; CHECK-NOLSE-O1:     mov     x0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+; CHECK-NOLSE-O1: LBB3_4:
+; CHECK-NOLSE-O1:     clrex
+; CHECK-NOLSE-O1:     mov     x0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+
+; CHECK-NOLSE-O0:     mov x[[ADDR:[0-9]+]], x0
+; CHECK-NOLSE-O0: LBB3_1:
+; CHECK-NOLSE-O0:     ldaxr   x0, [x[[ADDR]]]
+; CHECK-NOLSE-O0:     cmp     x0, x1
+; CHECK-NOLSE-O0:     b.ne    LBB3_3
+; CHECK-NOLSE-O0:     stlxr    [[STATUS:w[0-9]+]], x2, [x[[ADDR]]]
+; CHECK-NOLSE-O0:     cbnz    [[STATUS]], LBB3_1
+; CHECK-NOLSE-O0: LBB3_3:
+; CHECK-NOLSE-O0:     ret
+
+; CHECK-LSE-LABEL: val_compare_and_swap_64:
+; CHECK-LSE-O1: cas x1, x2, [x0]
+; CHECK-LSE-O1: mov x0, x1
+
+; CHECK-LSE-O0: mov     x[[ADDR:[0-9]+]], x0
+; CHECK-LSE-O0: mov     x0, x1
+; CHECK-LSE-O0: cas     x0, x2, [x[[ADDR]]]
+
+  %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic
+  %val = extractvalue { i64, i1 } %pair, 0
+  ret i64 %val
+}
+
+define i32 @fetch_and_nand(i32* %p) #0 {
+; CHECK-NOLSE-LABEL: fetch_and_nand:
+; CHECK-NOLSE-O1: LBB4_1:
+; CHECK-NOLSE-O1:     ldxr    [[VAL:w[0-9]+]], [x0]
+; CHECK-NOLSE-O1:     and     [[NEWTMP:w[0-9]+]], [[VAL]], #0x7
+; CHECK-NOLSE-O1:     mvn     [[NEW:w[0-9]+]], [[NEWTMP]]
+; CHECK-NOLSE-O1:     stlxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
+; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB4_1
+; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+
+; CHECK-NOLSE-O0: ldxr
+; CHECK-NOLSE-O0: stlxr
+
+; CHECK-LSE-LABEL: fetch_and_nand:
+; CHECK-LSE-O1: LBB4_1:
+; CHECK-LSE-O1:     ldxr    w[[VAL:[0-9]+]], [x0]
+; CHECK-LSE-O1:     and     [[NEWTMP:w[0-9]+]], w[[VAL]], #0x7
+; CHECK-LSE-O1:     mvn     [[NEW:w[0-9]+]], [[NEWTMP]]
+; CHECK-LSE-O1:     stlxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
+; CHECK-LSE-O1:     cbnz    [[STATUS]], LBB4_1
+; CHECK-LSE-O1:     mov     x0, x[[VAL]]
+
+  %val = atomicrmw nand i32* %p, i32 7 release
+  ret i32 %val
+}
+
+define i64 @fetch_and_nand_64(i64* %p) #0 {
+; CHECK-NOLSE-LABEL: fetch_and_nand_64:
+; CHECK-NOLSE-O1: LBB5_1:
+; CHECK-NOLSE-O1:     ldaxr    [[VAL:x[0-9]+]], [x0]
+; CHECK-NOLSE-O1:     and     [[NEWTMP:x[0-9]+]], [[VAL]], #0x7
+; CHECK-NOLSE-O1:     mvn     [[NEW:x[0-9]+]], [[NEWTMP]]
+; CHECK-NOLSE-O1:     stlxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
+; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB5_1
+; CHECK-NOLSE-O1:     mov     x0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+
+; CHECK-NOLSE-O0: ldaxr
+; CHECK-NOLSE-O0: stlxr
+
+; CHECK-LSE-LABEL: fetch_and_nand_64:
+; CHECK-LSE-O1: LBB5_1:
+; CHECK-LSE-O1:     ldaxr    [[VAL:x[0-9]+]], [x0]
+; CHECK-LSE-O1:     and     [[NEWTMP:x[0-9]+]], [[VAL]], #0x7
+; CHECK-LSE-O1:     mvn     [[NEW:x[0-9]+]], [[NEWTMP]]
+; CHECK-LSE-O1:     stlxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
+; CHECK-LSE-O1:     cbnz    [[STATUS]], LBB5_1
+; CHECK-LSE-O1:     mov     x0, [[VAL]]
+
+  %val = atomicrmw nand i64* %p, i64 7 acq_rel
+  ret i64 %val
+}
+
+define i32 @fetch_and_or(i32* %p) #0 {
+; CHECK-NOLSE-LABEL: fetch_and_or:
+; CHECK-NOLSE-O1:     mov [[FIVE:w[0-9]+]], #5
+; CHECK-NOLSE-O1: LBB6_1:
+; CHECK-NOLSE-O1:     ldaxr   [[VAL:w[0-9]+]], [x0]
+; CHECK-NOLSE-O1:     orr     [[NEW:w[0-9]+]], [[VAL]], [[FIVE]]
+; CHECK-NOLSE-O1:     stlxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
+; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB6_1
+; CHECK-NOLSE-O1:     mov     w0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+
+; CHECK-NOLSE-O0: ldaxr
+; CHECK-NOLSE-O0: stlxr
+
+; CHECK-LSE-LABEL: fetch_and_or:
+; CHECK-LSE:       ; %bb.0:
+; CHECK-LSE:    mov w8, #5
+; CHECK-LSE:    ldsetal w8, w0, [x0]
+; CHECK-LSE:    ret
+  %val = atomicrmw or i32* %p, i32 5 seq_cst
+  ret i32 %val
+}
+
+define i64 @fetch_and_or_64(i64* %p) #0 {
+; CHECK-NOLSE-LABEL: fetch_and_or_64:
+; CHECK-NOLSE-O1: LBB7_1:
+; CHECK-NOLSE-O1:     ldxr   [[VAL:x[0-9]+]], [x0]
+; CHECK-NOLSE-O1:     orr     [[NEW:x[0-9]+]], [[VAL]], #0x7
+; CHECK-NOLSE-O1:     stxr   [[STATUS:w[0-9]+]], [[NEW]], [x0]
+; CHECK-NOLSE-O1:     cbnz    [[STATUS]], LBB7_1
+; CHECK-NOLSE-O1:     mov     x0, [[VAL]]
+; CHECK-NOLSE-O1:     ret
+
+; CHECK-NOLSE-O0: ldxr
+; CHECK-NOLSE-O0: stxr
+
+; CHECK-LSE-LABEL: fetch_and_or_64:
+; CHECK-LSE:    mov w[[SEVEN:[0-9]+]], #7
+; CHECK-LSE:    ldset x[[SEVEN]], x0, [x0]
+; CHECK-LSE:    ret
+  %val = atomicrmw or i64* %p, i64 7 monotonic
+  ret i64 %val
+}
+
+define void @acquire_fence() #0 {
+; CHECK-NOLSE-LABEL: acquire_fence:
+; CHECK-NOLSE:    dmb ish
+; CHECK-NOLSE:    ret
+;
+; CHECK-LSE-LABEL: acquire_fence:
+; CHECK-LSE:    dmb ish
+; CHECK-LSE:    ret
+   fence acquire
+   ret void
+}
+
+define void @release_fence() #0 {
+; CHECK-NOLSE-LABEL: release_fence:
+; CHECK-NOLSE:    dmb ish
+; CHECK-NOLSE:    ret
+;
+; CHECK-LSE-LABEL: release_fence:
+; CHECK-LSE:       ; %bb.0:
+; CHECK-LSE:    dmb ish
+; CHECK-LSE:    ret
+   fence release
+   ret void
+}
+
+define void @seq_cst_fence() #0 {
+; CHECK-NOLSE-LABEL: seq_cst_fence:
+; CHECK-NOLSE:    dmb ish
+; CHECK-NOLSE:    ret
+;
+; CHECK-LSE-LABEL: seq_cst_fence:
+; CHECK-LSE:       ; %bb.0:
+; CHECK-LSE:    dmb ish
+; CHECK-LSE:    ret
+   fence seq_cst
+   ret void
+}
+
+define i32 @atomic_load(i32* %p) #0 {
+; CHECK-NOLSE-LABEL: atomic_load:
+; CHECK-NOLSE:    ldar w0, [x0]
+; CHECK-NOLSE:    ret
+;
+; CHECK-LSE-LABEL: atomic_load:
+; CHECK-LSE:    ldar w0, [x0]
+; CHECK-LSE:    ret
+   %r = load atomic i32, i32* %p seq_cst, align 4
+   ret i32 %r
+}
+
+define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) #0 {
+; CHECK-NOLSE-LABEL: atomic_load_relaxed_8:
+; CHECK-NOLSE-O1: ldrb    {{w[0-9]+}}, [x0, #4095]
+; CHECK-NOLSE-O1: ldrb    {{w[0-9]+}}, [x0, w1, sxtw]
+; CHECK-NOLSE-O1: ldurb   {{w[0-9]+}}, [x0, #-256]
+; CHECK-NOLSE-O1: add     x[[ADDR:[0-9]+]], x0, #291, lsl #12
+; CHECK-NOLSE-O1: ldrb    {{w[0-9]+}}, [x[[ADDR]]]
+
+; CHECK-LSE: ldrb
+; CHECK-LSE: ldrb
+; CHECK-LSE: ld{{u?}}rb
+; CHECK-LSE: ldrb
+
+  %ptr_unsigned = getelementptr i8, i8* %p, i32 4095
+  %val_unsigned = load atomic i8, i8* %ptr_unsigned monotonic, align 1
+
+  %ptr_regoff = getelementptr i8, i8* %p, i32 %off32
+  %val_regoff = load atomic i8, i8* %ptr_regoff unordered, align 1
+  %tot1 = add i8 %val_unsigned, %val_regoff
+
+  %ptr_unscaled = getelementptr i8, i8* %p, i32 -256
+  %val_unscaled = load atomic i8, i8* %ptr_unscaled monotonic, align 1
+  %tot2 = add i8 %tot1, %val_unscaled
+
+  %ptr_random = getelementptr i8, i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
+  %val_random = load atomic i8, i8* %ptr_random unordered, align 1
+  %tot3 = add i8 %tot2, %val_random
+
+  ret i8 %tot3
+}
+
+define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) #0 {
+; CHECK-NOLSE-LABEL: atomic_load_relaxed_16:
+; CHECK-NOLSE-O1: ldrh    {{w[0-9]+}}, [x0, #8190]
+; CHECK-NOLSE-O1: ldrh    {{w[0-9]+}}, [x0, w1, sxtw #1]
+; CHECK-NOLSE-O1: ldurh   {{w[0-9]+}}, [x0, #-256]
+; CHECK-NOLSE-O1: add     x[[ADDR:[0-9]+]], x0, #291, lsl #12          ; =1191936
+; CHECK-NOLSE-O1: ldrh    {{w[0-9]+}}, [x[[ADDR]]]
+
+; CHECK-LSE: ldrh
+; CHECK-LSE: ldrh
+; CHECK-LSE: ld{{u?}}rh
+; CHECK-LSE: ldrh
+
+  %ptr_unsigned = getelementptr i16, i16* %p, i32 4095
+  %val_unsigned = load atomic i16, i16* %ptr_unsigned monotonic, align 2
+
+  %ptr_regoff = getelementptr i16, i16* %p, i32 %off32
+  %val_regoff = load atomic i16, i16* %ptr_regoff unordered, align 2
+  %tot1 = add i16 %val_unsigned, %val_regoff
+
+  %ptr_unscaled = getelementptr i16, i16* %p, i32 -128
+  %val_unscaled = load atomic i16, i16* %ptr_unscaled monotonic, align 2
+  %tot2 = add i16 %tot1, %val_unscaled
+
+  %ptr_random = getelementptr i16, i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
+  %val_random = load atomic i16, i16* %ptr_random unordered, align 2
+  %tot3 = add i16 %tot2, %val_random
+
+  ret i16 %tot3
+}
+
+define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) #0 {
+; CHECK-NOLSE-LABEL: atomic_load_relaxed_32:
+; CHECK-NOLSE-O1: ldr    {{w[0-9]+}}, [x0, #16380]
+; CHECK-NOLSE-O1: ldr    {{w[0-9]+}}, [x0, w1, sxtw #2]
+; CHECK-NOLSE-O1: ldur   {{w[0-9]+}}, [x0, #-256]
+; CHECK-NOLSE-O1: add     x[[ADDR:[0-9]+]], x0, #291, lsl #12          ; =1191936
+; CHECK-NOLSE-O1: ldr    {{w[0-9]+}}, [x[[ADDR]]]
+
+; CHECK-LSE-LABEL: atomic_load_relaxed_32:
+; CHECK-LSE:    ldr {{w[0-9]+}}, [x0, #16380]
+; CHECK-LSE:    ldr {{w[0-9]+}}, [x0, w1, sxtw #2]
+; CHECK-LSE:    ldur {{w[0-9]+}}, [x0, #-256]
+; CHECK-LSE:    add x[[ADDR:[0-9]+]], x0, #291, lsl #12 ; =1191936
+; CHECK-LSE:    ldr {{w[0-9]+}}, [x[[ADDR]]]
+
+  %ptr_unsigned = getelementptr i32, i32* %p, i32 4095
+  %val_unsigned = load atomic i32, i32* %ptr_unsigned monotonic, align 4
+
+  %ptr_regoff = getelementptr i32, i32* %p, i32 %off32
+  %val_regoff = load atomic i32, i32* %ptr_regoff unordered, align 4
+  %tot1 = add i32 %val_unsigned, %val_regoff
+
+  %ptr_unscaled = getelementptr i32, i32* %p, i32 -64
+  %val_unscaled = load atomic i32, i32* %ptr_unscaled monotonic, align 4
+  %tot2 = add i32 %tot1, %val_unscaled
+
+  %ptr_random = getelementptr i32, i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
+  %val_random = load atomic i32, i32* %ptr_random unordered, align 4
+  %tot3 = add i32 %tot2, %val_random
+
+  ret i32 %tot3
+}
+
+define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) #0 {
+; CHECK-NOLSE-LABEL: atomic_load_relaxed_64:
+; CHECK-NOLSE-O1:    ldr {{x[0-9]+}}, [x0, #32760]
+; CHECK-NOLSE-O1:    ldr {{x[0-9]+}}, [x0, w1, sxtw #3]
+; CHECK-NOLSE-O1:    ldur {{x[0-9]+}}, [x0, #-256]
+; CHECK-NOLSE-O1:    add x[[ADDR:[0-9]+]], x0, #291, lsl #12
+; CHECK-NOLSE-O1:    ldr {{x[0-9]+}}, [x[[ADDR]]]
+
+; CHECK-LSE-LABEL: atomic_load_relaxed_64:
+; CHECK-LSE:    ldr {{x[0-9]+}}, [x0, #32760]
+; CHECK-LSE:    ldr {{x[0-9]+}}, [x0, w1, sxtw #3]
+; CHECK-LSE:    ldur {{x[0-9]+}}, [x0, #-256]
+; CHECK-LSE:    add x[[ADDR:[0-9]+]], x0, #291, lsl #12
+; CHECK-LSE:    ldr {{x[0-9]+}}, [x[[ADDR]]]
+
+  %ptr_unsigned = getelementptr i64, i64* %p, i32 4095
+  %val_unsigned = load atomic i64, i64* %ptr_unsigned monotonic, align 8
+
+  %ptr_regoff = getelementptr i64, i64* %p, i32 %off32
+  %val_regoff = load atomic i64, i64* %ptr_regoff unordered, align 8
+  %tot1 = add i64 %val_unsigned, %val_regoff
+
+  %ptr_unscaled = getelementptr i64, i64* %p, i32 -32
+  %val_unscaled = load atomic i64, i64* %ptr_unscaled monotonic, align 8
+  %tot2 = add i64 %tot1, %val_unscaled
+
+  %ptr_random = getelementptr i64, i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
+  %val_random = load atomic i64, i64* %ptr_random unordered, align 8
+  %tot3 = add i64 %tot2, %val_random
+
+  ret i64 %tot3
+}
+
+
+define void @atomc_store(i32* %p) #0 {
+; CHECK-NOLSE-LABEL: atomc_store:
+; CHECK-NOLSE:    mov w8, #4
+; CHECK-NOLSE:    stlr w8, [x0]
+; CHECK-NOLSE:    ret
+;
+; CHECK-LSE-LABEL: atomc_store:
+; CHECK-LSE:    mov [[FOUR:w[0-9]+]], #4
+; CHECK-LSE:    stlr [[FOUR]], [x0]
+; CHECK-LSE:    ret
+   store atomic i32 4, i32* %p seq_cst, align 4
+   ret void
+}
+
+define void @atomic_store_relaxed_8(i8* %p, i32 %off32, i8 %val) #0 {
+; CHECK-NOLSE-LABEL: atomic_store_relaxed_8:
+; CHECK-NOLSE: strb    w2
+; CHECK-NOLSE: strb    w2
+; CHECK-NOLSE: strb    w2
+; CHECK-NOLSE: strb    w2
+
+  %ptr_unsigned = getelementptr i8, i8* %p, i32 4095
+  store atomic i8 %val, i8* %ptr_unsigned monotonic, align 1
+
+  %ptr_regoff = getelementptr i8, i8* %p, i32 %off32
+  store atomic i8 %val, i8* %ptr_regoff unordered, align 1
+
+  %ptr_unscaled = getelementptr i8, i8* %p, i32 -256
+  store atomic i8 %val, i8* %ptr_unscaled monotonic, align 1
+
+  %ptr_random = getelementptr i8, i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
+  store atomic i8 %val, i8* %ptr_random unordered, align 1
+
+  ret void
+}
+
+define void @atomic_store_relaxed_16(i16* %p, i32 %off32, i16 %val) #0 {
+; CHECK-NOLSE-LABEL: atomic_store_relaxed_16:
+; CHECK-NOLSE: strh    w2
+; CHECK-NOLSE: strh    w2
+; CHECK-NOLSE: strh    w2
+; CHECK-NOLSE: strh    w2
+
+; CHECK-LSE: strh w2
+; CHECK-LSE: strh w2
+; CHECK-LSE: strh w2
+; CHECK-LSE: strh w2
+
+  %ptr_unsigned = getelementptr i16, i16* %p, i32 4095
+  store atomic i16 %val, i16* %ptr_unsigned monotonic, align 2
+
+  %ptr_regoff = getelementptr i16, i16* %p, i32 %off32
+  store atomic i16 %val, i16* %ptr_regoff unordered, align 2
+
+  %ptr_unscaled = getelementptr i16, i16* %p, i32 -128
+  store atomic i16 %val, i16* %ptr_unscaled monotonic, align 2
+
+  %ptr_random = getelementptr i16, i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
+  store atomic i16 %val, i16* %ptr_random unordered, align 2
+
+  ret void
+}
+
+define void @atomic_store_relaxed_32(i32* %p, i32 %off32, i32 %val) #0 {
+; CHECK-NOLSE-LABEL: atomic_store_relaxed_32:
+; CHECK-NOLSE:    str w2
+; CHECK-NOLSE:    str w2
+; CHECK-NOLSE:    stur w2
+; CHECK-NOLSE:    str w2
+
+; CHECK-LSE-LABEL: atomic_store_relaxed_32:
+; CHECK-LSE:    str w2
+; CHECK-LSE:    str w2
+; CHECK-LSE:    stur w2
+; CHECK-LSE:    str w2
+  %ptr_unsigned = getelementptr i32, i32* %p, i32 4095
+  store atomic i32 %val, i32* %ptr_unsigned monotonic, align 4
+
+  %ptr_regoff = getelementptr i32, i32* %p, i32 %off32
+  store atomic i32 %val, i32* %ptr_regoff unordered, align 4
+
+  %ptr_unscaled = getelementptr i32, i32* %p, i32 -64
+  store atomic i32 %val, i32* %ptr_unscaled monotonic, align 4
+
+  %ptr_random = getelementptr i32, i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
+  store atomic i32 %val, i32* %ptr_random unordered, align 4
+
+  ret void
+}
+
+define void @atomic_store_relaxed_64(i64* %p, i32 %off32, i64 %val) #0 {
+; CHECK-NOLSE-LABEL: atomic_store_relaxed_64:
+; CHECK-NOLSE:    str x2
+; CHECK-NOLSE:    str x2
+; CHECK-NOLSE:    stur x2
+; CHECK-NOLSE:    str x2
+
+; CHECK-LSE-LABEL: atomic_store_relaxed_64:
+; CHECK-LSE:    str x2
+; CHECK-LSE:    str x2
+; CHECK-LSE:    stur x2
+; CHECK-LSE:    str x2
+  %ptr_unsigned = getelementptr i64, i64* %p, i32 4095
+  store atomic i64 %val, i64* %ptr_unsigned monotonic, align 8
+
+  %ptr_regoff = getelementptr i64, i64* %p, i32 %off32
+  store atomic i64 %val, i64* %ptr_regoff unordered, align 8
+
+  %ptr_unscaled = getelementptr i64, i64* %p, i32 -32
+  store atomic i64 %val, i64* %ptr_unscaled monotonic, align 8
+
+  %ptr_random = getelementptr i64, i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
+  store atomic i64 %val, i64* %ptr_random unordered, align 8
+
+  ret void
+}
+
+attributes #0 = { nounwind }
