[llvm] r217076 - Use target-dependent emitLeading/TrailingFence instead of the target-independent insertLeading/TrailingFence (in AtomicExpandPass)

Robin Morisset morisset at google.com
Wed Sep 3 14:01:03 PDT 2014


Author: morisset
Date: Wed Sep  3 16:01:03 2014
New Revision: 217076

URL: http://llvm.org/viewvc/llvm-project?rev=217076&view=rev
Log:
Use target-dependent emitLeading/TrailingFence instead of the target-independent insertLeading/TrailingFence (in AtomicExpandPass)

Fixes two latent bugs:
- There was no fence inserted before an expanded seq_cst load (unsound on Power).
- There was only a release fence before seq_cst stores (again unsound, in particular on Power).
    It is not even clear whether this is correct on ARM Swift processors (where release
    fences are DMB ishst instead of DMB ish). This behaviour is currently preserved on
    ARM Swift as it has not been shown to be incorrect; I would love to get documentation
    stating whether it is correct or not.
These two bugs were not triggered because Power is not (yet) using this pass, and these
behaviours happen to be (mostly?) working on ARM
(although they completely butcher the semantics of the LLVM IR).

See:
http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075821.html
for an example of the problems that can be caused by the second of these bugs.

I couldn't see a way of fixing these in a completely target-independent way without
adding lots of unnecessary fences on ARM, hence the target-dependent parts of this
patch.

This patch implements the new target-dependent parts only for ARM (the default
of not doing anything is enough for AArch64); other architectures will use this
infrastructure in later patches. For readers not familiar with these hooks, the sketch
below shows roughly what another backend would override.
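
The following is illustration only and is not part of this patch: "MyTargetLowering"
is a hypothetical backend, and the fence choices are deliberately conservative rather
than the recommended mapping of any real architecture. It uses the same hook signatures
as the ARM implementation below. A target that returns true from
getInsertFencesForAtomic() overrides the two new hooks; real targets will pick cheaper
barriers per ordering, as ARM does with the various DMB flavours.

  // Hypothetical backend, conservative fence placement for illustration only.
  void MyTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                          AtomicOrdering Ord, bool IsStore,
                                          bool IsLoad) const {
    // Full fence before release/acq_rel operations and before seq_cst stores.
    if (Ord == Release || Ord == AcquireRelease ||
        (Ord == SequentiallyConsistent && IsStore))
      Builder.CreateFence(SequentiallyConsistent);
  }

  void MyTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                           AtomicOrdering Ord, bool IsStore,
                                           bool IsLoad) const {
    // Full fence after acquire/acq_rel/seq_cst operations.
    if (Ord == Acquire || Ord == AcquireRelease ||
        Ord == SequentiallyConsistent)
      Builder.CreateFence(SequentiallyConsistent);
  }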

Modified:
    llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.h
    llvm/trunk/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
    llvm/trunk/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll

Modified: llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp?rev=217076&r1=217075&r2=217076&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp (original)
+++ llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp Wed Sep  3 16:01:03 2014
@@ -44,9 +44,6 @@ namespace {
     bool expandAtomicStore(StoreInst *LI);
     bool expandAtomicRMW(AtomicRMWInst *AI);
     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
-
-    AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
-    void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
   };
 }
 
@@ -98,20 +95,29 @@ bool AtomicExpand::runOnFunction(Functio
 }
 
 bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
-  // Load instructions don't actually need a leading fence, even in the
-  // SequentiallyConsistent case.
+  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
+  // If getInsertFencesForAtomic() returns true, then the target does not want
+  // to deal with memory orders, and emitLeading/TrailingFence should take care
+  // of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
+  // should preserve the ordering.
   AtomicOrdering MemOpOrder =
-      TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic()
-          ? Monotonic
-          : LI->getOrdering();
+      TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
+  IRBuilder<> Builder(LI);
+
+  // Note that although no fence is required before an atomic load on ARM, it is
+  // required before SequentiallyConsistent loads for the recommended Power
+  // mapping (see http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html).
+  // So we let the target choose what to emit.
+  TLI->emitLeadingFence(Builder, LI->getOrdering(),
+                        /*IsStore=*/false, /*IsLoad=*/true);
 
-  // The only 64-bit load guaranteed to be single-copy atomic by the ARM is
+  // The only 64-bit load guaranteed to be single-copy atomic by ARM is
   // an ldrexd (A3.5.3).
-  IRBuilder<> Builder(LI);
-  Value *Val = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
-      Builder, LI->getPointerOperand(), MemOpOrder);
+  Value *Val =
+      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
 
-  insertTrailingFence(Builder, LI->getOrdering());
+  TLI->emitTrailingFence(Builder, LI->getOrdering(),
+                         /*IsStore=*/false, /*IsLoad=*/true);
 
   LI->replaceAllUsesWith(Val);
   LI->eraseFromParent();
@@ -134,11 +140,18 @@ bool AtomicExpand::expandAtomicStore(Sto
 }
 
 bool AtomicExpand::expandAtomicRMW(AtomicRMWInst *AI) {
+  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
   AtomicOrdering Order = AI->getOrdering();
   Value *Addr = AI->getPointerOperand();
   BasicBlock *BB = AI->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
+  // If getInsertFencesForAtomic() returns true, then the target does not want to
+  // deal with memory orders, and emitLeading/TrailingFence should take care of
+  // everything. Otherwise, emitLeading/TrailingFence are no-ops and we should
+  // preserve the ordering.
+  AtomicOrdering MemOpOrder =
+      TLI->getInsertFencesForAtomic() ? Monotonic : Order;
 
   // Given: atomicrmw some_op iN* %addr, iN %incr ordering
   //
@@ -165,13 +178,12 @@ bool AtomicExpand::expandAtomicRMW(Atomi
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order);
+  TLI->emitLeadingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true);
   Builder.CreateBr(LoopBB);
 
   // Start the main loop block now that we've taken care of the preliminaries.
   Builder.SetInsertPoint(LoopBB);
-  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
-      Builder, Addr, MemOpOrder);
+  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
 
   Value *NewVal;
   switch (AI->getOperation()) {
@@ -218,14 +230,13 @@ bool AtomicExpand::expandAtomicRMW(Atomi
   }
 
   Value *StoreSuccess =
-      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
-          Builder, NewVal, Addr, MemOpOrder);
+      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
   Value *TryAgain = Builder.CreateICmpNE(
       StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
   Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
 
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
-  insertTrailingFence(Builder, Order);
+  TLI->emitTrailingFence(Builder, Order, /*IsStore=*/true, /*IsLoad=*/true);
 
   AI->replaceAllUsesWith(Loaded);
   AI->eraseFromParent();
@@ -234,12 +245,19 @@ bool AtomicExpand::expandAtomicRMW(Atomi
 }
 
 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+  auto TLI = TM->getSubtargetImpl()->getTargetLowering();
   AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
   AtomicOrdering FailureOrder = CI->getFailureOrdering();
   Value *Addr = CI->getPointerOperand();
   BasicBlock *BB = CI->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
+  // If getInsertFencesForAtomic() returns true, then the target does not want to
+  // deal with memory orders, and emitLeading/TrailingFence should take care of
+  // everything. Otherwise, emitLeading/TrailingFence are no-ops and we should
+  // preserve the ordering.
+  AtomicOrdering MemOpOrder =
+      TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;
 
   // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
   //
@@ -280,13 +298,13 @@ bool AtomicExpand::expandAtomicCmpXchg(A
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
+  TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+                        /*IsLoad=*/true);
   Builder.CreateBr(LoopBB);
 
   // Start the main loop block now that we've taken care of the preliminaries.
   Builder.SetInsertPoint(LoopBB);
-  Value *Loaded = TM->getSubtargetImpl()->getTargetLowering()->emitLoadLinked(
-      Builder, Addr, MemOpOrder);
+  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
   Value *ShouldStore =
       Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
 
@@ -295,9 +313,8 @@ bool AtomicExpand::expandAtomicCmpXchg(A
   Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
 
   Builder.SetInsertPoint(TryStoreBB);
-  Value *StoreSuccess =
-      TM->getSubtargetImpl()->getTargetLowering()->emitStoreConditional(
-          Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+  Value *StoreSuccess = TLI->emitStoreConditional(
+      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
   StoreSuccess = Builder.CreateICmpEQ(
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
   Builder.CreateCondBr(StoreSuccess, SuccessBB,
@@ -305,11 +322,13 @@ bool AtomicExpand::expandAtomicCmpXchg(A
 
   // Make sure later instructions don't get reordered with a fence if necessary.
   Builder.SetInsertPoint(SuccessBB);
-  insertTrailingFence(Builder, SuccessOrder);
+  TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
+                         /*IsLoad=*/true);
   Builder.CreateBr(ExitBB);
 
   Builder.SetInsertPoint(FailureBB);
-  insertTrailingFence(Builder, FailureOrder);
+  TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
+                         /*IsLoad=*/true);
   Builder.CreateBr(ExitBB);
 
   // Finally, we have control-flow based knowledge of whether the cmpxchg
@@ -358,27 +377,3 @@ bool AtomicExpand::expandAtomicCmpXchg(A
   CI->eraseFromParent();
   return true;
 }
-
-AtomicOrdering AtomicExpand::insertLeadingFence(IRBuilder<> &Builder,
-                                                       AtomicOrdering Ord) {
-  if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
-    return Ord;
-
-  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
-    Builder.CreateFence(Release);
-
-  // The exclusive operations don't need any barrier if we're adding separate
-  // fences.
-  return Monotonic;
-}
-
-void AtomicExpand::insertTrailingFence(IRBuilder<> &Builder,
-                                              AtomicOrdering Ord) {
-  if (!TM->getSubtargetImpl()->getTargetLowering()->getInsertFencesForAtomic())
-    return;
-
-  if (Ord == Acquire || Ord == AcquireRelease)
-    Builder.CreateFence(Acquire);
-  else if (Ord == SequentiallyConsistent)
-    Builder.CreateFence(SequentiallyConsistent);
-}

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=217076&r1=217075&r2=217076&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed Sep  3 16:01:03 2014
@@ -2723,7 +2723,7 @@ static SDValue LowerATOMIC_FENCE(SDValue
 
   ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
   AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
-  unsigned Domain = ARM_MB::ISH;
+  ARM_MB::MemBOpt Domain = ARM_MB::ISH;
   if (Subtarget->isMClass()) {
     // Only a full system barrier exists in the M-class architectures.
     Domain = ARM_MB::SY;
@@ -10982,6 +10982,63 @@ bool ARMTargetLowering::shouldConvertCon
   return true;
 }
 
+static void makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
+  Constant *CDomain = Builder.getInt32(Domain);
+  Builder.CreateCall(DMB, CDomain);
+}
+
+// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
+void ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+                                         AtomicOrdering Ord, bool IsStore,
+                                         bool IsLoad) const {
+  if (!getInsertFencesForAtomic())
+    return;
+
+  switch (Ord) {
+  case NotAtomic:
+  case Unordered:
+    llvm_unreachable("Invalid fence: unordered/non-atomic");
+  case Monotonic:
+  case Acquire:
+    return; // Nothing to do
+  case SequentiallyConsistent:
+    if (!IsStore)
+      return; // Nothing to do
+    /*FALLTHROUGH*/
+  case Release:
+  case AcquireRelease:
+    if (Subtarget->isSwift())
+      makeDMB(Builder, ARM_MB::ISHST);
+    // FIXME: add a comment with a link to documentation justifying this.
+    else
+      makeDMB(Builder, ARM_MB::ISH);
+    return;
+  }
+}
+
+void ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+                                          AtomicOrdering Ord, bool IsStore,
+                                          bool IsLoad) const {
+  if (!getInsertFencesForAtomic())
+    return;
+
+  switch (Ord) {
+  case NotAtomic:
+  case Unordered:
+    llvm_unreachable("Invalid fence: unordered/non-atomic");
+  case Monotonic:
+  case Release:
+    return; // Nothing to do
+  case Acquire:
+  case AcquireRelease:
+  case SequentiallyConsistent:
+    makeDMB(Builder, ARM_MB::ISH);
+    return;
+  }
+}
+
 bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
   // Loads and stores less than 64-bits are already atomic; ones above that
   // are doomed anyway, so defer to the default libcall and blame the OS when

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=217076&r1=217075&r2=217076&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Wed Sep  3 16:01:03 2014
@@ -397,6 +397,11 @@ namespace llvm {
     Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                 Value *Addr, AtomicOrdering Ord) const override;
 
+    void emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+                          bool IsStore, bool IsLoad) const override;
+    void emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
+                           bool IsStore, bool IsLoad) const override;
+
     bool shouldExpandAtomicInIR(Instruction *Inst) const override;
 
     bool useLoadStackGuardNode() const override;

Modified: llvm/trunk/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll?rev=217076&r1=217075&r2=217076&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll (original)
+++ llvm/trunk/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll Wed Sep  3 16:01:03 2014
@@ -2,7 +2,7 @@
 
 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
 ; CHECK-LABEL: @test_atomic_xchg_i8
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -12,7 +12,7 @@ define i8 @test_atomic_xchg_i8(i8* %ptr,
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
   ret i8 %res
@@ -20,7 +20,7 @@ define i8 @test_atomic_xchg_i8(i8* %ptr,
 
 define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
 ; CHECK-LABEL: @test_atomic_add_i16
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
@@ -31,7 +31,7 @@ define i16 @test_atomic_add_i16(i16* %pt
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i16 [[OLDVAL]]
   %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
   ret i16 %res
@@ -39,7 +39,7 @@ define i16 @test_atomic_add_i16(i16* %pt
 
 define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
 ; CHECK-LABEL: @test_atomic_sub_i32
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
@@ -48,7 +48,7 @@ define i32 @test_atomic_sub_i32(i32* %pt
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence acquire
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i32 [[OLDVAL]]
   %res = atomicrmw sub i32* %ptr, i32 %subend acquire
   ret i32 %res
@@ -56,7 +56,7 @@ define i32 @test_atomic_sub_i32(i32* %pt
 
 define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
 ; CHECK-LABEL: @test_atomic_and_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -67,7 +67,7 @@ define i8 @test_atomic_and_i8(i8* %ptr,
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw and i8* %ptr, i8 %andend release
   ret i8 %res
@@ -75,7 +75,7 @@ define i8 @test_atomic_and_i8(i8* %ptr,
 
 define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
 ; CHECK-LABEL: @test_atomic_nand_i16
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
@@ -87,7 +87,7 @@ define i16 @test_atomic_nand_i16(i16* %p
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i16 [[OLDVAL]]
   %res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst
   ret i16 %res
@@ -95,7 +95,7 @@ define i16 @test_atomic_nand_i16(i16* %p
 
 define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
 ; CHECK-LABEL: @test_atomic_or_i64
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
@@ -115,7 +115,7 @@ define i64 @test_atomic_or_i64(i64* %ptr
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i64 [[OLDVAL]]
   %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
   ret i64 %res
@@ -123,7 +123,7 @@ define i64 @test_atomic_or_i64(i64* %ptr
 
 define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
 ; CHECK-LABEL: @test_atomic_xor_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -134,7 +134,7 @@ define i8 @test_atomic_xor_i8(i8* %ptr,
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst
   ret i8 %res
@@ -142,7 +142,7 @@ define i8 @test_atomic_xor_i8(i8* %ptr,
 
 define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
 ; CHECK-LABEL: @test_atomic_max_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -154,7 +154,7 @@ define i8 @test_atomic_max_i8(i8* %ptr,
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw max i8* %ptr, i8 %maxend seq_cst
   ret i8 %res
@@ -162,7 +162,7 @@ define i8 @test_atomic_max_i8(i8* %ptr,
 
 define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
 ; CHECK-LABEL: @test_atomic_min_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -174,7 +174,7 @@ define i8 @test_atomic_min_i8(i8* %ptr,
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw min i8* %ptr, i8 %minend seq_cst
   ret i8 %res
@@ -182,7 +182,7 @@ define i8 @test_atomic_min_i8(i8* %ptr,
 
 define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
 ; CHECK-LABEL: @test_atomic_umax_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -194,7 +194,7 @@ define i8 @test_atomic_umax_i8(i8* %ptr,
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst
   ret i8 %res
@@ -202,7 +202,7 @@ define i8 @test_atomic_umax_i8(i8* %ptr,
 
 define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
 ; CHECK-LABEL: @test_atomic_umin_i8
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
@@ -214,7 +214,7 @@ define i8 @test_atomic_umin_i8(i8* %ptr,
 ; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
 ; CHECK: [[END]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: ret i8 [[OLDVAL]]
   %res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst
   ret i8 %res
@@ -222,7 +222,7 @@ define i8 @test_atomic_umin_i8(i8* %ptr,
 
 define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 
 ; CHECK: [[LOOP]]:
@@ -238,11 +238,11 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
@@ -256,7 +256,7 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst
 
 define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
-; CHECK: fence release
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 
 ; CHECK: [[LOOP]]:
@@ -272,11 +272,11 @@ define i16 @test_cmpxchg_i16_seqcst_mono
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence seq_cst
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
@@ -290,7 +290,7 @@ define i16 @test_cmpxchg_i16_seqcst_mono
 
 define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[LOOP:.*]]
 
 ; CHECK: [[LOOP]]:
@@ -304,11 +304,11 @@ define i32 @test_cmpxchg_i32_acquire_acq
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence acquire
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK: fence acquire
+; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:
@@ -322,7 +322,7 @@ define i32 @test_cmpxchg_i32_acquire_acq
 
 define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[LOOP:.*]]
 
 ; CHECK: [[LOOP]]:
@@ -347,11 +347,11 @@ define i64 @test_cmpxchg_i64_monotonic_m
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK: br label %[[DONE]]
 
 ; CHECK: [[DONE]]:

Modified: llvm/trunk/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll?rev=217076&r1=217075&r2=217076&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll (original)
+++ llvm/trunk/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll Wed Sep  3 16:01:03 2014
@@ -2,7 +2,8 @@
 
 define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: @test_cmpxchg_seq_cst
-; CHECK:     fence release
+; The leading fence is expected to be the intrinsic for "dmb ishst"
+; CHECK:     call void @llvm.arm.dmb(i32 10)
 ; CHECK:     br label %[[START:.*]]
 
 ; CHECK: [[START]]:
@@ -16,11 +17,11 @@ define i32 @test_cmpxchg_seq_cst(i32* %a
 ; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK:     fence seq_cst
+; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[END:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK:     fence seq_cst
+; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[END]]
 
 ; CHECK: [[END]]:
@@ -34,7 +35,7 @@ define i32 @test_cmpxchg_seq_cst(i32* %a
 
 define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: @test_cmpxchg_weak_fail
-; CHECK:     fence release
+; CHECK:     call void @llvm.arm.dmb(i32 10)
 ; CHECK:     br label %[[START:.*]]
 
 ; CHECK: [[START]]:
@@ -48,11 +49,11 @@ define i1 @test_cmpxchg_weak_fail(i32* %
 ; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK:     fence seq_cst
+; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[END:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK:     br label %[[END]]
 
 ; CHECK: [[END]]:
@@ -66,7 +67,7 @@ define i1 @test_cmpxchg_weak_fail(i32* %
 
 define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: @test_cmpxchg_monotonic
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK:     br label %[[START:.*]]
 
 ; CHECK: [[START]]:
@@ -80,11 +81,11 @@ define i32 @test_cmpxchg_monotonic(i32*
 ; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
 
 ; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK:     br label %[[END:.*]]
 
 ; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
+; CHECK-NOT: dmb
 ; CHECK:     br label %[[END]]
 
 ; CHECK: [[END]]:




