[llvm] r261568 - ARM: sink atomic release barrier as far as possible into cmpxchg.

Tim Northover via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 22 12:55:52 PST 2016


Author: tnorthover
Date: Mon Feb 22 14:55:50 2016
New Revision: 261568

URL: http://llvm.org/viewvc/llvm-project?rev=261568&view=rev
Log:
ARM: sink atomic release barrier as far as possible into cmpxchg.

DMB instructions can be expensive, so it's best to avoid them if possible. In
atomicrmw operations there will always be an attempted store so a release
barrier is always needed, but in the cmpxchg case we can delay the DMB until we
know we'll definitely try to perform a store (and so need release semantics).

In the strong cmpxchg case this isn't quite free: we must duplicate the LDREX
instructions to skip the barrier on subsequent iterations. The basic outline
becomes:

        ldrex rOld, [rAddr]
        cmp rOld, rDesired
        bne Ldone
        dmb
    Lloop:
        strex rRes, rNew, [rAddr]
        cbz rRes Ldone
        ldrex rOld, [rAddr]
        cmp rOld, rDesired
        beq Lloop
    Ldone:

So we'll skip this version for strong operations in "minsize" functions.

Modified:
    llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
    llvm/trunk/test/CodeGen/ARM/atomic-64bit.ll
    llvm/trunk/test/CodeGen/ARM/atomic-op.ll
    llvm/trunk/test/CodeGen/ARM/cmpxchg-idioms.ll
    llvm/trunk/test/CodeGen/ARM/cmpxchg-weak.ll
    llvm/trunk/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
    llvm/trunk/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll

Modified: llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp?rev=261568&r1=261567&r2=261568&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp (original)
+++ llvm/trunk/lib/CodeGen/AtomicExpandPass.cpp Mon Feb 22 14:55:50 2016
@@ -521,30 +521,62 @@ bool AtomicExpand::expandAtomicCmpXchg(A
   AtomicOrdering MemOpOrder =
       TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder;
 
+  // In implementations which use a barrier to achieve release semantics, we can
+  // delay emitting this barrier until we know a store is actually going to be
+  // attempted. The cost of this delay is that we need 2 copies of the block
+  // emitting the load-linked, affecting code size.
+  //
+  // Ideally, this logic would be unconditional except for the minsize check
+  // since in other cases the extra blocks naturally collapse down to the
+  // minimal loop. Unfortunately, this puts too much stress on later
+  // optimisations so we avoid emitting the extra logic in those cases too.
+  bool HasReleasedLoadBB = !CI->isWeak() && TLI->getInsertFencesForAtomic() &&
+                           SuccessOrder != Monotonic &&
+                           SuccessOrder != Acquire && !F->optForMinSize();
+
+  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
+  // do it even on minsize.
+  bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
+
   // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
   //
   // The full expansion we produce is:
   //     [...]
-  //     fence?
   // cmpxchg.start:
-  //     %loaded = @load.linked(%addr)
-  //     %should_store = icmp eq %loaded, %desired
-  //     br i1 %should_store, label %cmpxchg.trystore,
+  //     %unreleasedload = @load.linked(%addr)
+  //     %should_store = icmp eq %unreleasedload, %desired
+  //     br i1 %should_store, label %cmpxchg.fencedstore,
   //                          label %cmpxchg.nostore
+  // cmpxchg.releasingstore:
+  //     fence?
+  //     br label cmpxchg.trystore
   // cmpxchg.trystore:
+  //     %loaded.trystore = phi [%unreleasedload, %releasingstore],
+  //                            [%releasedload, %cmpxchg.releasedload]
   //     %stored = @store_conditional(%new, %addr)
   //     %success = icmp eq i32 %stored, 0
-  //     br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
+  //     br i1 %success, label %cmpxchg.success,
+  //                     label %cmpxchg.releasedload/%cmpxchg.failure
+  // cmpxchg.releasedload:
+  //     %releasedload = @load.linked(%addr)
+  //     %should_store = icmp eq %releasedload, %desired
+  //     br i1 %should_store, label %cmpxchg.trystore,
+  //                          label %cmpxchg.failure
   // cmpxchg.success:
   //     fence?
   //     br label %cmpxchg.end
   // cmpxchg.nostore:
+  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
+  //                           [%releasedload,
+  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
   //     @load_linked_fail_balance()?
   //     br label %cmpxchg.failure
   // cmpxchg.failure:
   //     fence?
   //     br label %cmpxchg.end
   // cmpxchg.end:
+  //     %loaded = phi [%loaded.nostore, %cmpxchg.failure],
+  //                   [%loaded.trystore, %cmpxchg.trystore]
   //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
   //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
   //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
@@ -553,8 +585,13 @@ bool AtomicExpand::expandAtomicCmpXchg(A
   auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
   auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
   auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
-  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
-  auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
+  auto ReleasedLoadBB =
+      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
+  auto TryStoreBB =
+      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
+  auto ReleasingStoreBB =
+      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
+  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
 
   // This grabs the DebugLoc from CI
   IRBuilder<> Builder(CI);
@@ -564,29 +601,51 @@ bool AtomicExpand::expandAtomicCmpXchg(A
   // the branch entirely.
   std::prev(BB->end())->eraseFromParent();
   Builder.SetInsertPoint(BB);
-  TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
-                        /*IsLoad=*/true);
-  Builder.CreateBr(LoopBB);
+  if (UseUnconditionalReleaseBarrier)
+    TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+                          /*IsLoad=*/true);
+  Builder.CreateBr(StartBB);
 
   // Start the main loop block now that we've taken care of the preliminaries.
-  Builder.SetInsertPoint(LoopBB);
-  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
-  Value *ShouldStore =
-      Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
+  Builder.SetInsertPoint(StartBB);
+  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+  Value *ShouldStore = Builder.CreateICmpEQ(
+      UnreleasedLoad, CI->getCompareOperand(), "should_store");
 
   // If the cmpxchg doesn't actually need any ordering when it fails, we can
   // jump straight past that fence instruction (if it exists).
-  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
+
+  Builder.SetInsertPoint(ReleasingStoreBB);
+  if (!UseUnconditionalReleaseBarrier)
+    TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
+                          /*IsLoad=*/true);
+  Builder.CreateBr(TryStoreBB);
 
   Builder.SetInsertPoint(TryStoreBB);
   Value *StoreSuccess = TLI->emitStoreConditional(
       Builder, CI->getNewValOperand(), Addr, MemOpOrder);
   StoreSuccess = Builder.CreateICmpEQ(
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
+  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
   Builder.CreateCondBr(StoreSuccess, SuccessBB,
-                       CI->isWeak() ? FailureBB : LoopBB);
+                       CI->isWeak() ? FailureBB : RetryBB);
 
-  // Make sure later instructions don't get reordered with a fence if necessary.
+  Builder.SetInsertPoint(ReleasedLoadBB);
+  Value *SecondLoad;
+  if (HasReleasedLoadBB) {
+    SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+    ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
+                                       "should_store");
+
+    // If the cmpxchg doesn't actually need any ordering when it fails, we can
+    // jump straight past that fence instruction (if it exists).
+    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+  } else
+    Builder.CreateUnreachable();
+
+  // Make sure later instructions don't get reordered with a fence if
+  // necessary.
   Builder.SetInsertPoint(SuccessBB);
   TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
                          /*IsLoad=*/true);
@@ -606,14 +665,36 @@ bool AtomicExpand::expandAtomicCmpXchg(A
 
   // Finally, we have control-flow based knowledge of whether the cmpxchg
   // succeeded or not. We expose this to later passes by converting any
-  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
-
-  // Setup the builder so we can create any PHIs we need.
+  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
+  // PHI.
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
   PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
   Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
   Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
 
+  // Setup the builder so we can create any PHIs we need.
+  Value *Loaded;
+  if (!HasReleasedLoadBB)
+    Loaded = UnreleasedLoad;
+  else {
+    Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
+    PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+    TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
+    TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
+
+    Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
+    PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+    NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
+    NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
+
+    Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
+    PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
+    ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
+    ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
+
+    Loaded = ExitLoaded;
+  }
+
   // Look for any users of the cmpxchg that are just comparing the loaded value
   // against the desired one, and replace them with the CFG-derived version.
   SmallVector<ExtractValueInst *, 2> PrunedInsts;

Modified: llvm/trunk/test/CodeGen/ARM/atomic-64bit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/atomic-64bit.ll?rev=261568&r1=261567&r2=261568&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/atomic-64bit.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/atomic-64bit.ll Mon Feb 22 14:55:50 2016
@@ -172,31 +172,31 @@ define i64 @test6(i64* %ptr, i64 %val) {
 define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK-LABEL: test7:
 ; CHECK-DAG: mov [[VAL1LO:r[0-9]+]], r1
-; CHECK-DAG: dmb {{ish$}}
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
-; CHECK-LE-DAG: eor     [[MISMATCH_LO:r[0-9]+]], [[REG1]], [[VAL1LO]]
-; CHECK-LE-DAG: eor     [[MISMATCH_HI:r[0-9]+]], [[REG2]], r2
-; CHECK-BE-DAG: eor     [[MISMATCH_LO:r[0-9]+]], [[REG2]], r2
-; CHECK-BE-DAG: eor     [[MISMATCH_HI:r[0-9]+]], [[REG1]], r1
+; CHECK-LE-DAG: eor     [[MISMATCH_LO:.*]], [[REG1]], [[VAL1LO]]
+; CHECK-LE-DAG: eor     [[MISMATCH_HI:.*]], [[REG2]], r2
+; CHECK-BE-DAG: eor     [[MISMATCH_LO:.*]], [[REG2]], r2
+; CHECK-BE-DAG: eor     [[MISMATCH_HI:.*]], [[REG1]], r1
 ; CHECK: orrs    {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
 ; CHECK: bne
+; CHECK-DAG: dmb {{ish$}}
 ; CHECK: strexd {{[a-z0-9]+}}, {{r[0-9]?[02468]}}, {{r[0-9]?[13579]}}
 ; CHECK: cmp
-; CHECK: bne
+; CHECK: beq
 ; CHECK: dmb {{ish$}}
 
 ; CHECK-THUMB-LABEL: test7:
-; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB-LE-DAG: eor.w     [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2
 ; CHECK-THUMB-LE-DAG: eor.w     [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3
 ; CHECK-THUMB-BE-DAG: eor.w     [[MISMATCH_HI:[a-z0-9]+]], [[REG1]], r2
 ; CHECK-THUMB-BE-DAG: eor.w     [[MISMATCH_LO:[a-z0-9]+]], [[REG2]], r3
-; CHECK-THUMB-LE: orrs    [[MISMATCH_HI]], [[MISMATCH_LO]]
+; CHECK-THUMB-LE: orrs.w    {{.*}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
 ; CHECK-THUMB: bne
+; CHECK-THUMB: dmb {{ish$}}
 ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
 ; CHECK-THUMB: cmp
-; CHECK-THUMB: bne
+; CHECK-THUMB: beq
 ; CHECK-THUMB: dmb {{ish$}}
 
   %pair = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst seq_cst

Modified: llvm/trunk/test/CodeGen/ARM/atomic-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/atomic-op.ll?rev=261568&r1=261567&r2=261568&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/atomic-op.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/atomic-op.ll Mon Feb 22 14:55:50 2016
@@ -272,31 +272,37 @@ define i32 @test_cmpxchg_fail_order(i32
 
   %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
   %oldval = extractvalue { i32, i1 } %pair, 0
-; CHECK-ARMV7:     dmb ish
-; CHECK-ARMV7: [[LOOP_BB:\.?LBB[0-9]+_1]]:
 ; CHECK-ARMV7:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
 ; CHECK-ARMV7:     cmp     [[OLDVAL]], r1
 ; CHECK-ARMV7:     bne     [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
+; CHECK-ARMV7:     dmb ish
+; CHECK-ARMV7: [[LOOP_BB:\.?LBB.*]]:
 ; CHECK-ARMV7:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
 ; CHECK-ARMV7:     cmp     [[SUCCESS]], #0
-; CHECK-ARMV7:     bne     [[LOOP_BB]]
-; CHECK-ARMV7:     dmb     ish
-; CHECK-ARMV7:     bx      lr
+; CHECK-ARMV7:     beq     [[SUCCESS_BB:\.?LBB.*]]
+; CHECK-ARMV7:     ldrex   [[OLDVAL]], [r[[ADDR]]]
+; CHECK-ARMV7:     cmp     [[OLDVAL]], r1
+; CHECK-ARMV7:     beq     [[LOOP_BB]]
 ; CHECK-ARMV7: [[FAIL_BB]]:
 ; CHECK-ARMV7:     clrex
 ; CHECK-ARMV7:     bx      lr
+; CHECK-ARMV7: [[SUCCESS_BB]]:
+; CHECK-ARMV7:     dmb     ish
+; CHECK-ARMV7:     bx      lr
 
-; CHECK-T2:     dmb ish
-; CHECK-T2: [[LOOP_BB:\.?LBB[0-9]+_1]]:
 ; CHECK-T2:     ldrex   [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
 ; CHECK-T2:     cmp     [[OLDVAL]], r1
-; CHECK-T2:     clrexne
-; CHECK-T2:     bxne    lr
+; CHECK-T2:     bne     [[FAIL_BB:\.?LBB.*]]
+; CHECK-T2:     dmb ish
+; CHECK-T2: [[LOOP_BB:\.?LBB.*]]:
 ; CHECK-T2:     strex   [[SUCCESS:r[0-9]+]], r2, [r[[ADDR]]]
 ; CHECK-T2:     cmp     [[SUCCESS]], #0
 ; CHECK-T2:     dmbeq   ish
 ; CHECK-T2:     bxeq    lr
-; CHECK-T2:     b       [[LOOP_BB]]
+; CHECK-T2:     ldrex   [[OLDVAL]], [r[[ADDR]]]
+; CHECK-T2:     cmp     [[OLDVAL]], r1
+; CHECK-T2:     beq     [[LOOP_BB]]
+; CHECK-T2:     clrex
 
   ret i32 %oldval
 }

Modified: llvm/trunk/test/CodeGen/ARM/cmpxchg-idioms.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/cmpxchg-idioms.ll?rev=261568&r1=261567&r2=261568&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/cmpxchg-idioms.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/cmpxchg-idioms.ll Mon Feb 22 14:55:50 2016
@@ -3,26 +3,31 @@
 define i32 @test_return(i32* %p, i32 %oldval, i32 %newval) {
 ; CHECK-LABEL: test_return:
 
-; CHECK: dmb ishst
-
-; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
 ; CHECK: cmp [[LOADED]], r1
 ; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
 
+; CHECK: dmb ishst
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
 ; CHECK: strex [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
-; CHECK: cmp [[STATUS]], #0
-; CHECK: bne [[LOOP]]
+; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
 
+; CHECK: ldrex [[LOADED]], [r0]
+; CHECK: cmp [[LOADED]], r1
+; CHECK: beq [[LOOP]]
+
+; CHECK: [[FAILED]]:
 ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: clrex
 ; CHECK: dmb ish
-; CHECK: movs r0, #1
+; CHECK: movs r0, #0
 ; CHECK: bx lr
 
-; CHECK: [[FAILED]]:
+; CHECK: [[SUCCESS]]:
 ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
 ; CHECK: dmb ish
-; CHECK: movs r0, #0
+; CHECK: movs r0, #1
 ; CHECK: bx lr
 
   %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
@@ -34,26 +39,33 @@ define i32 @test_return(i32* %p, i32 %ol
 define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
 ; CHECK-LABEL: test_return_bool:
 
-; CHECK: dmb ishst
 ; CHECK: uxtb [[OLDBYTE:r[0-9]+]], r1
 
-; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldrexb [[LOADED:r[0-9]+]], [r0]
 ; CHECK: cmp [[LOADED]], [[OLDBYTE]]
 ; CHECK: bne [[FAIL:LBB[0-9]+_[0-9]+]]
 
+; CHECK: dmb ishst
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
 ; CHECK: strexb [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
-; CHECK: cmp [[STATUS]], #0
-; CHECK: bne [[LOOP]]
+; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+
+; CHECK: ldrexb [[LOADED]], [r0]
+; CHECK: cmp [[LOADED]], [[OLDBYTE]]
+; CHECK: beq [[LOOP]]
+
 
   ; FIXME: this eor is redundant. Need to teach DAG combine that.
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: movs [[TMP:r[0-9]+]], #1
+; CHECK: [[FAIL]]:
+; CHECK: clrex
+; CHECK: movs [[TMP:r[0-9]+]], #0
 ; CHECK: eor r0, [[TMP]], #1
 ; CHECK: bx lr
 
-; CHECK: [[FAIL]]:
-; CHECK: movs [[TMP:r[0-9]+]], #0
+; CHECK: [[SUCCESS]]:
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: movs [[TMP:r[0-9]+]], #1
 ; CHECK: eor r0, [[TMP]], #1
 ; CHECK: bx lr
 
@@ -67,26 +79,31 @@ define i1 @test_return_bool(i8* %value,
 define void @test_conditional(i32* %p, i32 %oldval, i32 %newval) {
 ; CHECK-LABEL: test_conditional:
 
-; CHECK: dmb ishst
-
-; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
 ; CHECK: cmp [[LOADED]], r1
 ; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
 
+; CHECK: dmb ishst
+
+; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
 ; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
-; CHECK: cmp [[STATUS]], #0
-; CHECK: bne [[LOOP]]
+; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
 
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: dmb ish
-; CHECK: b.w _bar
+; CHECK: ldrex [[LOADED]], [r0]
+; CHECK: cmp [[LOADED]], r1
+; CHECK: beq [[LOOP]]
 
 ; CHECK: [[FAILED]]:
 ; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: clrex
 ; CHECK: dmb ish
 ; CHECK: b.w _baz
 
+; CHECK: [[SUCCESS]]:
+; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
+; CHECK: dmb ish
+; CHECK: b.w _bar
+
   %pair = cmpxchg i32* %p, i32 %oldval, i32 %newval seq_cst seq_cst
   %success = extractvalue { i32, i1 } %pair, 1
   br i1 %success, label %true, label %false

Modified: llvm/trunk/test/CodeGen/ARM/cmpxchg-weak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/cmpxchg-weak.ll?rev=261568&r1=261567&r2=261568&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/cmpxchg-weak.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/cmpxchg-weak.ll Mon Feb 22 14:55:50 2016
@@ -6,11 +6,11 @@ define void @test_cmpxchg_weak(i32 *%add
   %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
   %oldval = extractvalue { i32, i1 } %pair, 0
 ; CHECK-NEXT: BB#0:
-; CHECK-NEXT:     dmb ish
 ; CHECK-NEXT:     ldrex   [[LOADED:r[0-9]+]], [r0]
 ; CHECK-NEXT:     cmp     [[LOADED]], r1
 ; CHECK-NEXT:     bne     [[LDFAILBB:LBB[0-9]+_[0-9]+]]
 ; CHECK-NEXT: BB#1:
+; CHECK-NEXT:     dmb ish
 ; CHECK-NEXT:     strex   [[SUCCESS:r[0-9]+]], r2, [r0]
 ; CHECK-NEXT:     cmp     [[SUCCESS]], #0
 ; CHECK-NEXT:     bne     [[FAILBB:LBB[0-9]+_[0-9]+]]
@@ -36,13 +36,13 @@ define i1 @test_cmpxchg_weak_to_bool(i32
   %success = extractvalue { i32, i1 } %pair, 1
 
 ; CHECK-NEXT: BB#0:
-; CHECK-NEXT:     dmb ish
 ; CHECK-NEXT:     ldrex   [[LOADED:r[0-9]+]], [r1]
 ; CHECK-NEXT:     cmp     [[LOADED]], r2
 ; CHECK-NEXT:     bne     [[LDFAILBB:LBB[0-9]+_[0-9]+]]
 ; CHECK-NEXT: BB#1:
-; CHECK-NEXT:     strex   [[SUCCESS:r[0-9]+]], r3, [r1]
+; CHECK-NEXT:     dmb ish
 ; CHECK-NEXT:     mov     r0, #0
+; CHECK-NEXT:     strex   [[SUCCESS:r[0-9]+]], r3, [r1]
 ; CHECK-NEXT:     cmp     [[SUCCESS]], #0
 ; CHECK-NEXT:     bxne    lr
 ; CHECK-NEXT:     dmb     ish

Modified: llvm/trunk/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll?rev=261568&r1=261567&r2=261568&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll (original)
+++ llvm/trunk/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll Mon Feb 22 14:55:50 2016
@@ -222,26 +222,37 @@ define i8 @test_atomic_umin_i8(i8* %ptr,
 
 define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
-; CHECK: call void @llvm.arm.dmb(i32 11)
-; CHECK: br label %[[LOOP:.*]]
+; CHECK: br label %[[START:.*]]
 
-; CHECK: [[LOOP]]:
+; CHECK: [[START]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
 
-; CHECK: [[TRY_STORE]]:
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[LOADED_LOOP:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD]]
+
+; CHECK: [[RELEASED_LOAD]]:
+; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i8
+; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i8 [[OLDVAL_LOOP]], %desired
+; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
 
 ; CHECK: [[SUCCESS_BB]]:
 ; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i8 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
 
@@ -251,7 +262,8 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst
 
 ; CHECK: [[DONE]]:
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i8 [[OLDVAL]]
+; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ]
+; CHECK: ret i8 [[LOADED]]
 
   %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
   %old = extractvalue { i8, i1 } %pairold, 0
@@ -260,26 +272,37 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst
 
 define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
-; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
 
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
 
-; CHECK: [[TRY_STORE]]:
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[LOADED_LOOP:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD:.*]]
+
+; CHECK: [[RELEASED_LOAD]]:
+; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i16
+; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i16 [[OLDVAL_LOOP]], %desired
+; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
 
 ; CHECK: [[SUCCESS_BB]]:
 ; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE:.*]]
 
 ; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i16 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
 
@@ -289,7 +312,8 @@ define i16 @test_cmpxchg_i16_seqcst_mono
 
 ; CHECK: [[DONE]]:
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i16 [[OLDVAL]]
+; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ]
+; CHECK: ret i16 [[LOADED]]
 
   %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
   %old = extractvalue { i16, i1 } %pairold, 0
@@ -378,3 +402,39 @@ define i64 @test_cmpxchg_i64_monotonic_m
   %old = extractvalue { i64, i1 } %pairold, 0
   ret i64 %old
 }
+
+define i32 @test_cmpxchg_minsize(i32* %addr, i32 %desired, i32 %new) minsize {
+; CHECK-LABEL: @test_cmpxchg_minsize
+; CHECK:     call void @llvm.arm.dmb(i32 11)
+; CHECK:     br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[START]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK:     call void @llvm.arm.dmb(i32 11)
+; CHECK:     br label %[[END:.*]]
+
+; CHECK: [[NO_STORE_BB]]:
+; CHECK:     call void @llvm.arm.clrex()
+; CHECK:     br label %[[FAILURE_BB]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK:     call void @llvm.arm.dmb(i32 11)
+; CHECK:     br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK:     ret i32 [[LOADED]]
+
+  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %oldval = extractvalue { i32, i1 } %pair, 0
+  ret i32 %oldval
+}

Modified: llvm/trunk/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll?rev=261568&r1=261567&r2=261568&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll (original)
+++ llvm/trunk/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll Mon Feb 22 14:55:50 2016
@@ -3,13 +3,16 @@
 define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: @test_cmpxchg_seq_cst
 ; Intrinsic for "dmb ishst" is then expected
-; CHECK:     call void @llvm.arm.dmb(i32 10)
 ; CHECK:     br label %[[START:.*]]
 
 ; CHECK: [[START]]:
 ; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
 ; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK:     call void @llvm.arm.dmb(i32 10)
+; CHECK:     br label %[[TRY_STORE:.*]]
 
 ; CHECK: [[TRY_STORE]]:
 ; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
@@ -39,13 +42,16 @@ define i32 @test_cmpxchg_seq_cst(i32* %a
 
 define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: @test_cmpxchg_weak_fail
-; CHECK:     call void @llvm.arm.dmb(i32 10)
 ; CHECK:     br label %[[START:.*]]
 
 ; CHECK: [[START]]:
 ; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
 ; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK:     call void @llvm.arm.dmb(i32 10)
+; CHECK:     br label %[[TRY_STORE:.*]]
 
 ; CHECK: [[TRY_STORE]]:
 ; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
@@ -108,3 +114,42 @@ define i32 @test_cmpxchg_monotonic(i32*
   %oldval = extractvalue { i32, i1 } %pair, 0
   ret i32 %oldval
 }
+
+define i32 @test_cmpxchg_seq_cst_minsize(i32* %addr, i32 %desired, i32 %new) minsize {
+; CHECK-LABEL: @test_cmpxchg_seq_cst_minsize
+; CHECK:     br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK:     call void @llvm.arm.dmb(i32 10)
+; CHECK:     br label %[[TRY_STORE:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK:     call void @llvm.arm.dmb(i32 11)
+; CHECK:     br label %[[END:.*]]
+
+; CHECK: [[NO_STORE_BB]]:
+; CHECK:     call void @llvm.arm.clrex()
+; CHECK:     br label %[[FAILURE_BB]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK:     call void @llvm.arm.dmb(i32 11)
+; CHECK:     br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK:     ret i32 [[LOADED]]
+
+  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %oldval = extractvalue { i32, i1 } %pair, 0
+  ret i32 %oldval
+}




More information about the llvm-commits mailing list