[llvm] [PowerPC] support branch hint for AtomicExpandImpl::expandAtomicCmpXchg (PR #152366)

zhijian lin via llvm-commits llvm-commits@lists.llvm.org
Fri Aug 8 11:39:37 PDT 2025


https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/152366

From 5afd4560e10021571218fc10ef3131527d75e85a Mon Sep 17 00:00:00 2001
From: zhijian <zhijian@ca.ibm.com>
Date: Wed, 16 Jul 2025 18:57:19 +0000
Subject: [PATCH 1/5] add branch hint

---
 llvm/include/llvm/CodeGen/TargetLowering.h  | 5 +++++
 llvm/lib/CodeGen/AtomicExpandPass.cpp       | 9 ++++++---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 +++++
 llvm/lib/Target/PowerPC/PPCISelLowering.h   | 1 +
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cbdc1b6031680..e7fb34b036e8b 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2332,6 +2332,11 @@ class LLVM_ABI TargetLoweringBase {
   virtual Instruction *emitTrailingFence(IRBuilderBase &Builder,
                                          Instruction *Inst,
                                          AtomicOrdering Ord) const;
+
+  virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+    return nullptr;
+  }
+
   /// @}
 
   // Emits code that executes when the comparison result in the ll/sc
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 3f3d5dc90711f..a5319f3a2440a 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1454,7 +1454,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
   // If the cmpxchg doesn't actually need any ordering when it fails, we can
   // jump straight past that fence instruction (if it exists).
-  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
+  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
+                       TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
 
   Builder.SetInsertPoint(ReleasingStoreBB);
   if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
@@ -1473,7 +1474,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
   BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
   Builder.CreateCondBr(StoreSuccess, SuccessBB,
-                       CI->isWeak() ? FailureBB : RetryBB);
+                       CI->isWeak() ? FailureBB : RetryBB,
+                       TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
 
   Builder.SetInsertPoint(ReleasedLoadBB);
   Value *SecondLoad;
@@ -1486,7 +1488,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
     // If the cmpxchg doesn't actually need any ordering when it fails, we can
     // jump straight past that fence instruction (if it exists).
-    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB,
+                         TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
     // Update PHI node in TryStoreBB.
     LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
   } else
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 459525ed4ee9a..853923ac4125a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -68,6 +68,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
@@ -12816,6 +12817,10 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
   return Builder.CreateXor(Call, Builder.getInt32(1));
 }
 
+MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+  return MDBuilder(Ctx).createLikelyBranchWeights();
+}
+
 // The mappings for emitLeading/TrailingFence is taken from
 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 124c7116dc3b5..9f73c5587805a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -938,6 +938,7 @@ namespace llvm {
     Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                    AtomicOrdering Ord) const override;
 
+    virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
     bool shouldInlineQuadwordAtomics() const;
 
     TargetLowering::AtomicExpansionKind

From b6976bf482cda0e2b74102847f68efd02d28c26c Mon Sep 17 00:00:00 2001
From: zhijian <zhijian@ca.ibm.com>
Date: Thu, 24 Jul 2025 13:56:14 +0000
Subject: [PATCH 2/5] modify test case

---
 llvm/include/llvm/CodeGen/TargetLowering.h    |   3 +-
 llvm/lib/CodeGen/AtomicExpandPass.cpp         |  16 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |   3 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |   3 +-
 .../CodeGen/PowerPC/PR35812-neg-cmpxchg.ll    |  72 +--
 llvm/test/CodeGen/PowerPC/all-atomics.ll      | 211 +++----
 .../PowerPC/atomic-compare-exchange-weak.ll   |  34 +-
 llvm/test/CodeGen/PowerPC/atomic-float.ll     |  90 ++-
 .../PowerPC/atomicrmw-cond-sub-clamp.ll       | 466 ++++++++-------
 .../PowerPC/atomicrmw-uinc-udec-wrap.ll       | 462 ++++++++-------
 .../CodeGen/PowerPC/atomics-regression.ll     | 544 ++++++++----------
 llvm/test/CodeGen/PowerPC/atomics.ll          | 114 ++--
 llvm/test/CodeGen/PowerPC/loop-comment.ll     |   5 +-
 13 files changed, 991 insertions(+), 1032 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e7fb34b036e8b..027bcc5bc53ae 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2333,7 +2333,8 @@ class LLVM_ABI TargetLoweringBase {
                                          Instruction *Inst,
                                          AtomicOrdering Ord) const;
 
-  virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+  virtual MDNode *
+  getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
     return nullptr;
   }
 
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a5319f3a2440a..abaa8b6e841f6 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1454,8 +1454,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
   // If the cmpxchg doesn't actually need any ordering when it fails, we can
   // jump straight past that fence instruction (if it exists).
-  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
-                       TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+  Builder.CreateCondBr(
+      ShouldStore, ReleasingStoreBB, NoStoreBB,
+      TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
 
   Builder.SetInsertPoint(ReleasingStoreBB);
   if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
@@ -1473,9 +1474,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   StoreSuccess = Builder.CreateICmpEQ(
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
   BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
-  Builder.CreateCondBr(StoreSuccess, SuccessBB,
-                       CI->isWeak() ? FailureBB : RetryBB,
-                       TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+  Builder.CreateCondBr(
+      StoreSuccess, SuccessBB, CI->isWeak() ? FailureBB : RetryBB,
+      TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
 
   Builder.SetInsertPoint(ReleasedLoadBB);
   Value *SecondLoad;
@@ -1488,8 +1489,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
     // If the cmpxchg doesn't actually need any ordering when it fails, we can
     // jump straight past that fence instruction (if it exists).
-    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB,
-                         TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+    Builder.CreateCondBr(
+        ShouldStore, TryStoreBB, NoStoreBB,
+        TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
     // Update PHI node in TryStoreBB.
     LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
   } else
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 853923ac4125a..b218532e56b6a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12817,7 +12817,8 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
   return Builder.CreateXor(Call, Builder.getInt32(1));
 }
 
-MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(
+    LLVMContext &Ctx) const {
   return MDBuilder(Ctx).createLikelyBranchWeights();
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 9f73c5587805a..4892a3c603a6c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -938,7 +938,8 @@ namespace llvm {
     Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                    AtomicOrdering Ord) const override;
 
-    virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
+    virtual MDNode *
+    getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
     bool shouldInlineQuadwordAtomics() const;
 
     TargetLowering::AtomicExpansionKind
diff --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
index b7852c3c3e6e0..2d8e0e869a860 100644
--- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
+++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -19,51 +19,53 @@ define signext i32 @main() nounwind {
 ; CHECK-NEXT:    addi 3, 1, 46
 ; CHECK-NEXT:    lharx 4, 0, 3
 ; CHECK-NEXT:    cmplwi 4, 33059
-; CHECK-NEXT:    bne 0, .LBB0_4
+; CHECK-NEXT:    bne- 0, .LBB0_4
 ; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    li 4, 234
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_2: # %cmpxchg.trystore
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    sthcx. 4, 0, 3
-; CHECK-NEXT:    beq 0, .LBB0_7
+; CHECK-NEXT:    beq+ 0, .LBB0_5
 ; CHECK-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 5, 0, 3
 ; CHECK-NEXT:    cmplwi 5, 33059
-; CHECK-NEXT:    beq 0, .LBB0_2
+; CHECK-NEXT:    beq+ 0, .LBB0_2
 ; CHECK-NEXT:  .LBB0_4: # %cmpxchg.nostore
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    b .LBB0_8
-; CHECK-NEXT:  .LBB0_5: # %L.B0000
+; CHECK-NEXT:    crxor 20, 20, 20
+; CHECK-NEXT:    b .LBB0_6
+; CHECK-NEXT:  .LBB0_5: # %cmpxchg.success
+; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:  .LBB0_6: # %cmpxchg.end
+; CHECK-NEXT:    bc 4, 20, .LBB0_9
+; CHECK-NEXT:  # %bb.7: # %L.B0000
 ; CHECK-NEXT:    lhz 3, 46(1)
 ; CHECK-NEXT:    cmplwi 3, 234
-; CHECK-NEXT:    bne 0, .LBB0_9
-; CHECK-NEXT:  # %bb.6: # %L.B0001
+; CHECK-NEXT:    bne 0, .LBB0_10
+; CHECK-NEXT:  # %bb.8: # %L.B0001
 ; CHECK-NEXT:    addis 3, 2, .L_MergedGlobals@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .L_MergedGlobals@toc@l
 ; CHECK-NEXT:    bl puts
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    b .LBB0_11
-; CHECK-NEXT:  .LBB0_7: # %cmpxchg.success
-; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    b .LBB0_5
-; CHECK-NEXT:  .LBB0_8: # %L.B0003
+; CHECK-NEXT:    b .LBB0_12
+; CHECK-NEXT:  .LBB0_9: # %L.B0003
 ; CHECK-NEXT:    addis 3, 2, .L_MergedGlobals@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .L_MergedGlobals@toc@l
 ; CHECK-NEXT:    addi 3, 3, 16
-; CHECK-NEXT:    b .LBB0_10
-; CHECK-NEXT:  .LBB0_9: # %L.B0005
+; CHECK-NEXT:    b .LBB0_11
+; CHECK-NEXT:  .LBB0_10: # %L.B0005
 ; CHECK-NEXT:    addis 3, 2, .L_MergedGlobals@toc@ha
 ; CHECK-NEXT:    addi 3, 3, .L_MergedGlobals@toc@l
 ; CHECK-NEXT:    addi 3, 3, 64
-; CHECK-NEXT:  .LBB0_10: # %L.B0003
+; CHECK-NEXT:  .LBB0_11: # %L.B0003
 ; CHECK-NEXT:    bl puts
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:  .LBB0_11: # %L.B0003
+; CHECK-NEXT:  .LBB0_12: # %L.B0003
 ; CHECK-NEXT:    addi 1, 1, 48
 ; CHECK-NEXT:    ld 0, 16(1)
 ; CHECK-NEXT:    mtlr 0
@@ -83,7 +85,7 @@ define signext i32 @main() nounwind {
 ; CHECK-P7-NEXT:    srw 6, 5, 4
 ; CHECK-P7-NEXT:    clrlwi 6, 6, 16
 ; CHECK-P7-NEXT:    cmplwi 6, 33059
-; CHECK-P7-NEXT:    bne 0, .LBB0_4
+; CHECK-P7-NEXT:    bne- 0, .LBB0_4
 ; CHECK-P7-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; CHECK-P7-NEXT:    lis 6, 0
 ; CHECK-P7-NEXT:    li 7, 234
@@ -92,51 +94,53 @@ define signext i32 @main() nounwind {
 ; CHECK-P7-NEXT:    slw 7, 7, 4
 ; CHECK-P7-NEXT:    slw 6, 6, 4
 ; CHECK-P7-NEXT:    not 6, 6
-; CHECK-P7-NEXT:    .p2align 4
 ; CHECK-P7-NEXT:  .LBB0_2: # %cmpxchg.trystore
 ; CHECK-P7-NEXT:    #
 ; CHECK-P7-NEXT:    and 5, 5, 6
 ; CHECK-P7-NEXT:    or 5, 5, 7
 ; CHECK-P7-NEXT:    stwcx. 5, 0, 3
-; CHECK-P7-NEXT:    beq 0, .LBB0_7
+; CHECK-P7-NEXT:    beq+ 0, .LBB0_5
 ; CHECK-P7-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; CHECK-P7-NEXT:    #
 ; CHECK-P7-NEXT:    lwarx 5, 0, 3
 ; CHECK-P7-NEXT:    srw 8, 5, 4
 ; CHECK-P7-NEXT:    clrlwi 8, 8, 16
 ; CHECK-P7-NEXT:    cmplwi 8, 33059
-; CHECK-P7-NEXT:    beq 0, .LBB0_2
+; CHECK-P7-NEXT:    beq+ 0, .LBB0_2
 ; CHECK-P7-NEXT:  .LBB0_4: # %cmpxchg.nostore
+; CHECK-P7-NEXT:    crxor 20, 20, 20
 ; CHECK-P7-NEXT:    lwsync
-; CHECK-P7-NEXT:    b .LBB0_8
-; CHECK-P7-NEXT:  .LBB0_5: # %L.B0000
+; CHECK-P7-NEXT:    b .LBB0_6
+; CHECK-P7-NEXT:  .LBB0_5: # %cmpxchg.success
+; CHECK-P7-NEXT:    lwsync
+; CHECK-P7-NEXT:    creqv 20, 20, 20
+; CHECK-P7-NEXT:  .LBB0_6: # %cmpxchg.end
+; CHECK-P7-NEXT:    bc 4, 20, .LBB0_9
+; CHECK-P7-NEXT:  # %bb.7: # %L.B0000
 ; CHECK-P7-NEXT:    lhz 3, 46(1)
 ; CHECK-P7-NEXT:    cmplwi 3, 234
-; CHECK-P7-NEXT:    bne 0, .LBB0_9
-; CHECK-P7-NEXT:  # %bb.6: # %L.B0001
+; CHECK-P7-NEXT:    bne 0, .LBB0_10
+; CHECK-P7-NEXT:  # %bb.8: # %L.B0001
 ; CHECK-P7-NEXT:    addis 3, 2, .L_MergedGlobals@toc@ha
 ; CHECK-P7-NEXT:    addi 3, 3, .L_MergedGlobals@toc@l
 ; CHECK-P7-NEXT:    bl puts
 ; CHECK-P7-NEXT:    nop
 ; CHECK-P7-NEXT:    li 3, 0
-; CHECK-P7-NEXT:    b .LBB0_11
-; CHECK-P7-NEXT:  .LBB0_7: # %cmpxchg.success
-; CHECK-P7-NEXT:    lwsync
-; CHECK-P7-NEXT:    b .LBB0_5
-; CHECK-P7-NEXT:  .LBB0_8: # %L.B0003
+; CHECK-P7-NEXT:    b .LBB0_12
+; CHECK-P7-NEXT:  .LBB0_9: # %L.B0003
 ; CHECK-P7-NEXT:    addis 3, 2, .L_MergedGlobals@toc@ha
 ; CHECK-P7-NEXT:    addi 3, 3, .L_MergedGlobals@toc@l
 ; CHECK-P7-NEXT:    addi 3, 3, 16
-; CHECK-P7-NEXT:    b .LBB0_10
-; CHECK-P7-NEXT:  .LBB0_9: # %L.B0005
+; CHECK-P7-NEXT:    b .LBB0_11
+; CHECK-P7-NEXT:  .LBB0_10: # %L.B0005
 ; CHECK-P7-NEXT:    addis 3, 2, .L_MergedGlobals@toc@ha
 ; CHECK-P7-NEXT:    addi 3, 3, .L_MergedGlobals@toc@l
 ; CHECK-P7-NEXT:    addi 3, 3, 64
-; CHECK-P7-NEXT:  .LBB0_10: # %L.B0003
+; CHECK-P7-NEXT:  .LBB0_11: # %L.B0003
 ; CHECK-P7-NEXT:    bl puts
 ; CHECK-P7-NEXT:    nop
 ; CHECK-P7-NEXT:    li 3, 1
-; CHECK-P7-NEXT:  .LBB0_11: # %L.B0003
+; CHECK-P7-NEXT:  .LBB0_12: # %L.B0003
 ; CHECK-P7-NEXT:    addi 1, 1, 48
 ; CHECK-P7-NEXT:    ld 0, 16(1)
 ; CHECK-P7-NEXT:    mtlr 0
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 07afea75aec67..7e892fc4ae6eb 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -4347,19 +4347,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 8, sc@toc@l(4)
 ; CHECK-NEXT:    lbarx 5, 0, 6
 ; CHECK-NEXT:    cmplw 5, 7
-; CHECK-NEXT:    bne 0, .LBB3_4
+; CHECK-NEXT:    bne- 0, .LBB3_4
 ; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore276
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_2: # %cmpxchg.trystore275
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stbcx. 8, 0, 6
-; CHECK-NEXT:    beq 0, .LBB3_4
+; CHECK-NEXT:    beq+ 0, .LBB3_4
 ; CHECK-NEXT:  # %bb.3: # %cmpxchg.releasedload274
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 5, 0, 6
 ; CHECK-NEXT:    cmplw 5, 7
-; CHECK-NEXT:    beq 0, .LBB3_2
+; CHECK-NEXT:    beq+ 0, .LBB3_2
 ; CHECK-NEXT:  .LBB3_4: # %cmpxchg.nostore272
 ; CHECK-NEXT:    addi 7, 3, uc@toc@l
 ; CHECK-NEXT:    lwsync
@@ -4367,20 +4366,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 9, uc@toc@l(3)
 ; CHECK-NEXT:    lbarx 8, 0, 7
 ; CHECK-NEXT:    cmplw 8, 9
-; CHECK-NEXT:    bne 0, .LBB3_8
+; CHECK-NEXT:    bne- 0, .LBB3_8
 ; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore257
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 5, 5, 24
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_6: # %cmpxchg.trystore256
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stbcx. 5, 0, 7
-; CHECK-NEXT:    beq 0, .LBB3_8
+; CHECK-NEXT:    beq+ 0, .LBB3_8
 ; CHECK-NEXT:  # %bb.7: # %cmpxchg.releasedload255
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 8, 0, 7
 ; CHECK-NEXT:    cmplw 8, 9
-; CHECK-NEXT:    beq 0, .LBB3_6
+; CHECK-NEXT:    beq+ 0, .LBB3_6
 ; CHECK-NEXT:  .LBB3_8: # %cmpxchg.nostore253
 ; CHECK-NEXT:    addis 5, 2, ss@toc@ha
 ; CHECK-NEXT:    lwsync
@@ -4390,21 +4388,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 8, 5, ss@toc@l
 ; CHECK-NEXT:    lharx 9, 0, 8
 ; CHECK-NEXT:    cmplw 9, 10
-; CHECK-NEXT:    bne 0, .LBB3_12
+; CHECK-NEXT:    bne- 0, .LBB3_12
 ; CHECK-NEXT:  # %bb.9: # %cmpxchg.fencedstore238
 ; CHECK-NEXT:    extsb 11, 11
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 11, 11, 16
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_10: # %cmpxchg.trystore237
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    sthcx. 11, 0, 8
-; CHECK-NEXT:    beq 0, .LBB3_12
+; CHECK-NEXT:    beq+ 0, .LBB3_12
 ; CHECK-NEXT:  # %bb.11: # %cmpxchg.releasedload236
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 9, 0, 8
 ; CHECK-NEXT:    cmplw 9, 10
-; CHECK-NEXT:    beq 0, .LBB3_10
+; CHECK-NEXT:    beq+ 0, .LBB3_10
 ; CHECK-NEXT:  .LBB3_12: # %cmpxchg.nostore234
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 9, ss@toc@l(5)
@@ -4414,21 +4411,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 9, 5, us@toc@l
 ; CHECK-NEXT:    lharx 10, 0, 9
 ; CHECK-NEXT:    cmplw 10, 11
-; CHECK-NEXT:    bne 0, .LBB3_16
+; CHECK-NEXT:    bne- 0, .LBB3_16
 ; CHECK-NEXT:  # %bb.13: # %cmpxchg.fencedstore219
 ; CHECK-NEXT:    extsb 12, 12
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 12, 12, 16
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_14: # %cmpxchg.trystore218
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    sthcx. 12, 0, 9
-; CHECK-NEXT:    beq 0, .LBB3_16
+; CHECK-NEXT:    beq+ 0, .LBB3_16
 ; CHECK-NEXT:  # %bb.15: # %cmpxchg.releasedload217
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 10, 0, 9
 ; CHECK-NEXT:    cmplw 10, 11
-; CHECK-NEXT:    beq 0, .LBB3_14
+; CHECK-NEXT:    beq+ 0, .LBB3_14
 ; CHECK-NEXT:  .LBB3_16: # %cmpxchg.nostore215
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 10, us@toc@l(5)
@@ -4438,20 +4434,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 10, 5, si@toc@l
 ; CHECK-NEXT:    lwarx 11, 0, 10
 ; CHECK-NEXT:    cmplw 11, 12
-; CHECK-NEXT:    bne 0, .LBB3_20
+; CHECK-NEXT:    bne- 0, .LBB3_20
 ; CHECK-NEXT:  # %bb.17: # %cmpxchg.fencedstore200
 ; CHECK-NEXT:    extsb 0, 0
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_18: # %cmpxchg.trystore199
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 0, 0, 10
-; CHECK-NEXT:    beq 0, .LBB3_20
+; CHECK-NEXT:    beq+ 0, .LBB3_20
 ; CHECK-NEXT:  # %bb.19: # %cmpxchg.releasedload198
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 11, 0, 10
 ; CHECK-NEXT:    cmplw 11, 12
-; CHECK-NEXT:    beq 0, .LBB3_18
+; CHECK-NEXT:    beq+ 0, .LBB3_18
 ; CHECK-NEXT:  .LBB3_20: # %cmpxchg.nostore196
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stw 11, si@toc@l(5)
@@ -4461,20 +4456,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 11, 5, ui@toc@l
 ; CHECK-NEXT:    lwarx 12, 0, 11
 ; CHECK-NEXT:    cmplw 12, 0
-; CHECK-NEXT:    bne 0, .LBB3_24
+; CHECK-NEXT:    bne- 0, .LBB3_24
 ; CHECK-NEXT:  # %bb.21: # %cmpxchg.fencedstore181
 ; CHECK-NEXT:    extsb 30, 30
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_22: # %cmpxchg.trystore180
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 30, 0, 11
-; CHECK-NEXT:    beq 0, .LBB3_24
+; CHECK-NEXT:    beq+ 0, .LBB3_24
 ; CHECK-NEXT:  # %bb.23: # %cmpxchg.releasedload179
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 12, 0, 11
 ; CHECK-NEXT:    cmplw 12, 0
-; CHECK-NEXT:    beq 0, .LBB3_22
+; CHECK-NEXT:    beq+ 0, .LBB3_22
 ; CHECK-NEXT:  .LBB3_24: # %cmpxchg.nostore177
 ; CHECK-NEXT:    addis 30, 2, sll@toc@ha
 ; CHECK-NEXT:    lwsync
@@ -4484,20 +4478,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 12, 30, sll@toc@l
 ; CHECK-NEXT:    ldarx 0, 0, 12
 ; CHECK-NEXT:    cmpld 0, 29
-; CHECK-NEXT:    bne 0, .LBB3_28
+; CHECK-NEXT:    bne- 0, .LBB3_28
 ; CHECK-NEXT:  # %bb.25: # %cmpxchg.fencedstore162
 ; CHECK-NEXT:    extsb 28, 28
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_26: # %cmpxchg.trystore161
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 28, 0, 12
-; CHECK-NEXT:    beq 0, .LBB3_28
+; CHECK-NEXT:    beq+ 0, .LBB3_28
 ; CHECK-NEXT:  # %bb.27: # %cmpxchg.releasedload160
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 0, 0, 12
 ; CHECK-NEXT:    cmpld 0, 29
-; CHECK-NEXT:    beq 0, .LBB3_26
+; CHECK-NEXT:    beq+ 0, .LBB3_26
 ; CHECK-NEXT:  .LBB3_28: # %cmpxchg.nostore158
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    std 0, sll@toc@l(30)
@@ -4507,20 +4500,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 0, 30, ull@toc@l
 ; CHECK-NEXT:    ldarx 29, 0, 0
 ; CHECK-NEXT:    cmpld 29, 28
-; CHECK-NEXT:    bne 0, .LBB3_32
+; CHECK-NEXT:    bne- 0, .LBB3_32
 ; CHECK-NEXT:  # %bb.29: # %cmpxchg.fencedstore143
 ; CHECK-NEXT:    extsb 27, 27
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_30: # %cmpxchg.trystore142
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 27, 0, 0
-; CHECK-NEXT:    beq 0, .LBB3_32
+; CHECK-NEXT:    beq+ 0, .LBB3_32
 ; CHECK-NEXT:  # %bb.31: # %cmpxchg.releasedload141
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 29, 0, 0
 ; CHECK-NEXT:    cmpld 29, 28
-; CHECK-NEXT:    beq 0, .LBB3_30
+; CHECK-NEXT:    beq+ 0, .LBB3_30
 ; CHECK-NEXT:  .LBB3_32: # %cmpxchg.nostore139
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    std 29, ull@toc@l(30)
@@ -4528,19 +4520,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 29, sc@toc@l(4)
 ; CHECK-NEXT:    lbarx 28, 0, 6
 ; CHECK-NEXT:    cmplw 28, 30
-; CHECK-NEXT:    bne 0, .LBB3_36
+; CHECK-NEXT:    bne- 0, .LBB3_36
 ; CHECK-NEXT:  # %bb.33: # %cmpxchg.fencedstore124
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_34: # %cmpxchg.trystore123
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stbcx. 29, 0, 6
-; CHECK-NEXT:    beq 0, .LBB3_37
+; CHECK-NEXT:    beq+ 0, .LBB3_37
 ; CHECK-NEXT:  # %bb.35: # %cmpxchg.releasedload122
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 28, 0, 6
 ; CHECK-NEXT:    cmplw 28, 30
-; CHECK-NEXT:    beq 0, .LBB3_34
+; CHECK-NEXT:    beq+ 0, .LBB3_34
 ; CHECK-NEXT:  .LBB3_36: # %cmpxchg.nostore120
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4557,19 +4548,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc@toc@l(3)
 ; CHECK-NEXT:    lbarx 29, 0, 7
 ; CHECK-NEXT:    cmplw 29, 6
-; CHECK-NEXT:    bne 0, .LBB3_42
+; CHECK-NEXT:    bne- 0, .LBB3_42
 ; CHECK-NEXT:  # %bb.39: # %cmpxchg.fencedstore105
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_40: # %cmpxchg.trystore104
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stbcx. 30, 0, 7
-; CHECK-NEXT:    beq 0, .LBB3_43
+; CHECK-NEXT:    beq+ 0, .LBB3_43
 ; CHECK-NEXT:  # %bb.41: # %cmpxchg.releasedload103
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 29, 0, 7
 ; CHECK-NEXT:    cmplw 29, 6
-; CHECK-NEXT:    beq 0, .LBB3_40
+; CHECK-NEXT:    beq+ 0, .LBB3_40
 ; CHECK-NEXT:  .LBB3_42: # %cmpxchg.nostore101
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4586,21 +4576,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc@toc@l(3)
 ; CHECK-NEXT:    lharx 30, 0, 8
 ; CHECK-NEXT:    cmplw 30, 6
-; CHECK-NEXT:    bne 0, .LBB3_48
+; CHECK-NEXT:    bne- 0, .LBB3_48
 ; CHECK-NEXT:  # %bb.45: # %cmpxchg.fencedstore86
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 7, 7, 16
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_46: # %cmpxchg.trystore85
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    sthcx. 7, 0, 8
-; CHECK-NEXT:    beq 0, .LBB3_49
+; CHECK-NEXT:    beq+ 0, .LBB3_49
 ; CHECK-NEXT:  # %bb.47: # %cmpxchg.releasedload84
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 30, 0, 8
 ; CHECK-NEXT:    cmplw 30, 6
-; CHECK-NEXT:    beq 0, .LBB3_46
+; CHECK-NEXT:    beq+ 0, .LBB3_46
 ; CHECK-NEXT:  .LBB3_48: # %cmpxchg.nostore82
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4617,21 +4606,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc@toc@l(3)
 ; CHECK-NEXT:    lharx 8, 0, 9
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    bne 0, .LBB3_54
+; CHECK-NEXT:    bne- 0, .LBB3_54
 ; CHECK-NEXT:  # %bb.51: # %cmpxchg.fencedstore67
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 7, 7, 16
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_52: # %cmpxchg.trystore66
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    sthcx. 7, 0, 9
-; CHECK-NEXT:    beq 0, .LBB3_55
+; CHECK-NEXT:    beq+ 0, .LBB3_55
 ; CHECK-NEXT:  # %bb.53: # %cmpxchg.releasedload65
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 8, 0, 9
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    beq 0, .LBB3_52
+; CHECK-NEXT:    beq+ 0, .LBB3_52
 ; CHECK-NEXT:  .LBB3_54: # %cmpxchg.nostore63
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4648,20 +4636,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc@toc@l(3)
 ; CHECK-NEXT:    lwarx 8, 0, 10
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    bne 0, .LBB3_60
+; CHECK-NEXT:    bne- 0, .LBB3_60
 ; CHECK-NEXT:  # %bb.57: # %cmpxchg.fencedstore48
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_58: # %cmpxchg.trystore47
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 10
-; CHECK-NEXT:    beq 0, .LBB3_61
+; CHECK-NEXT:    beq+ 0, .LBB3_61
 ; CHECK-NEXT:  # %bb.59: # %cmpxchg.releasedload46
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 8, 0, 10
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    beq 0, .LBB3_58
+; CHECK-NEXT:    beq+ 0, .LBB3_58
 ; CHECK-NEXT:  .LBB3_60: # %cmpxchg.nostore44
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4678,20 +4665,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc@toc@l(3)
 ; CHECK-NEXT:    lwarx 8, 0, 11
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    bne 0, .LBB3_66
+; CHECK-NEXT:    bne- 0, .LBB3_66
 ; CHECK-NEXT:  # %bb.63: # %cmpxchg.fencedstore29
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_64: # %cmpxchg.trystore28
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 11
-; CHECK-NEXT:    beq 0, .LBB3_67
+; CHECK-NEXT:    beq+ 0, .LBB3_67
 ; CHECK-NEXT:  # %bb.65: # %cmpxchg.releasedload27
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 8, 0, 11
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    beq 0, .LBB3_64
+; CHECK-NEXT:    beq+ 0, .LBB3_64
 ; CHECK-NEXT:  .LBB3_66: # %cmpxchg.nostore25
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4708,20 +4694,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc@toc@l(3)
 ; CHECK-NEXT:    ldarx 8, 0, 12
 ; CHECK-NEXT:    cmpld 8, 6
-; CHECK-NEXT:    bne 0, .LBB3_72
+; CHECK-NEXT:    bne- 0, .LBB3_72
 ; CHECK-NEXT:  # %bb.69: # %cmpxchg.fencedstore10
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_70: # %cmpxchg.trystore9
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 7, 0, 12
-; CHECK-NEXT:    beq 0, .LBB3_73
+; CHECK-NEXT:    beq+ 0, .LBB3_73
 ; CHECK-NEXT:  # %bb.71: # %cmpxchg.releasedload8
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 8, 0, 12
 ; CHECK-NEXT:    cmpld 8, 6
-; CHECK-NEXT:    beq 0, .LBB3_70
+; CHECK-NEXT:    beq+ 0, .LBB3_70
 ; CHECK-NEXT:  .LBB3_72: # %cmpxchg.nostore6
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4738,20 +4723,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    stw 6, ui@toc@l(5)
 ; CHECK-NEXT:    ldarx 6, 0, 0
 ; CHECK-NEXT:    cmpld 6, 3
-; CHECK-NEXT:    bne 0, .LBB3_78
+; CHECK-NEXT:    bne- 0, .LBB3_78
 ; CHECK-NEXT:  # %bb.75: # %cmpxchg.fencedstore
 ; CHECK-NEXT:    extsb 4, 4
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_76: # %cmpxchg.trystore
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 4, 0, 0
-; CHECK-NEXT:    beq 0, .LBB3_79
+; CHECK-NEXT:    beq+ 0, .LBB3_79
 ; CHECK-NEXT:  # %bb.77: # %cmpxchg.releasedload
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 6, 0, 0
 ; CHECK-NEXT:    cmpld 6, 3
-; CHECK-NEXT:    beq 0, .LBB3_76
+; CHECK-NEXT:    beq+ 0, .LBB3_76
 ; CHECK-NEXT:  .LBB3_78: # %cmpxchg.nostore
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4807,24 +4791,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 3, 26
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 4
-; AIX32-NEXT:    bne 0, L..BB3_4
+; AIX32-NEXT:    bne- 0, L..BB3_4
 ; AIX32-NEXT:  # %bb.1: # %cmpxchg.fencedstore289
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 5, 5, 26
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_2: # %cmpxchg.trystore288
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 6, 3, 25
 ; AIX32-NEXT:    or 6, 6, 5
 ; AIX32-NEXT:    stwcx. 6, 0, 27
-; AIX32-NEXT:    beq 0, L..BB3_4
+; AIX32-NEXT:    beq+ 0, L..BB3_4
 ; AIX32-NEXT:  # %bb.3: # %cmpxchg.releasedload287
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 3, 0, 27
 ; AIX32-NEXT:    srw 6, 3, 26
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 4
-; AIX32-NEXT:    beq 0, L..BB3_2
+; AIX32-NEXT:    beq+ 0, L..BB3_2
 ; AIX32-NEXT:  L..BB3_4: # %cmpxchg.nostore285
 ; AIX32-NEXT:    not 4, 30
 ; AIX32-NEXT:    srw 5, 3, 26
@@ -4840,25 +4823,24 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 4, 23
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    bne 0, L..BB3_8
+; AIX32-NEXT:    bne- 0, L..BB3_8
 ; AIX32-NEXT:  # %bb.5: # %cmpxchg.fencedstore256
 ; AIX32-NEXT:    clrlwi 5, 5, 24
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 5, 5, 23
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_6: # %cmpxchg.trystore255
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 6, 4, 22
 ; AIX32-NEXT:    or 6, 6, 5
 ; AIX32-NEXT:    stwcx. 6, 0, 24
-; AIX32-NEXT:    beq 0, L..BB3_8
+; AIX32-NEXT:    beq+ 0, L..BB3_8
 ; AIX32-NEXT:  # %bb.7: # %cmpxchg.releasedload254
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 4, 0, 24
 ; AIX32-NEXT:    srw 6, 4, 23
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    beq 0, L..BB3_6
+; AIX32-NEXT:    beq+ 0, L..BB3_6
 ; AIX32-NEXT:  L..BB3_8: # %cmpxchg.nostore252
 ; AIX32-NEXT:    srw 4, 4, 23
 ; AIX32-NEXT:    lwsync
@@ -4878,26 +4860,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 8, 5, 20
 ; AIX32-NEXT:    clrlwi 8, 8, 16
 ; AIX32-NEXT:    cmplw 8, 6
-; AIX32-NEXT:    bne 0, L..BB3_12
+; AIX32-NEXT:    bne- 0, L..BB3_12
 ; AIX32-NEXT:  # %bb.9: # %cmpxchg.fencedstore223
 ; AIX32-NEXT:    extsb 7, 7
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    clrlwi 7, 7, 16
 ; AIX32-NEXT:    slw 7, 7, 20
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_10: # %cmpxchg.trystore222
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 8, 5, 19
 ; AIX32-NEXT:    or 8, 8, 7
 ; AIX32-NEXT:    stwcx. 8, 0, 21
-; AIX32-NEXT:    beq 0, L..BB3_12
+; AIX32-NEXT:    beq+ 0, L..BB3_12
 ; AIX32-NEXT:  # %bb.11: # %cmpxchg.releasedload221
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 5, 0, 21
 ; AIX32-NEXT:    srw 8, 5, 20
 ; AIX32-NEXT:    clrlwi 8, 8, 16
 ; AIX32-NEXT:    cmplw 8, 6
-; AIX32-NEXT:    beq 0, L..BB3_10
+; AIX32-NEXT:    beq+ 0, L..BB3_10
 ; AIX32-NEXT:  L..BB3_12: # %cmpxchg.nostore219
 ; AIX32-NEXT:    srw 5, 5, 20
 ; AIX32-NEXT:    lwsync
@@ -4915,26 +4896,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 7, 3, 17
 ; AIX32-NEXT:    clrlwi 7, 7, 16
 ; AIX32-NEXT:    cmplw 7, 5
-; AIX32-NEXT:    bne 0, L..BB3_16
+; AIX32-NEXT:    bne- 0, L..BB3_16
 ; AIX32-NEXT:  # %bb.13: # %cmpxchg.fencedstore190
 ; AIX32-NEXT:    extsb 6, 6
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    clrlwi 6, 6, 16
 ; AIX32-NEXT:    slw 6, 6, 17
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_14: # %cmpxchg.trystore189
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 7, 3, 16
 ; AIX32-NEXT:    or 7, 7, 6
 ; AIX32-NEXT:    stwcx. 7, 0, 18
-; AIX32-NEXT:    beq 0, L..BB3_16
+; AIX32-NEXT:    beq+ 0, L..BB3_16
 ; AIX32-NEXT:  # %bb.15: # %cmpxchg.releasedload188
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 3, 0, 18
 ; AIX32-NEXT:    srw 7, 3, 17
 ; AIX32-NEXT:    clrlwi 7, 7, 16
 ; AIX32-NEXT:    cmplw 7, 5
-; AIX32-NEXT:    beq 0, L..BB3_14
+; AIX32-NEXT:    beq+ 0, L..BB3_14
 ; AIX32-NEXT:  L..BB3_16: # %cmpxchg.nostore186
 ; AIX32-NEXT:    srw 3, 3, 17
 ; AIX32-NEXT:    lwsync
@@ -4944,20 +4924,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lbz 4, 0(30)
 ; AIX32-NEXT:    lwarx 3, 0, 15
 ; AIX32-NEXT:    cmplw 3, 4
-; AIX32-NEXT:    bne 0, L..BB3_20
+; AIX32-NEXT:    bne- 0, L..BB3_20
 ; AIX32-NEXT:  # %bb.17: # %cmpxchg.fencedstore171
 ; AIX32-NEXT:    extsb 5, 5
 ; AIX32-NEXT:    sync
-; AIX32-NEXT:    .align 5
 ; AIX32-NEXT:  L..BB3_18: # %cmpxchg.trystore170
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    stwcx. 5, 0, 15
-; AIX32-NEXT:    beq 0, L..BB3_20
+; AIX32-NEXT:    beq+ 0, L..BB3_20
 ; AIX32-NEXT:  # %bb.19: # %cmpxchg.releasedload169
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 3, 0, 15
 ; AIX32-NEXT:    cmplw 3, 4
-; AIX32-NEXT:    beq 0, L..BB3_18
+; AIX32-NEXT:    beq+ 0, L..BB3_18
 ; AIX32-NEXT:  L..BB3_20: # %cmpxchg.nostore167
 ; AIX32-NEXT:    lwsync
 ; AIX32-NEXT:    lwz 28, L..C5(2) # @ui
@@ -4966,20 +4945,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lbz 5, 0(29)
 ; AIX32-NEXT:    lwarx 3, 0, 28
 ; AIX32-NEXT:    cmplw 3, 4
-; AIX32-NEXT:    bne 0, L..BB3_24
+; AIX32-NEXT:    bne- 0, L..BB3_24
 ; AIX32-NEXT:  # %bb.21: # %cmpxchg.fencedstore152
 ; AIX32-NEXT:    extsb 5, 5
 ; AIX32-NEXT:    sync
-; AIX32-NEXT:    .align 5
 ; AIX32-NEXT:  L..BB3_22: # %cmpxchg.trystore151
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    stwcx. 5, 0, 28
-; AIX32-NEXT:    beq 0, L..BB3_24
+; AIX32-NEXT:    beq+ 0, L..BB3_24
 ; AIX32-NEXT:  # %bb.23: # %cmpxchg.releasedload150
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 3, 0, 28
 ; AIX32-NEXT:    cmplw 3, 4
-; AIX32-NEXT:    beq 0, L..BB3_22
+; AIX32-NEXT:    beq+ 0, L..BB3_22
 ; AIX32-NEXT:  L..BB3_24: # %cmpxchg.nostore148
 ; AIX32-NEXT:    lwsync
 ; AIX32-NEXT:    stw 3, 0(28)
@@ -5024,24 +5002,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 4, 26
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    bne 0, L..BB3_28
+; AIX32-NEXT:    bne- 0, L..BB3_28
 ; AIX32-NEXT:  # %bb.25: # %cmpxchg.fencedstore119
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 5, 5, 26
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_26: # %cmpxchg.trystore118
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 4, 4, 25
 ; AIX32-NEXT:    or 4, 4, 5
 ; AIX32-NEXT:    stwcx. 4, 0, 27
-; AIX32-NEXT:    beq 0, L..BB3_29
+; AIX32-NEXT:    beq+ 0, L..BB3_29
 ; AIX32-NEXT:  # %bb.27: # %cmpxchg.releasedload117
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 4, 0, 27
 ; AIX32-NEXT:    srw 6, 4, 26
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    beq 0, L..BB3_26
+; AIX32-NEXT:    beq+ 0, L..BB3_26
 ; AIX32-NEXT:  L..BB3_28: # %cmpxchg.nostore115
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5060,24 +5037,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 4, 23
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    bne 0, L..BB3_34
+; AIX32-NEXT:    bne- 0, L..BB3_34
 ; AIX32-NEXT:  # %bb.31: # %cmpxchg.fencedstore86
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 5, 5, 23
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_32: # %cmpxchg.trystore85
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 4, 4, 22
 ; AIX32-NEXT:    or 4, 4, 5
 ; AIX32-NEXT:    stwcx. 4, 0, 24
-; AIX32-NEXT:    beq 0, L..BB3_35
+; AIX32-NEXT:    beq+ 0, L..BB3_35
 ; AIX32-NEXT:  # %bb.33: # %cmpxchg.releasedload84
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 4, 0, 24
 ; AIX32-NEXT:    srw 6, 4, 23
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    beq 0, L..BB3_32
+; AIX32-NEXT:    beq+ 0, L..BB3_32
 ; AIX32-NEXT:  L..BB3_34: # %cmpxchg.nostore82
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5096,26 +5072,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 4, 20
 ; AIX32-NEXT:    clrlwi 6, 6, 16
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    bne 0, L..BB3_40
+; AIX32-NEXT:    bne- 0, L..BB3_40
 ; AIX32-NEXT:  # %bb.37: # %cmpxchg.fencedstore53
 ; AIX32-NEXT:    extsb 5, 5
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    clrlwi 5, 5, 16
 ; AIX32-NEXT:    slw 5, 5, 20
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_38: # %cmpxchg.trystore52
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 4, 4, 19
 ; AIX32-NEXT:    or 4, 4, 5
 ; AIX32-NEXT:    stwcx. 4, 0, 21
-; AIX32-NEXT:    beq 0, L..BB3_41
+; AIX32-NEXT:    beq+ 0, L..BB3_41
 ; AIX32-NEXT:  # %bb.39: # %cmpxchg.releasedload51
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 4, 0, 21
 ; AIX32-NEXT:    srw 6, 4, 20
 ; AIX32-NEXT:    clrlwi 6, 6, 16
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    beq 0, L..BB3_38
+; AIX32-NEXT:    beq+ 0, L..BB3_38
 ; AIX32-NEXT:  L..BB3_40: # %cmpxchg.nostore49
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5134,26 +5109,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 4, 17
 ; AIX32-NEXT:    clrlwi 6, 6, 16
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    bne 0, L..BB3_46
+; AIX32-NEXT:    bne- 0, L..BB3_46
 ; AIX32-NEXT:  # %bb.43: # %cmpxchg.fencedstore29
 ; AIX32-NEXT:    extsb 5, 5
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    clrlwi 5, 5, 16
 ; AIX32-NEXT:    slw 5, 5, 17
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_44: # %cmpxchg.trystore28
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 4, 4, 16
 ; AIX32-NEXT:    or 4, 4, 5
 ; AIX32-NEXT:    stwcx. 4, 0, 18
-; AIX32-NEXT:    beq 0, L..BB3_47
+; AIX32-NEXT:    beq+ 0, L..BB3_47
 ; AIX32-NEXT:  # %bb.45: # %cmpxchg.releasedload27
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 4, 0, 18
 ; AIX32-NEXT:    srw 6, 4, 17
 ; AIX32-NEXT:    clrlwi 6, 6, 16
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    beq 0, L..BB3_44
+; AIX32-NEXT:    beq+ 0, L..BB3_44
 ; AIX32-NEXT:  L..BB3_46: # %cmpxchg.nostore25
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5170,20 +5144,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lbz 3, 0(30)
 ; AIX32-NEXT:    lwarx 5, 0, 15
 ; AIX32-NEXT:    cmplw 5, 3
-; AIX32-NEXT:    bne 0, L..BB3_52
+; AIX32-NEXT:    bne- 0, L..BB3_52
 ; AIX32-NEXT:  # %bb.49: # %cmpxchg.fencedstore10
 ; AIX32-NEXT:    extsb 4, 4
 ; AIX32-NEXT:    sync
-; AIX32-NEXT:    .align 5
 ; AIX32-NEXT:  L..BB3_50: # %cmpxchg.trystore9
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    stwcx. 4, 0, 15
-; AIX32-NEXT:    beq 0, L..BB3_53
+; AIX32-NEXT:    beq+ 0, L..BB3_53
 ; AIX32-NEXT:  # %bb.51: # %cmpxchg.releasedload8
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 5, 0, 15
 ; AIX32-NEXT:    cmplw 5, 3
-; AIX32-NEXT:    beq 0, L..BB3_50
+; AIX32-NEXT:    beq+ 0, L..BB3_50
 ; AIX32-NEXT:  L..BB3_52: # %cmpxchg.nostore6
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5200,20 +5173,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lbz 3, 0(30)
 ; AIX32-NEXT:    lwarx 5, 0, 28
 ; AIX32-NEXT:    cmplw 5, 3
-; AIX32-NEXT:    bne 0, L..BB3_58
+; AIX32-NEXT:    bne- 0, L..BB3_58
 ; AIX32-NEXT:  # %bb.55: # %cmpxchg.fencedstore
 ; AIX32-NEXT:    extsb 4, 4
 ; AIX32-NEXT:    sync
-; AIX32-NEXT:    .align 5
 ; AIX32-NEXT:  L..BB3_56: # %cmpxchg.trystore
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    stwcx. 4, 0, 28
-; AIX32-NEXT:    beq 0, L..BB3_59
+; AIX32-NEXT:    beq+ 0, L..BB3_59
 ; AIX32-NEXT:  # %bb.57: # %cmpxchg.releasedload
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 5, 0, 28
 ; AIX32-NEXT:    cmplw 5, 3
-; AIX32-NEXT:    beq 0, L..BB3_56
+; AIX32-NEXT:    beq+ 0, L..BB3_56
 ; AIX32-NEXT:  L..BB3_58: # %cmpxchg.nostore
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5838,21 +5810,20 @@ define dso_local i64 @cmpswplp(ptr noundef %ptr, ptr nocapture noundef readnone
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ldarx 4, 0, 3
 ; CHECK-NEXT:    cmpld 4, 5
-; CHECK-NEXT:    bne 0, .LBB6_2
+; CHECK-NEXT:    bne- 0, .LBB6_3
 ; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; CHECK-NEXT:    addi 4, 5, 1
+; CHECK-NEXT:    creqv 20, 20, 20
 ; CHECK-NEXT:    stdcx. 4, 0, 3
-; CHECK-NEXT:    beq 0, .LBB6_4
-; CHECK-NEXT:  .LBB6_2: # %cmpxchg.failure
-; CHECK-NEXT:    crxor 20, 20, 20
-; CHECK-NEXT:  .LBB6_3: # %cmpxchg.end
+; CHECK-NEXT:    bne- 0, .LBB6_3
+; CHECK-NEXT:  .LBB6_2: # %cmpxchg.end
 ; CHECK-NEXT:    li 3, 66
 ; CHECK-NEXT:    li 4, 55
 ; CHECK-NEXT:    isel 3, 4, 3, 20
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB6_4:
-; CHECK-NEXT:    creqv 20, 20, 20
-; CHECK-NEXT:    b .LBB6_3
+; CHECK-NEXT:  .LBB6_3: # %cmpxchg.failure
+; CHECK-NEXT:    crxor 20, 20, 20
+; CHECK-NEXT:    b .LBB6_2
 ;
 ; AIX32-LABEL: cmpswplp:
 ; AIX32:       # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll b/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
index 65a12a6222f24..ae071194b4479 100644
--- a/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
+++ b/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
@@ -19,13 +19,14 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c)  {
 ; CHECK-NEXT:    stw r5, -16(r1)
 ; CHECK-NEXT:    lwarx r6, 0, r3
 ; CHECK-NEXT:    cmplw r6, r7
-; CHECK-NEXT:    bne cr0, L..BB0_2
+; CHECK-NEXT:    bne- cr0, L..BB0_5
 ; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
+; CHECK-NEXT:    creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
 ; CHECK-NEXT:    stwcx. r5, 0, r3
-; CHECK-NEXT:    beq cr0, L..BB0_5
-; CHECK-NEXT:  L..BB0_2: # %cmpxchg.failure
-; CHECK-NEXT:    crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK-NEXT:  # %bb.3: # %cmpxchg.store_expected
+; CHECK-NEXT:    bne- cr0, L..BB0_5
+; CHECK-NEXT:  # %bb.2: # %cmpxchg.end
+; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_4
+; CHECK-NEXT:  L..BB0_3: # %cmpxchg.store_expected
 ; CHECK-NEXT:    stw r6, 0(r4)
 ; CHECK-NEXT:  L..BB0_4: # %cmpxchg.continue
 ; CHECK-NEXT:    li r3, 0
@@ -33,9 +34,9 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c)  {
 ; CHECK-NEXT:    isel r3, r4, r3, 4*cr5+lt
 ; CHECK-NEXT:    stb r3, -17(r1)
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  L..BB0_5:
-; CHECK-NEXT:    creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK-NEXT:    b L..BB0_4
+; CHECK-NEXT:  L..BB0_5: # %cmpxchg.failure
+; CHECK-NEXT:    crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
+; CHECK-NEXT:    b L..BB0_3
 ;
 ; CHECK64-LABEL: foo:
 ; CHECK64:       # %bb.0: # %entry
@@ -46,13 +47,14 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c)  {
 ; CHECK64-NEXT:    stw r5, -24(r1)
 ; CHECK64-NEXT:    lwarx r6, 0, r3
 ; CHECK64-NEXT:    cmplw r6, r7
-; CHECK64-NEXT:    bne cr0, L..BB0_2
+; CHECK64-NEXT:    bne- cr0, L..BB0_5
 ; CHECK64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
+; CHECK64-NEXT:    creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
 ; CHECK64-NEXT:    stwcx. r5, 0, r3
-; CHECK64-NEXT:    beq cr0, L..BB0_5
-; CHECK64-NEXT:  L..BB0_2: # %cmpxchg.failure
-; CHECK64-NEXT:    crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK64-NEXT:  # %bb.3: # %cmpxchg.store_expected
+; CHECK64-NEXT:    bne- cr0, L..BB0_5
+; CHECK64-NEXT:  # %bb.2: # %cmpxchg.end
+; CHECK64-NEXT:    bc 12, 4*cr5+lt, L..BB0_4
+; CHECK64-NEXT:  L..BB0_3: # %cmpxchg.store_expected
 ; CHECK64-NEXT:    stw r6, 0(r4)
 ; CHECK64-NEXT:  L..BB0_4: # %cmpxchg.continue
 ; CHECK64-NEXT:    li r3, 0
@@ -63,9 +65,9 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c)  {
 ; CHECK64-NEXT:    li r3, 0
 ; CHECK64-NEXT:    isel r3, r4, r3, 4*cr5+lt
 ; CHECK64-NEXT:    blr
-; CHECK64-NEXT:  L..BB0_5:
-; CHECK64-NEXT:    creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK64-NEXT:    b L..BB0_4
+; CHECK64-NEXT:  L..BB0_5: # %cmpxchg.failure
+; CHECK64-NEXT:    crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
+; CHECK64-NEXT:    b L..BB0_3
 entry:
   %cp.addr = alloca ptr, align 4
   %old.addr = alloca ptr, align 4
diff --git a/llvm/test/CodeGen/PowerPC/atomic-float.ll b/llvm/test/CodeGen/PowerPC/atomic-float.ll
index 600d28936c162..8232a44c7da26 100644
--- a/llvm/test/CodeGen/PowerPC/atomic-float.ll
+++ b/llvm/test/CodeGen/PowerPC/atomic-float.ll
@@ -9,37 +9,36 @@ define float @test_add(ptr %ptr, float %incr) {
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    sync
 ; CHECK-64-NEXT:    lfs 0, 0(3)
-; CHECK-64-NEXT:    b .LBB0_3
-; CHECK-64-NEXT:  .LBB0_1:                                # %cmpxchg.nostore
-; CHECK-64-NEXT:                                          #   in Loop: Header=BB0_3 Depth=1
-; CHECK-64-NEXT:    crxor 20, 20, 20
-; CHECK-64-NEXT:  .LBB0_2:                                # %cmpxchg.end
-; CHECK-64-NEXT:                                          #   in Loop: Header=BB0_3 Depth=1
-; CHECK-64-NEXT:    stw 4, -12(1)
-; CHECK-64-NEXT:    lfs 0, -12(1)
-; CHECK-64-NEXT:    bc 12, 20, .LBB0_7
-; CHECK-64-NEXT:  .LBB0_3:                                # %atomicrmw.start
-; CHECK-64-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-64-NEXT:                                          #     Child Loop BB0_4 Depth 2
+; CHECK-64-NEXT:  .LBB0_1: # %atomicrmw.start
+; CHECK-64-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-64-NEXT:    # Child Loop BB0_2 Depth 2
 ; CHECK-64-NEXT:    fadds 2, 0, 1
 ; CHECK-64-NEXT:    stfs 2, -4(1)
 ; CHECK-64-NEXT:    stfs 0, -8(1)
 ; CHECK-64-NEXT:    lwz 5, -4(1)
 ; CHECK-64-NEXT:    lwz 6, -8(1)
-; CHECK-64-NEXT:  .LBB0_4:                                # %cmpxchg.start
-; CHECK-64-NEXT:                                          #   Parent Loop BB0_3 Depth=1
-; CHECK-64-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-64-NEXT:  .LBB0_2: # %cmpxchg.start
+; CHECK-64-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-64-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-64-NEXT:    lwarx 4, 0, 3
-; CHECK-64-NEXT:    cmplw   4, 6
-; CHECK-64-NEXT:    bne     0, .LBB0_1
-; CHECK-64-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-64-NEXT:                                          #   in Loop: Header=BB0_4 Depth=2
+; CHECK-64-NEXT:    cmplw 4, 6
+; CHECK-64-NEXT:    bne- 0, .LBB0_5
+; CHECK-64-NEXT:  # %bb.3: # %cmpxchg.fencedstore
+; CHECK-64-NEXT:    #
 ; CHECK-64-NEXT:    stwcx. 5, 0, 3
-; CHECK-64-NEXT:    bne     0, .LBB0_4
-; CHECK-64-NEXT:  # %bb.6:                                #   in Loop: Header=BB0_3 Depth=1
 ; CHECK-64-NEXT:    creqv 20, 20, 20
-; CHECK-64-NEXT:    b .LBB0_2
-; CHECK-64-NEXT:  .LBB0_7:                                # %atomicrmw.end
+; CHECK-64-NEXT:    bne- 0, .LBB0_2
+; CHECK-64-NEXT:  .LBB0_4: # %cmpxchg.end
+; CHECK-64-NEXT:    #
+; CHECK-64-NEXT:    stw 4, -12(1)
+; CHECK-64-NEXT:    lfs 0, -12(1)
+; CHECK-64-NEXT:    bc 4, 20, .LBB0_1
+; CHECK-64-NEXT:    b .LBB0_6
+; CHECK-64-NEXT:  .LBB0_5: # %cmpxchg.nostore
+; CHECK-64-NEXT:    #
+; CHECK-64-NEXT:    crxor 20, 20, 20
+; CHECK-64-NEXT:    b .LBB0_4
+; CHECK-64-NEXT:  .LBB0_6: # %atomicrmw.end
 ; CHECK-64-NEXT:    fmr 1, 0
 ; CHECK-64-NEXT:    lwsync
 ; CHECK-64-NEXT:    blr
@@ -50,37 +49,36 @@ define float @test_add(ptr %ptr, float %incr) {
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-32-NEXT:    sync
 ; CHECK-32-NEXT:    lfs 0, 0(3)
-; CHECK-32-NEXT:    b .LBB0_3
-; CHECK-32-NEXT:  .LBB0_1:                                # %cmpxchg.nostore
-; CHECK-32-NEXT:                                          #   in Loop: Header=BB0_3 Depth=1
-; CHECK-32-NEXT:    crxor 20, 20, 20
-; CHECK-32-NEXT:  .LBB0_2:                                # %cmpxchg.end
-; CHECK-32-NEXT:                                          #   in Loop: Header=BB0_3 Depth=1
-; CHECK-32-NEXT:    stw 4, 20(1)
-; CHECK-32-NEXT:    lfs 0, 20(1)
-; CHECK-32-NEXT:    bc 12, 20, .LBB0_7
-; CHECK-32-NEXT:  .LBB0_3:                                # %atomicrmw.start
-; CHECK-32-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-32-NEXT:                                          #     Child Loop BB0_4 Depth 2
+; CHECK-32-NEXT:  .LBB0_1: # %atomicrmw.start
+; CHECK-32-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-32-NEXT:    # Child Loop BB0_2 Depth 2
 ; CHECK-32-NEXT:    fadds 2, 0, 1
 ; CHECK-32-NEXT:    stfs 2, 28(1)
 ; CHECK-32-NEXT:    stfs 0, 24(1)
 ; CHECK-32-NEXT:    lwz 5, 28(1)
 ; CHECK-32-NEXT:    lwz 6, 24(1)
-; CHECK-32-NEXT:  .LBB0_4:                                # %cmpxchg.start
-; CHECK-32-NEXT:                                          #   Parent Loop BB0_3 Depth=1
-; CHECK-32-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-32-NEXT:  .LBB0_2: # %cmpxchg.start
+; CHECK-32-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-32-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-32-NEXT:    lwarx 4, 0, 3
-; CHECK-32-NEXT:    cmplw   4, 6
-; CHECK-32-NEXT:    bne     0, .LBB0_1
-; CHECK-32-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-32-NEXT:                                          #   in Loop: Header=BB0_4 Depth=2
+; CHECK-32-NEXT:    cmplw 4, 6
+; CHECK-32-NEXT:    bne- 0, .LBB0_5
+; CHECK-32-NEXT:  # %bb.3: # %cmpxchg.fencedstore
+; CHECK-32-NEXT:    #
 ; CHECK-32-NEXT:    stwcx. 5, 0, 3
-; CHECK-32-NEXT:    bne     0, .LBB0_4
-; CHECK-32-NEXT:  # %bb.6:                                #   in Loop: Header=BB0_3 Depth=1
 ; CHECK-32-NEXT:    creqv 20, 20, 20
-; CHECK-32-NEXT:    b .LBB0_2
-; CHECK-32-NEXT:  .LBB0_7:                                # %atomicrmw.end
+; CHECK-32-NEXT:    bne- 0, .LBB0_2
+; CHECK-32-NEXT:  .LBB0_4: # %cmpxchg.end
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stw 4, 20(1)
+; CHECK-32-NEXT:    lfs 0, 20(1)
+; CHECK-32-NEXT:    bc 4, 20, .LBB0_1
+; CHECK-32-NEXT:    b .LBB0_6
+; CHECK-32-NEXT:  .LBB0_5: # %cmpxchg.nostore
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    crxor 20, 20, 20
+; CHECK-32-NEXT:    b .LBB0_4
+; CHECK-32-NEXT:  .LBB0_6: # %atomicrmw.end
 ; CHECK-32-NEXT:    fmr 1, 0
 ; CHECK-32-NEXT:    lwsync
 ; CHECK-32-NEXT:    addi 1, 1, 32
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
index 27a26aaca8b26..ff176c80ab342 100644
--- a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
@@ -6,45 +6,49 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    not     3, 3
+; CHECK-NEXT:    not 3, 3
 ; CHECK-NEXT:    li 6, 255
 ; CHECK-NEXT:    lwz 8, 0(5)
 ; CHECK-NEXT:    rlwinm 3, 3, 3, 27, 28
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  7, 4, 24
-; CHECK-NEXT:    b .LBB0_2
-; CHECK-NEXT:  .LBB0_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  .LBB0_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB0_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 7, 4, 24
+; CHECK-NEXT:  .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
 ; CHECK-NEXT:    srw 9, 8, 3
-; CHECK-NEXT:    clrlwi  10, 9, 24
-; CHECK-NEXT:    cmplw   10, 7
-; CHECK-NEXT:    blt     0, .LBB0_4
-; CHECK-NEXT:  # %bb.3:                                #   in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    sub     9, 9, 4
-; CHECK-NEXT:  .LBB0_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    clrlwi  9, 9, 24
+; CHECK-NEXT:    clrlwi 10, 9, 24
+; CHECK-NEXT:    cmplw 10, 7
+; CHECK-NEXT:    blt 0, .LBB0_3
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    sub 9, 9, 4
+; CHECK-NEXT:  .LBB0_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 9, 9, 24
 ; CHECK-NEXT:    slw 9, 9, 3
 ; CHECK-NEXT:    and 10, 8, 6
 ; CHECK-NEXT:    or 10, 10, 9
-; CHECK-NEXT:  .LBB0_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB0_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB0_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 9, 0, 5
-; CHECK-NEXT:    cmplw   9, 8
-; CHECK-NEXT:    bne     0, .LBB0_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_5 Depth=2
+; CHECK-NEXT:    cmplw 9, 8
+; CHECK-NEXT:    bne- 0, .LBB0_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 10, 0, 5
-; CHECK-NEXT:    bne     0, .LBB0_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB0_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    bc 4, 20, .LBB0_1
+; CHECK-NEXT:    b .LBB0_8
+; CHECK-NEXT:  .LBB0_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 9, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -57,47 +61,51 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    clrlwi  3, 3, 30
+; CHECK-NEXT:    clrlwi 3, 3, 30
 ; CHECK-NEXT:    lis 6, 0
 ; CHECK-NEXT:    xori 3, 3, 2
 ; CHECK-NEXT:    lwz 8, 0(5)
 ; CHECK-NEXT:    ori 6, 6, 65535
 ; CHECK-NEXT:    slwi 3, 3, 3
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  7, 4, 16
-; CHECK-NEXT:    b .LBB1_2
-; CHECK-NEXT:  .LBB1_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  .LBB1_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB1_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 7, 4, 16
+; CHECK-NEXT:  .LBB1_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB1_4 Depth 2
 ; CHECK-NEXT:    srw 9, 8, 3
-; CHECK-NEXT:    clrlwi  10, 9, 16
-; CHECK-NEXT:    cmplw   10, 7
-; CHECK-NEXT:    blt     0, .LBB1_4
-; CHECK-NEXT:  # %bb.3:                                #   in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    sub     9, 9, 4
-; CHECK-NEXT:  .LBB1_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    clrlwi  9, 9, 16
+; CHECK-NEXT:    clrlwi 10, 9, 16
+; CHECK-NEXT:    cmplw 10, 7
+; CHECK-NEXT:    blt 0, .LBB1_3
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    sub 9, 9, 4
+; CHECK-NEXT:  .LBB1_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 9, 9, 16
 ; CHECK-NEXT:    slw 9, 9, 3
 ; CHECK-NEXT:    and 10, 8, 6
 ; CHECK-NEXT:    or 10, 10, 9
-; CHECK-NEXT:  .LBB1_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB1_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB1_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB1_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 9, 0, 5
-; CHECK-NEXT:    cmplw   9, 8
-; CHECK-NEXT:    bne     0, .LBB1_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_5 Depth=2
+; CHECK-NEXT:    cmplw 9, 8
+; CHECK-NEXT:    bne- 0, .LBB1_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 10, 0, 5
-; CHECK-NEXT:    bne     0, .LBB1_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB1_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    bc 4, 20, .LBB1_1
+; CHECK-NEXT:    b .LBB1_8
+; CHECK-NEXT:  .LBB1_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    b .LBB1_1
+; CHECK-NEXT:  .LBB1_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 9, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -110,34 +118,38 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    lwz 6, 0(3)
-; CHECK-NEXT:    b .LBB2_2
-; CHECK-NEXT:  .LBB2_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB2_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB2_5 Depth 2
-; CHECK-NEXT:    cmplw   6, 4
-; CHECK-NEXT:    bge 0, .LBB2_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    mr      7, 6
-; CHECK-NEXT:    b .LBB2_5
-; CHECK-NEXT:  .LBB2_4:                                #   in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    sub     7, 6, 4
-; CHECK-NEXT:  .LBB2_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB2_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB2_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB2_4 Depth 2
+; CHECK-NEXT:    cmplw 6, 4
+; CHECK-NEXT:    bge 0, .LBB2_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 6
+; CHECK-NEXT:    b .LBB2_4
+; CHECK-NEXT:  .LBB2_3:
+; CHECK-NEXT:    sub 7, 6, 4
+; CHECK-NEXT:  .LBB2_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB2_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 5, 0, 3
-; CHECK-NEXT:    cmplw   5, 6
-; CHECK-NEXT:    bne     0, .LBB2_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_5 Depth=2
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    bne- 0, .LBB2_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB2_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB2_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB2_1
+; CHECK-NEXT:    b .LBB2_8
+; CHECK-NEXT:  .LBB2_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB2_1
+; CHECK-NEXT:  .LBB2_8: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -150,34 +162,38 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    ld 6, 0(3)
-; CHECK-NEXT:    b .LBB3_2
-; CHECK-NEXT:  .LBB3_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB3_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB3_5 Depth 2
-; CHECK-NEXT:    cmpld   6, 4
-; CHECK-NEXT:    bge 0, .LBB3_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT:    mr      7, 6
-; CHECK-NEXT:    b .LBB3_5
-; CHECK-NEXT:  .LBB3_4:                                #   in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT:    sub     7, 6, 4
-; CHECK-NEXT:  .LBB3_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB3_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB3_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB3_4 Depth 2
+; CHECK-NEXT:    cmpld 6, 4
+; CHECK-NEXT:    bge 0, .LBB3_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 6
+; CHECK-NEXT:    b .LBB3_4
+; CHECK-NEXT:  .LBB3_3:
+; CHECK-NEXT:    sub 7, 6, 4
+; CHECK-NEXT:  .LBB3_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB3_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldarx 5, 0, 3
-; CHECK-NEXT:    cmpld   5, 6
-; CHECK-NEXT:    bne     0, .LBB3_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_5 Depth=2
+; CHECK-NEXT:    cmpld 5, 6
+; CHECK-NEXT:    bne- 0, .LBB3_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB3_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB3_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB3_1
+; CHECK-NEXT:    b .LBB3_8
+; CHECK-NEXT:  .LBB3_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB3_1
+; CHECK-NEXT:  .LBB3_8: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -190,47 +206,51 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    not     3, 3
+; CHECK-NEXT:    not 3, 3
 ; CHECK-NEXT:    li 6, 255
 ; CHECK-NEXT:    lwz 7, 0(5)
 ; CHECK-NEXT:    rlwinm 3, 3, 3, 27, 28
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  4, 4, 24
-; CHECK-NEXT:    b .LBB4_2
-; CHECK-NEXT:  .LBB4_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  .LBB4_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB4_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 4, 4, 24
+; CHECK-NEXT:  .LBB4_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB4_4 Depth 2
 ; CHECK-NEXT:    srw 8, 7, 3
-; CHECK-NEXT:    clrlwi  9, 8, 24
-; CHECK-NEXT:    sub     8, 9, 4
-; CHECK-NEXT:    cmplw   8, 9
+; CHECK-NEXT:    clrlwi 9, 8, 24
+; CHECK-NEXT:    sub 8, 9, 4
+; CHECK-NEXT:    cmplw 8, 9
 ; CHECK-NEXT:    li 9, 0
-; CHECK-NEXT:    bgt     0, .LBB4_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT:    mr      9, 8
-; CHECK-NEXT:  .LBB4_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    bgt 0, .LBB4_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 9, 8
+; CHECK-NEXT:  .LBB4_3: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    slw 8, 9, 3
 ; CHECK-NEXT:    and 9, 7, 6
 ; CHECK-NEXT:    or 9, 9, 8
-; CHECK-NEXT:  .LBB4_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB4_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB4_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB4_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 8, 0, 5
-; CHECK-NEXT:    cmplw   8, 7
-; CHECK-NEXT:    bne     0, .LBB4_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_5 Depth=2
+; CHECK-NEXT:    cmplw 8, 7
+; CHECK-NEXT:    bne- 0, .LBB4_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 9, 0, 5
-; CHECK-NEXT:    bne     0, .LBB4_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB4_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    bc 4, 20, .LBB4_1
+; CHECK-NEXT:    b .LBB4_8
+; CHECK-NEXT:  .LBB4_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    b .LBB4_1
+; CHECK-NEXT:  .LBB4_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 8, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -243,49 +263,53 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    clrlwi  3, 3, 30
+; CHECK-NEXT:    clrlwi 3, 3, 30
 ; CHECK-NEXT:    lis 6, 0
 ; CHECK-NEXT:    xori 3, 3, 2
 ; CHECK-NEXT:    lwz 7, 0(5)
 ; CHECK-NEXT:    ori 6, 6, 65535
 ; CHECK-NEXT:    slwi 3, 3, 3
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  4, 4, 16
-; CHECK-NEXT:    b .LBB5_2
-; CHECK-NEXT:  .LBB5_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  .LBB5_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB5_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 4, 4, 16
+; CHECK-NEXT:  .LBB5_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB5_4 Depth 2
 ; CHECK-NEXT:    srw 8, 7, 3
-; CHECK-NEXT:    clrlwi  9, 8, 16
-; CHECK-NEXT:    sub     8, 9, 4
-; CHECK-NEXT:    cmplw   8, 9
+; CHECK-NEXT:    clrlwi 9, 8, 16
+; CHECK-NEXT:    sub 8, 9, 4
+; CHECK-NEXT:    cmplw 8, 9
 ; CHECK-NEXT:    li 9, 0
-; CHECK-NEXT:    bgt     0, .LBB5_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    mr      9, 8
-; CHECK-NEXT:  .LBB5_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    bgt 0, .LBB5_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 9, 8
+; CHECK-NEXT:  .LBB5_3: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    slw 8, 9, 3
 ; CHECK-NEXT:    and 9, 7, 6
 ; CHECK-NEXT:    or 9, 9, 8
-; CHECK-NEXT:  .LBB5_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB5_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB5_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB5_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 8, 0, 5
-; CHECK-NEXT:    cmplw   8, 7
-; CHECK-NEXT:    bne     0, .LBB5_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_5 Depth=2
+; CHECK-NEXT:    cmplw 8, 7
+; CHECK-NEXT:    bne- 0, .LBB5_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 9, 0, 5
-; CHECK-NEXT:    bne     0, .LBB5_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB5_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    bc 4, 20, .LBB5_1
+; CHECK-NEXT:    b .LBB5_8
+; CHECK-NEXT:  .LBB5_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    b .LBB5_1
+; CHECK-NEXT:  .LBB5_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 8, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -298,33 +322,37 @@ define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    lwz 6, 0(3)
-; CHECK-NEXT:    b .LBB6_2
-; CHECK-NEXT:  .LBB6_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB6_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB6_4 Depth 2
-; CHECK-NEXT:    sub     5, 6, 4
-; CHECK-NEXT:    cmplw   5, 6
+; CHECK-NEXT:  .LBB6_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB6_3 Depth 2
+; CHECK-NEXT:    sub 5, 6, 4
+; CHECK-NEXT:    cmplw 5, 6
 ; CHECK-NEXT:    li 7, 0
-; CHECK-NEXT:    bgt     0, .LBB6_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT:    mr      7, 5
-; CHECK-NEXT:  .LBB6_4:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB6_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:    bgt 0, .LBB6_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:  .LBB6_3: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB6_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 5, 0, 3
-; CHECK-NEXT:    cmplw   5, 6
-; CHECK-NEXT:    bne     0, .LBB6_1
-; CHECK-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_4 Depth=2
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    bne- 0, .LBB6_6
+; CHECK-NEXT:  # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB6_4
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.7:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB6_3
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB6_1
+; CHECK-NEXT:    b .LBB6_7
+; CHECK-NEXT:  .LBB6_6: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB6_1
+; CHECK-NEXT:  .LBB6_7: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -337,33 +365,37 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    ld 6, 0(3)
-; CHECK-NEXT:    b .LBB7_2
-; CHECK-NEXT:  .LBB7_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB7_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB7_4 Depth 2
-; CHECK-NEXT:    subc    5, 6, 4
+; CHECK-NEXT:  .LBB7_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB7_3 Depth 2
+; CHECK-NEXT:    subc 5, 6, 4
 ; CHECK-NEXT:    li 7, 0
 ; CHECK-NEXT:    addze. 8, 7
-; CHECK-NEXT:    beq     0, .LBB7_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT:    mr      7, 5
-; CHECK-NEXT:  .LBB7_4:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB7_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:    beq 0, .LBB7_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:  .LBB7_3: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB7_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldarx 5, 0, 3
-; CHECK-NEXT:    cmpld   5, 6
-; CHECK-NEXT:    bne     0, .LBB7_1
-; CHECK-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_4 Depth=2
+; CHECK-NEXT:    cmpld 5, 6
+; CHECK-NEXT:    bne- 0, .LBB7_6
+; CHECK-NEXT:  # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB7_4
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.7:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB7_3
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB7_1
+; CHECK-NEXT:    b .LBB7_7
+; CHECK-NEXT:  .LBB7_6: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB7_1
+; CHECK-NEXT:  .LBB7_7: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
index 6ced47bd6bcba..4dc6d0ad3d5c7 100644
--- a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
@@ -6,47 +6,51 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    not     3, 3
+; CHECK-NEXT:    not 3, 3
 ; CHECK-NEXT:    li 6, 255
 ; CHECK-NEXT:    lwz 7, 0(5)
 ; CHECK-NEXT:    rlwinm 3, 3, 3, 27, 28
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  4, 4, 24
-; CHECK-NEXT:    b .LBB0_2
-; CHECK-NEXT:  .LBB0_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  .LBB0_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB0_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 4, 4, 24
+; CHECK-NEXT:  .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
 ; CHECK-NEXT:    srw 8, 7, 3
-; CHECK-NEXT:    clrlwi  9, 8, 24
-; CHECK-NEXT:    cmplw   9, 4
+; CHECK-NEXT:    clrlwi 9, 8, 24
+; CHECK-NEXT:    cmplw 9, 4
 ; CHECK-NEXT:    li 9, 0
-; CHECK-NEXT:    bge 0, .LBB0_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    bge 0, .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 9, 8, 1
-; CHECK-NEXT:  .LBB0_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    clrlwi  8, 9, 24
+; CHECK-NEXT:  .LBB0_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 8, 9, 24
 ; CHECK-NEXT:    slw 8, 8, 3
 ; CHECK-NEXT:    and 9, 7, 6
 ; CHECK-NEXT:    or 9, 9, 8
-; CHECK-NEXT:  .LBB0_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB0_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB0_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 8, 0, 5
-; CHECK-NEXT:    cmplw   8, 7
-; CHECK-NEXT:    bne     0, .LBB0_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_5 Depth=2
+; CHECK-NEXT:    cmplw 8, 7
+; CHECK-NEXT:    bne- 0, .LBB0_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 9, 0, 5
-; CHECK-NEXT:    bne     0, .LBB0_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB0_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    bc 4, 20, .LBB0_1
+; CHECK-NEXT:    b .LBB0_8
+; CHECK-NEXT:  .LBB0_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 8, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -59,49 +63,53 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    clrlwi  3, 3, 30
+; CHECK-NEXT:    clrlwi 3, 3, 30
 ; CHECK-NEXT:    lis 6, 0
 ; CHECK-NEXT:    xori 3, 3, 2
 ; CHECK-NEXT:    lwz 7, 0(5)
 ; CHECK-NEXT:    ori 6, 6, 65535
 ; CHECK-NEXT:    slwi 3, 3, 3
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  4, 4, 16
-; CHECK-NEXT:    b .LBB1_2
-; CHECK-NEXT:  .LBB1_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  .LBB1_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB1_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 4, 4, 16
+; CHECK-NEXT:  .LBB1_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB1_4 Depth 2
 ; CHECK-NEXT:    srw 8, 7, 3
-; CHECK-NEXT:    clrlwi  9, 8, 16
-; CHECK-NEXT:    cmplw   9, 4
+; CHECK-NEXT:    clrlwi 9, 8, 16
+; CHECK-NEXT:    cmplw 9, 4
 ; CHECK-NEXT:    li 9, 0
-; CHECK-NEXT:    bge 0, .LBB1_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    bge 0, .LBB1_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 9, 8, 1
-; CHECK-NEXT:  .LBB1_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    clrlwi  8, 9, 16
+; CHECK-NEXT:  .LBB1_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 8, 9, 16
 ; CHECK-NEXT:    slw 8, 8, 3
 ; CHECK-NEXT:    and 9, 7, 6
 ; CHECK-NEXT:    or 9, 9, 8
-; CHECK-NEXT:  .LBB1_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB1_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB1_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB1_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 8, 0, 5
-; CHECK-NEXT:    cmplw   8, 7
-; CHECK-NEXT:    bne     0, .LBB1_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_5 Depth=2
+; CHECK-NEXT:    cmplw 8, 7
+; CHECK-NEXT:    bne- 0, .LBB1_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 9, 0, 5
-; CHECK-NEXT:    bne     0, .LBB1_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB1_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    bc 4, 20, .LBB1_1
+; CHECK-NEXT:    b .LBB1_8
+; CHECK-NEXT:  .LBB1_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    b .LBB1_1
+; CHECK-NEXT:  .LBB1_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 8, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -114,32 +122,36 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    lwz 6, 0(3)
-; CHECK-NEXT:    b .LBB2_2
-; CHECK-NEXT:  .LBB2_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB2_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB2_4 Depth 2
-; CHECK-NEXT:    cmplw   6, 4
+; CHECK-NEXT:  .LBB2_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB2_3 Depth 2
+; CHECK-NEXT:    cmplw 6, 4
 ; CHECK-NEXT:    li 7, 0
-; CHECK-NEXT:    bge 0, .LBB2_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    bge 0, .LBB2_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 7, 6, 1
-; CHECK-NEXT:  .LBB2_4:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB2_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB2_3: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB2_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 5, 0, 3
-; CHECK-NEXT:    cmplw   5, 6
-; CHECK-NEXT:    bne     0, .LBB2_1
-; CHECK-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_4 Depth=2
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    bne- 0, .LBB2_6
+; CHECK-NEXT:  # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB2_4
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.7:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB2_3
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB2_1
+; CHECK-NEXT:    b .LBB2_7
+; CHECK-NEXT:  .LBB2_6: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB2_1
+; CHECK-NEXT:  .LBB2_7: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -152,32 +164,36 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    ld 6, 0(3)
-; CHECK-NEXT:    b .LBB3_2
-; CHECK-NEXT:  .LBB3_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB3_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB3_4 Depth 2
-; CHECK-NEXT:    cmpld   6, 4
+; CHECK-NEXT:  .LBB3_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB3_3 Depth 2
+; CHECK-NEXT:    cmpld 6, 4
 ; CHECK-NEXT:    li 7, 0
-; CHECK-NEXT:    bge 0, .LBB3_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    bge 0, .LBB3_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 7, 6, 1
-; CHECK-NEXT:  .LBB3_4:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB3_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB3_3: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB3_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldarx 5, 0, 3
-; CHECK-NEXT:    cmpld   5, 6
-; CHECK-NEXT:    bne     0, .LBB3_1
-; CHECK-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_4 Depth=2
+; CHECK-NEXT:    cmpld 5, 6
+; CHECK-NEXT:    bne- 0, .LBB3_6
+; CHECK-NEXT:  # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB3_4
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.7:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB3_3
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB3_1
+; CHECK-NEXT:    b .LBB3_7
+; CHECK-NEXT:  .LBB3_6: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB3_1
+; CHECK-NEXT:  .LBB3_7: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -190,48 +206,52 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    not     3, 3
+; CHECK-NEXT:    not 3, 3
 ; CHECK-NEXT:    li 6, 255
 ; CHECK-NEXT:    lwz 8, 0(5)
 ; CHECK-NEXT:    rlwinm 3, 3, 3, 27, 28
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  7, 4, 24
-; CHECK-NEXT:    b .LBB4_2
-; CHECK-NEXT:  .LBB4_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  .LBB4_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB4_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 7, 4, 24
+; CHECK-NEXT:  .LBB4_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB4_4 Depth 2
 ; CHECK-NEXT:    srw 9, 8, 3
 ; CHECK-NEXT:    andi. 10, 9, 255
 ; CHECK-NEXT:    cmplw 1, 10, 7
 ; CHECK-NEXT:    cror 20, 2, 5
-; CHECK-NEXT:    mr      10, 4
-; CHECK-NEXT:    bc 12, 20, .LBB4_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    mr 10, 4
+; CHECK-NEXT:    bc 12, 20, .LBB4_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 10, 9, -1
-; CHECK-NEXT:  .LBB4_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT:    clrlwi  9, 10, 24
+; CHECK-NEXT:  .LBB4_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 9, 10, 24
 ; CHECK-NEXT:    slw 9, 9, 3
 ; CHECK-NEXT:    and 10, 8, 6
 ; CHECK-NEXT:    or 10, 10, 9
-; CHECK-NEXT:  .LBB4_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB4_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB4_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB4_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 9, 0, 5
-; CHECK-NEXT:    cmplw   9, 8
-; CHECK-NEXT:    bne     0, .LBB4_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_5 Depth=2
+; CHECK-NEXT:    cmplw 9, 8
+; CHECK-NEXT:    bne- 0, .LBB4_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 10, 0, 5
-; CHECK-NEXT:    bne     0, .LBB4_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB4_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    bc 4, 20, .LBB4_1
+; CHECK-NEXT:    b .LBB4_8
+; CHECK-NEXT:  .LBB4_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    b .LBB4_1
+; CHECK-NEXT:  .LBB4_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 9, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -244,50 +264,54 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    clrlwi  3, 3, 30
+; CHECK-NEXT:    clrlwi 3, 3, 30
 ; CHECK-NEXT:    lis 6, 0
 ; CHECK-NEXT:    xori 3, 3, 2
 ; CHECK-NEXT:    lwz 8, 0(5)
 ; CHECK-NEXT:    ori 6, 6, 65535
 ; CHECK-NEXT:    slwi 3, 3, 3
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  7, 4, 16
-; CHECK-NEXT:    b .LBB5_2
-; CHECK-NEXT:  .LBB5_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  .LBB5_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB5_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 7, 4, 16
+; CHECK-NEXT:  .LBB5_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB5_4 Depth 2
 ; CHECK-NEXT:    srw 9, 8, 3
 ; CHECK-NEXT:    andi. 10, 9, 65535
 ; CHECK-NEXT:    cmplw 1, 10, 7
 ; CHECK-NEXT:    cror 20, 2, 5
-; CHECK-NEXT:    mr      10, 4
-; CHECK-NEXT:    bc 12, 20, .LBB5_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    mr 10, 4
+; CHECK-NEXT:    bc 12, 20, .LBB5_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 10, 9, -1
-; CHECK-NEXT:  .LBB5_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    clrlwi  9, 10, 16
+; CHECK-NEXT:  .LBB5_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 9, 10, 16
 ; CHECK-NEXT:    slw 9, 9, 3
 ; CHECK-NEXT:    and 10, 8, 6
 ; CHECK-NEXT:    or 10, 10, 9
-; CHECK-NEXT:  .LBB5_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB5_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB5_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB5_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 9, 0, 5
-; CHECK-NEXT:    cmplw   9, 8
-; CHECK-NEXT:    bne     0, .LBB5_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_5 Depth=2
+; CHECK-NEXT:    cmplw 9, 8
+; CHECK-NEXT:    bne- 0, .LBB5_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 10, 0, 5
-; CHECK-NEXT:    bne     0, .LBB5_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB5_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    bc 4, 20, .LBB5_1
+; CHECK-NEXT:    b .LBB5_8
+; CHECK-NEXT:  .LBB5_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    b .LBB5_1
+; CHECK-NEXT:  .LBB5_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 9, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -300,37 +324,41 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    lwz 6, 0(3)
-; CHECK-NEXT:    b .LBB6_2
-; CHECK-NEXT:  .LBB6_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB6_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB6_5 Depth 2
-; CHECK-NEXT:    cmpwi   6, 0
-; CHECK-NEXT:    mr      7, 4
-; CHECK-NEXT:    bc 12, 2, .LBB6_5
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT:    cmplw   6, 4
-; CHECK-NEXT:    mr      7, 4
-; CHECK-NEXT:    bc 12, 1, .LBB6_5
-; CHECK-NEXT:  # %bb.4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT:  .LBB6_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB6_4 Depth 2
+; CHECK-NEXT:    cmpwi 6, 0
+; CHECK-NEXT:    mr 7, 4
+; CHECK-NEXT:    bc 12, 2, .LBB6_4
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    cmplw 6, 4
+; CHECK-NEXT:    mr 7, 4
+; CHECK-NEXT:    bc 12, 1, .LBB6_4
+; CHECK-NEXT:  # %bb.3: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 7, 6, -1
-; CHECK-NEXT:  .LBB6_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB6_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB6_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB6_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 5, 0, 3
-; CHECK-NEXT:    cmplw   5, 6
-; CHECK-NEXT:    bne     0, .LBB6_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_5 Depth=2
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    bne- 0, .LBB6_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB6_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB6_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB6_1
+; CHECK-NEXT:    b .LBB6_8
+; CHECK-NEXT:  .LBB6_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB6_1
+; CHECK-NEXT:  .LBB6_8: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -343,38 +371,42 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    ld 6, 0(3)
-; CHECK-NEXT:    b .LBB7_2
-; CHECK-NEXT:  .LBB7_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB7_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB7_5 Depth 2
-; CHECK-NEXT:    cmpdi   6, 0
-; CHECK-NEXT:    mr      7, 4
-; CHECK-NEXT:    bc 12, 2, .LBB7_5
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT:    cmpld   6, 4
-; CHECK-NEXT:    mr      7, 4
-; CHECK-NEXT:    bc 12, 1, .LBB7_5
-; CHECK-NEXT:  # %bb.4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_2 Depth=1
+; CHECK-NEXT:  .LBB7_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB7_4 Depth 2
+; CHECK-NEXT:    cmpdi 6, 0
+; CHECK-NEXT:    mr 7, 4
+; CHECK-NEXT:    bc 12, 2, .LBB7_4
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    cmpld 6, 4
+; CHECK-NEXT:    mr 7, 4
+; CHECK-NEXT:    bc 12, 1, .LBB7_4
+; CHECK-NEXT:  # %bb.3: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 7, 6, -1
-; CHECK-NEXT:  .LBB7_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB7_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB7_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB7_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldarx 5, 0, 3
-; CHECK-NEXT:    cmpld   5, 6
-; CHECK-NEXT:    bne     0, .LBB7_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_5 Depth=2
+; CHECK-NEXT:    cmpld 5, 6
+; CHECK-NEXT:    bne- 0, .LBB7_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB7_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
-; CHECK-NEXT:    mr      3, 5
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB7_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB7_1
+; CHECK-NEXT:    b .LBB7_8
+; CHECK-NEXT:  .LBB7_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB7_1
+; CHECK-NEXT:  .LBB7_8: # %atomicrmw.end
+; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
   %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst
diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
index 0474a479a1fef..90990bbb4124d 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
@@ -402,16 +402,15 @@ define void @test40(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB40_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB40_1
+; PPC64LE-NEXT:    bne- 0, .LBB40_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
@@ -423,16 +422,15 @@ define void @test41(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB41_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB41_1
+; PPC64LE-NEXT:    bne- 0, .LBB41_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -445,16 +443,15 @@ define void @test42(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB42_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB42_3
+; PPC64LE-NEXT:    bne- 0, .LBB42_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB42_1
+; PPC64LE-NEXT:    bne- 0, .LBB42_1
 ; PPC64LE-NEXT:  .LBB42_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -468,7 +465,7 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
@@ -476,12 +473,12 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:  .LBB43_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB43_2
+; PPC64LE-NEXT:    beq+ 0, .LBB43_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic
   ret void
@@ -493,7 +490,7 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB44_4
+; PPC64LE-NEXT:    bne- 0, .LBB44_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
@@ -501,12 +498,12 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:  .LBB44_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB44_2
+; PPC64LE-NEXT:    beq+ 0, .LBB44_2
 ; PPC64LE-NEXT:  .LBB44_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -520,23 +517,21 @@ define void @test45(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB45_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB45_5
+; PPC64LE-NEXT:    beq+ 0, .LBB45_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB45_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB45_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB45_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB45_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic
@@ -549,20 +544,19 @@ define void @test46(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB46_4
+; PPC64LE-NEXT:    bne- 0, .LBB46_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB46_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB46_4
+; PPC64LE-NEXT:    beq+ 0, .LBB46_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB46_2
+; PPC64LE-NEXT:    beq+ 0, .LBB46_2
 ; PPC64LE-NEXT:  .LBB46_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -576,23 +570,21 @@ define void @test47(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB47_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB47_5
+; PPC64LE-NEXT:    beq+ 0, .LBB47_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB47_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB47_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB47_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB47_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst monotonic
@@ -605,20 +597,19 @@ define void @test48(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB48_4
+; PPC64LE-NEXT:    bne- 0, .LBB48_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB48_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB48_4
+; PPC64LE-NEXT:    beq+ 0, .LBB48_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB48_2
+; PPC64LE-NEXT:    beq+ 0, .LBB48_2
 ; PPC64LE-NEXT:  .LBB48_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -632,20 +623,19 @@ define void @test49(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB49_4
+; PPC64LE-NEXT:    bne- 0, .LBB49_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB49_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB49_4
+; PPC64LE-NEXT:    beq+ 0, .LBB49_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB49_2
+; PPC64LE-NEXT:    beq+ 0, .LBB49_2
 ; PPC64LE-NEXT:  .LBB49_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -658,16 +648,15 @@ define void @test50(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB50_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB50_1
+; PPC64LE-NEXT:    bne- 0, .LBB50_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
@@ -679,16 +668,15 @@ define void @test51(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB51_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB51_1
+; PPC64LE-NEXT:    bne- 0, .LBB51_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -701,16 +689,15 @@ define void @test52(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB52_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB52_3
+; PPC64LE-NEXT:    bne- 0, .LBB52_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB52_1
+; PPC64LE-NEXT:    bne- 0, .LBB52_1
 ; PPC64LE-NEXT:  .LBB52_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -724,7 +711,7 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
@@ -732,12 +719,12 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:  .LBB53_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB53_2
+; PPC64LE-NEXT:    beq+ 0, .LBB53_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic
   ret void
@@ -749,7 +736,7 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB54_4
+; PPC64LE-NEXT:    bne- 0, .LBB54_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
@@ -757,12 +744,12 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:  .LBB54_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB54_2
+; PPC64LE-NEXT:    beq+ 0, .LBB54_2
 ; PPC64LE-NEXT:  .LBB54_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -776,23 +763,21 @@ define void @test55(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB55_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB55_5
+; PPC64LE-NEXT:    beq+ 0, .LBB55_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB55_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB55_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB55_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB55_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic
@@ -805,20 +790,19 @@ define void @test56(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB56_4
+; PPC64LE-NEXT:    bne- 0, .LBB56_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB56_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB56_4
+; PPC64LE-NEXT:    beq+ 0, .LBB56_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB56_2
+; PPC64LE-NEXT:    beq+ 0, .LBB56_2
 ; PPC64LE-NEXT:  .LBB56_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -832,23 +816,21 @@ define void @test57(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB57_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB57_5
+; PPC64LE-NEXT:    beq+ 0, .LBB57_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB57_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB57_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB57_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB57_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst monotonic
@@ -861,20 +843,19 @@ define void @test58(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB58_4
+; PPC64LE-NEXT:    bne- 0, .LBB58_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB58_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB58_4
+; PPC64LE-NEXT:    beq+ 0, .LBB58_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB58_2
+; PPC64LE-NEXT:    beq+ 0, .LBB58_2
 ; PPC64LE-NEXT:  .LBB58_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -888,20 +869,19 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB59_4
+; PPC64LE-NEXT:    bne- 0, .LBB59_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB59_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB59_4
+; PPC64LE-NEXT:    beq+ 0, .LBB59_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB59_2
+; PPC64LE-NEXT:    beq+ 0, .LBB59_2
 ; PPC64LE-NEXT:  .LBB59_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -912,16 +892,15 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test60(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test60:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB60_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB60_1
+; PPC64LE-NEXT:    bne- 0, .LBB60_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
@@ -931,16 +910,15 @@ define void @test60(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test61(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test61:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB61_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB61_1
+; PPC64LE-NEXT:    bne- 0, .LBB61_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -951,16 +929,15 @@ define void @test61(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test62(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test62:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB62_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB62_3
+; PPC64LE-NEXT:    bne- 0, .LBB62_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB62_1
+; PPC64LE-NEXT:    bne- 0, .LBB62_1
 ; PPC64LE-NEXT:  .LBB62_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -973,19 +950,19 @@ define void @test63(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB63_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB63_2
+; PPC64LE-NEXT:    beq+ 0, .LBB63_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic
   ret void
@@ -996,19 +973,19 @@ define void @test64(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB64_4
+; PPC64LE-NEXT:    bne- 0, .LBB64_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB64_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB64_2
+; PPC64LE-NEXT:    beq+ 0, .LBB64_2
 ; PPC64LE-NEXT:  .LBB64_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1021,22 +998,20 @@ define void @test65(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB65_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB65_5
+; PPC64LE-NEXT:    beq+ 0, .LBB65_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB65_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB65_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB65_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB65_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic
@@ -1048,19 +1023,18 @@ define void @test66(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB66_4
+; PPC64LE-NEXT:    bne- 0, .LBB66_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB66_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB66_4
+; PPC64LE-NEXT:    beq+ 0, .LBB66_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB66_2
+; PPC64LE-NEXT:    beq+ 0, .LBB66_2
 ; PPC64LE-NEXT:  .LBB66_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1073,22 +1047,20 @@ define void @test67(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB67_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB67_5
+; PPC64LE-NEXT:    beq+ 0, .LBB67_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB67_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB67_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB67_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB67_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst monotonic
@@ -1100,19 +1072,18 @@ define void @test68(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB68_4
+; PPC64LE-NEXT:    bne- 0, .LBB68_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB68_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB68_4
+; PPC64LE-NEXT:    beq+ 0, .LBB68_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB68_2
+; PPC64LE-NEXT:    beq+ 0, .LBB68_2
 ; PPC64LE-NEXT:  .LBB68_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1125,19 +1096,18 @@ define void @test69(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB69_4
+; PPC64LE-NEXT:    bne- 0, .LBB69_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB69_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB69_4
+; PPC64LE-NEXT:    beq+ 0, .LBB69_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB69_2
+; PPC64LE-NEXT:    beq+ 0, .LBB69_2
 ; PPC64LE-NEXT:  .LBB69_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1148,16 +1118,15 @@ define void @test69(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test70(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test70:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB70_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB70_1
+; PPC64LE-NEXT:    bne- 0, .LBB70_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
@@ -1167,16 +1136,15 @@ define void @test70(ptr %ptr, i64 %cmp, i64 %val) {
 define void @test71(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test71:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB71_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB71_1
+; PPC64LE-NEXT:    bne- 0, .LBB71_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1187,16 +1155,15 @@ define void @test71(ptr %ptr, i64 %cmp, i64 %val) {
 define void @test72(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test72:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB72_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB72_3
+; PPC64LE-NEXT:    bne- 0, .LBB72_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB72_1
+; PPC64LE-NEXT:    bne- 0, .LBB72_1
 ; PPC64LE-NEXT:  .LBB72_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1209,19 +1176,19 @@ define void @test73(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB73_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB73_2
+; PPC64LE-NEXT:    beq+ 0, .LBB73_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic
   ret void
@@ -1232,19 +1199,19 @@ define void @test74(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB74_4
+; PPC64LE-NEXT:    bne- 0, .LBB74_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB74_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB74_2
+; PPC64LE-NEXT:    beq+ 0, .LBB74_2
 ; PPC64LE-NEXT:  .LBB74_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1257,22 +1224,20 @@ define void @test75(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB75_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB75_5
+; PPC64LE-NEXT:    beq+ 0, .LBB75_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB75_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB75_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB75_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB75_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic
@@ -1284,19 +1249,18 @@ define void @test76(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB76_4
+; PPC64LE-NEXT:    bne- 0, .LBB76_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB76_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB76_4
+; PPC64LE-NEXT:    beq+ 0, .LBB76_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB76_2
+; PPC64LE-NEXT:    beq+ 0, .LBB76_2
 ; PPC64LE-NEXT:  .LBB76_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1309,22 +1273,20 @@ define void @test77(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB77_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB77_5
+; PPC64LE-NEXT:    beq+ 0, .LBB77_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB77_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB77_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB77_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB77_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst monotonic
@@ -1336,19 +1298,18 @@ define void @test78(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB78_4
+; PPC64LE-NEXT:    bne- 0, .LBB78_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB78_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB78_4
+; PPC64LE-NEXT:    beq+ 0, .LBB78_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB78_2
+; PPC64LE-NEXT:    beq+ 0, .LBB78_2
 ; PPC64LE-NEXT:  .LBB78_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1361,19 +1322,18 @@ define void @test79(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB79_4
+; PPC64LE-NEXT:    bne- 0, .LBB79_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB79_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB79_4
+; PPC64LE-NEXT:    beq+ 0, .LBB79_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB79_2
+; PPC64LE-NEXT:    beq+ 0, .LBB79_2
 ; PPC64LE-NEXT:  .LBB79_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1386,16 +1346,15 @@ define void @test80(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB80_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB80_1
+; PPC64LE-NEXT:    bne- 0, .LBB80_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") monotonic monotonic
@@ -1407,16 +1366,15 @@ define void @test81(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB81_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB81_1
+; PPC64LE-NEXT:    bne- 0, .LBB81_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1429,16 +1387,15 @@ define void @test82(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB82_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB82_3
+; PPC64LE-NEXT:    bne- 0, .LBB82_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB82_1
+; PPC64LE-NEXT:    bne- 0, .LBB82_1
 ; PPC64LE-NEXT:  .LBB82_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1452,7 +1409,7 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
@@ -1460,12 +1417,12 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:  .LBB83_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB83_2
+; PPC64LE-NEXT:    beq+ 0, .LBB83_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") release monotonic
   ret void
@@ -1477,7 +1434,7 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB84_4
+; PPC64LE-NEXT:    bne- 0, .LBB84_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
@@ -1485,12 +1442,12 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:  .LBB84_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB84_2
+; PPC64LE-NEXT:    beq+ 0, .LBB84_2
 ; PPC64LE-NEXT:  .LBB84_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1504,23 +1461,21 @@ define void @test85(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB85_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB85_5
+; PPC64LE-NEXT:    beq+ 0, .LBB85_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB85_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB85_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB85_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB85_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") acq_rel monotonic
@@ -1533,20 +1488,19 @@ define void @test86(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB86_4
+; PPC64LE-NEXT:    bne- 0, .LBB86_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB86_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB86_4
+; PPC64LE-NEXT:    beq+ 0, .LBB86_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB86_2
+; PPC64LE-NEXT:    beq+ 0, .LBB86_2
 ; PPC64LE-NEXT:  .LBB86_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1560,23 +1514,21 @@ define void @test87(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB87_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB87_5
+; PPC64LE-NEXT:    beq+ 0, .LBB87_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB87_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB87_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB87_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB87_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") seq_cst monotonic
@@ -1589,20 +1541,19 @@ define void @test88(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB88_4
+; PPC64LE-NEXT:    bne- 0, .LBB88_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB88_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB88_4
+; PPC64LE-NEXT:    beq+ 0, .LBB88_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB88_2
+; PPC64LE-NEXT:    beq+ 0, .LBB88_2
 ; PPC64LE-NEXT:  .LBB88_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1616,20 +1567,19 @@ define void @test89(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB89_4
+; PPC64LE-NEXT:    bne- 0, .LBB89_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB89_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB89_4
+; PPC64LE-NEXT:    beq+ 0, .LBB89_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB89_2
+; PPC64LE-NEXT:    beq+ 0, .LBB89_2
 ; PPC64LE-NEXT:  .LBB89_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1642,16 +1592,15 @@ define void @test90(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB90_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB90_1
+; PPC64LE-NEXT:    bne- 0, .LBB90_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") monotonic monotonic
@@ -1663,16 +1612,15 @@ define void @test91(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB91_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB91_1
+; PPC64LE-NEXT:    bne- 0, .LBB91_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1685,16 +1633,15 @@ define void @test92(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB92_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB92_3
+; PPC64LE-NEXT:    bne- 0, .LBB92_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB92_1
+; PPC64LE-NEXT:    bne- 0, .LBB92_1
 ; PPC64LE-NEXT:  .LBB92_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1708,7 +1655,7 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
@@ -1716,12 +1663,12 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:  .LBB93_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB93_2
+; PPC64LE-NEXT:    beq+ 0, .LBB93_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") release monotonic
   ret void
@@ -1733,7 +1680,7 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB94_4
+; PPC64LE-NEXT:    bne- 0, .LBB94_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
@@ -1741,12 +1688,12 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:  .LBB94_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB94_2
+; PPC64LE-NEXT:    beq+ 0, .LBB94_2
 ; PPC64LE-NEXT:  .LBB94_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1760,23 +1707,21 @@ define void @test95(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB95_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB95_5
+; PPC64LE-NEXT:    beq+ 0, .LBB95_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB95_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB95_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB95_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB95_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") acq_rel monotonic
@@ -1789,20 +1734,19 @@ define void @test96(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB96_4
+; PPC64LE-NEXT:    bne- 0, .LBB96_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB96_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB96_4
+; PPC64LE-NEXT:    beq+ 0, .LBB96_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB96_2
+; PPC64LE-NEXT:    beq+ 0, .LBB96_2
 ; PPC64LE-NEXT:  .LBB96_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1816,23 +1760,21 @@ define void @test97(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB97_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB97_5
+; PPC64LE-NEXT:    beq+ 0, .LBB97_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB97_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB97_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB97_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB97_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") seq_cst monotonic
@@ -1845,20 +1787,19 @@ define void @test98(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB98_4
+; PPC64LE-NEXT:    bne- 0, .LBB98_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB98_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB98_4
+; PPC64LE-NEXT:    beq+ 0, .LBB98_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB98_2
+; PPC64LE-NEXT:    beq+ 0, .LBB98_2
 ; PPC64LE-NEXT:  .LBB98_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1872,20 +1813,19 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB99_4
+; PPC64LE-NEXT:    bne- 0, .LBB99_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB99_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB99_4
+; PPC64LE-NEXT:    beq+ 0, .LBB99_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB99_2
+; PPC64LE-NEXT:    beq+ 0, .LBB99_2
 ; PPC64LE-NEXT:  .LBB99_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1896,16 +1836,15 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test100(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test100:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB100_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB100_1
+; PPC64LE-NEXT:    bne- 0, .LBB100_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") monotonic monotonic
@@ -1915,16 +1854,15 @@ define void @test100(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test101(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test101:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB101_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB101_1
+; PPC64LE-NEXT:    bne- 0, .LBB101_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1935,16 +1873,15 @@ define void @test101(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test102(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test102:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB102_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB102_3
+; PPC64LE-NEXT:    bne- 0, .LBB102_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB102_1
+; PPC64LE-NEXT:    bne- 0, .LBB102_1
 ; PPC64LE-NEXT:  .LBB102_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1957,19 +1894,19 @@ define void @test103(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB103_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB103_2
+; PPC64LE-NEXT:    beq+ 0, .LBB103_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") release monotonic
   ret void
@@ -1980,19 +1917,19 @@ define void @test104(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB104_4
+; PPC64LE-NEXT:    bne- 0, .LBB104_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB104_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB104_2
+; PPC64LE-NEXT:    beq+ 0, .LBB104_2
 ; PPC64LE-NEXT:  .LBB104_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2005,22 +1942,20 @@ define void @test105(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB105_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB105_5
+; PPC64LE-NEXT:    beq+ 0, .LBB105_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB105_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB105_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB105_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB105_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") acq_rel monotonic
@@ -2032,19 +1967,18 @@ define void @test106(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB106_4
+; PPC64LE-NEXT:    bne- 0, .LBB106_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB106_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB106_4
+; PPC64LE-NEXT:    beq+ 0, .LBB106_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB106_2
+; PPC64LE-NEXT:    beq+ 0, .LBB106_2
 ; PPC64LE-NEXT:  .LBB106_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2057,22 +1991,20 @@ define void @test107(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB107_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB107_5
+; PPC64LE-NEXT:    beq+ 0, .LBB107_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB107_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB107_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB107_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB107_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") seq_cst monotonic
@@ -2084,19 +2016,18 @@ define void @test108(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB108_4
+; PPC64LE-NEXT:    bne- 0, .LBB108_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB108_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB108_4
+; PPC64LE-NEXT:    beq+ 0, .LBB108_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB108_2
+; PPC64LE-NEXT:    beq+ 0, .LBB108_2
 ; PPC64LE-NEXT:  .LBB108_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2109,19 +2040,18 @@ define void @test109(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB109_4
+; PPC64LE-NEXT:    bne- 0, .LBB109_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB109_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB109_4
+; PPC64LE-NEXT:    beq+ 0, .LBB109_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB109_2
+; PPC64LE-NEXT:    beq+ 0, .LBB109_2
 ; PPC64LE-NEXT:  .LBB109_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2132,16 +2062,15 @@ define void @test109(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test110(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test110:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB110_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB110_1
+; PPC64LE-NEXT:    bne- 0, .LBB110_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") monotonic monotonic
@@ -2151,16 +2080,15 @@ define void @test110(ptr %ptr, i64 %cmp, i64 %val) {
 define void @test111(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test111:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB111_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB111_1
+; PPC64LE-NEXT:    bne- 0, .LBB111_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2171,16 +2099,15 @@ define void @test111(ptr %ptr, i64 %cmp, i64 %val) {
 define void @test112(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test112:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB112_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB112_3
+; PPC64LE-NEXT:    bne- 0, .LBB112_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB112_1
+; PPC64LE-NEXT:    bne- 0, .LBB112_1
 ; PPC64LE-NEXT:  .LBB112_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2193,19 +2120,19 @@ define void @test113(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB113_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB113_2
+; PPC64LE-NEXT:    beq+ 0, .LBB113_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") release monotonic
   ret void
@@ -2216,19 +2143,19 @@ define void @test114(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB114_4
+; PPC64LE-NEXT:    bne- 0, .LBB114_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB114_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB114_2
+; PPC64LE-NEXT:    beq+ 0, .LBB114_2
 ; PPC64LE-NEXT:  .LBB114_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2241,22 +2168,20 @@ define void @test115(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB115_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB115_5
+; PPC64LE-NEXT:    beq+ 0, .LBB115_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB115_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB115_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB115_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB115_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") acq_rel monotonic
@@ -2268,19 +2193,18 @@ define void @test116(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB116_4
+; PPC64LE-NEXT:    bne- 0, .LBB116_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB116_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB116_4
+; PPC64LE-NEXT:    beq+ 0, .LBB116_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB116_2
+; PPC64LE-NEXT:    beq+ 0, .LBB116_2
 ; PPC64LE-NEXT:  .LBB116_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2293,22 +2217,20 @@ define void @test117(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB117_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB117_5
+; PPC64LE-NEXT:    beq+ 0, .LBB117_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB117_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB117_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB117_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB117_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") seq_cst monotonic
@@ -2320,19 +2242,18 @@ define void @test118(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB118_4
+; PPC64LE-NEXT:    bne- 0, .LBB118_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB118_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB118_4
+; PPC64LE-NEXT:    beq+ 0, .LBB118_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB118_2
+; PPC64LE-NEXT:    beq+ 0, .LBB118_2
 ; PPC64LE-NEXT:  .LBB118_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2345,19 +2266,18 @@ define void @test119(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB119_4
+; PPC64LE-NEXT:    bne- 0, .LBB119_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB119_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB119_4
+; PPC64LE-NEXT:    beq+ 0, .LBB119_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB119_2
+; PPC64LE-NEXT:    beq+ 0, .LBB119_2
 ; PPC64LE-NEXT:  .LBB119_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 40786057ead5f..183c8e1323f2e 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -138,67 +138,67 @@ define void @store_i64_seq_cst(ptr %mem) {
 ; Atomic CmpXchg
 define i8 @cas_strong_i8_sc_sc(ptr %mem) {
 ; PPC32-LABEL: cas_strong_i8_sc_sc:
-; PPC32:       # %bb.0:
+; PPC32:       # %bb.0: # %cmpxchg.start
 ; PPC32-NEXT:    rlwinm r5, r3, 0, 0, 29
 ; PPC32-NEXT:    lwarx r4, 0, r5
-; PPC32-NEXT:    not     r3, r3
+; PPC32-NEXT:    not r3, r3
 ; PPC32-NEXT:    rlwinm r3, r3, 3, 27, 28
 ; PPC32-NEXT:    srw r6, r4, r3
 ; PPC32-NEXT:    andi. r6, r6, 255
-; PPC32-NEXT:    bne     cr0, .LBB8_4
-; PPC32-NEXT:  # %bb.1:                                # %cmpxchg.fencedstore
+; PPC32-NEXT:    bne- cr0, .LBB8_4
+; PPC32-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC32-NEXT:    li r6, 255
 ; PPC32-NEXT:    li r7, 1
 ; PPC32-NEXT:    slw r6, r6, r3
-; PPC32-NEXT:    not     r6, r6
+; PPC32-NEXT:    not r6, r6
 ; PPC32-NEXT:    slw r7, r7, r3
 ; PPC32-NEXT:    sync
-; PPC32-NEXT:  .LBB8_2:                                # %cmpxchg.trystore
-; PPC32-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; PPC32-NEXT:  .LBB8_2: # %cmpxchg.trystore
+; PPC32-NEXT:    #
 ; PPC32-NEXT:    and r8, r4, r6
 ; PPC32-NEXT:    or r8, r8, r7
 ; PPC32-NEXT:    stwcx. r8, 0, r5
-; PPC32-NEXT:    beq     cr0, .LBB8_4
-; PPC32-NEXT:  # %bb.3:                                # %cmpxchg.releasedload
-; PPC32-NEXT:                                          #   in Loop: Header=BB8_2 Depth=1
+; PPC32-NEXT:    beq+ cr0, .LBB8_4
+; PPC32-NEXT:  # %bb.3: # %cmpxchg.releasedload
+; PPC32-NEXT:    #
 ; PPC32-NEXT:    lwarx r4, 0, r5
 ; PPC32-NEXT:    srw r8, r4, r3
 ; PPC32-NEXT:    andi. r8, r8, 255
-; PPC32-NEXT:    beq     cr0, .LBB8_2
-; PPC32-NEXT:  .LBB8_4:                                # %cmpxchg.nostore
+; PPC32-NEXT:    beq+ cr0, .LBB8_2
+; PPC32-NEXT:  .LBB8_4: # %cmpxchg.nostore
 ; PPC32-NEXT:    srw r3, r4, r3
 ; PPC32-NEXT:    lwsync
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: cas_strong_i8_sc_sc:
-; PPC64:       # %bb.0:
+; PPC64:       # %bb.0: # %cmpxchg.start
 ; PPC64-NEXT:    rldicr r5, r3, 0, 61
-; PPC64-NEXT:    not     r3, r3
+; PPC64-NEXT:    not r3, r3
 ; PPC64-NEXT:    lwarx r4, 0, r5
 ; PPC64-NEXT:    rlwinm r3, r3, 3, 27, 28
 ; PPC64-NEXT:    srw r6, r4, r3
 ; PPC64-NEXT:    andi. r6, r6, 255
-; PPC64-NEXT:    bne     cr0, .LBB8_4
-; PPC64-NEXT:  # %bb.1:                                # %cmpxchg.fencedstore
+; PPC64-NEXT:    bne- cr0, .LBB8_4
+; PPC64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64-NEXT:    li r6, 255
 ; PPC64-NEXT:    li r7, 1
 ; PPC64-NEXT:    slw r6, r6, r3
-; PPC64-NEXT:    not     r6, r6
+; PPC64-NEXT:    not r6, r6
 ; PPC64-NEXT:    slw r7, r7, r3
 ; PPC64-NEXT:    sync
-; PPC64-NEXT:  .LBB8_2:                                # %cmpxchg.trystore
-; PPC64-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; PPC64-NEXT:  .LBB8_2: # %cmpxchg.trystore
+; PPC64-NEXT:    #
 ; PPC64-NEXT:    and r8, r4, r6
 ; PPC64-NEXT:    or r8, r8, r7
 ; PPC64-NEXT:    stwcx. r8, 0, r5
-; PPC64-NEXT:    beq     cr0, .LBB8_4
-; PPC64-NEXT:  # %bb.3:                                # %cmpxchg.releasedload
-; PPC64-NEXT:                                          #   in Loop: Header=BB8_2 Depth=1
+; PPC64-NEXT:    beq+ cr0, .LBB8_4
+; PPC64-NEXT:  # %bb.3: # %cmpxchg.releasedload
+; PPC64-NEXT:    #
 ; PPC64-NEXT:    lwarx r4, 0, r5
 ; PPC64-NEXT:    srw r8, r4, r3
 ; PPC64-NEXT:    andi. r8, r8, 255
-; PPC64-NEXT:    beq     cr0, .LBB8_2
-; PPC64-NEXT:  .LBB8_4:                                # %cmpxchg.nostore
+; PPC64-NEXT:    beq+ cr0, .LBB8_2
+; PPC64-NEXT:  .LBB8_4: # %cmpxchg.nostore
 ; PPC64-NEXT:    srw r3, r4, r3
 ; PPC64-NEXT:    lwsync
 ; PPC64-NEXT:    blr
@@ -208,54 +208,50 @@ define i8 @cas_strong_i8_sc_sc(ptr %mem) {
 }
 define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
 ; PPC32-LABEL: cas_weak_i16_acquire_acquire:
-; PPC32:       # %bb.0:
+; PPC32:       # %bb.0: # %cmpxchg.start
 ; PPC32-NEXT:    rlwinm r4, r3, 0, 0, 29
 ; PPC32-NEXT:    lwarx r5, 0, r4
-; PPC32-NEXT:    clrlwi  r3, r3, 30
+; PPC32-NEXT:    clrlwi r3, r3, 30
 ; PPC32-NEXT:    xori r3, r3, 2
 ; PPC32-NEXT:    slwi r6, r3, 3
 ; PPC32-NEXT:    srw r3, r5, r6
 ; PPC32-NEXT:    andi. r7, r3, 65535
-; PPC32-NEXT:    beq     cr0, .LBB9_2
-; PPC32-NEXT:  # %bb.1:                                # %cmpxchg.failure
-; PPC32-NEXT:    lwsync
-; PPC32-NEXT:    blr
-; PPC32-NEXT:  .LBB9_2:                                # %cmpxchg.fencedstore
+; PPC32-NEXT:    bne- cr0, .LBB9_2
+; PPC32-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC32-NEXT:    lis r7, 0
 ; PPC32-NEXT:    ori r7, r7, 65535
 ; PPC32-NEXT:    slw r7, r7, r6
 ; PPC32-NEXT:    li r8, 1
-; PPC32-NEXT:    not     r7, r7
+; PPC32-NEXT:    not r7, r7
 ; PPC32-NEXT:    slw r6, r8, r6
 ; PPC32-NEXT:    and r5, r5, r7
 ; PPC32-NEXT:    or r5, r5, r6
 ; PPC32-NEXT:    stwcx. r5, 0, r4
+; PPC32-NEXT:  .LBB9_2: # %cmpxchg.failure
 ; PPC32-NEXT:    lwsync
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: cas_weak_i16_acquire_acquire:
-; PPC64:       # %bb.0:
-; PPC64-NEXT:   rldicr r4, r3, 0, 61
-; PPC64-NEXT:    clrlwi  r3, r3, 30
+; PPC64:       # %bb.0: # %cmpxchg.start
+; PPC64-NEXT:    rldicr r4, r3, 0, 61
+; PPC64-NEXT:    clrlwi r3, r3, 30
 ; PPC64-NEXT:    lwarx r5, 0, r4
 ; PPC64-NEXT:    xori r3, r3, 2
 ; PPC64-NEXT:    slwi r6, r3, 3
 ; PPC64-NEXT:    srw r3, r5, r6
 ; PPC64-NEXT:    andi. r7, r3, 65535
-; PPC64-NEXT:    beq     cr0, .LBB9_2
-; PPC64-NEXT:  # %bb.1:                                # %cmpxchg.failure
-; PPC64-NEXT:    lwsync
-; PPC64-NEXT:    blr
-; PPC64-NEXT:  .LBB9_2:                                # %cmpxchg.fencedstore
+; PPC64-NEXT:    bne- cr0, .LBB9_2
+; PPC64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64-NEXT:    lis r7, 0
 ; PPC64-NEXT:    ori r7, r7, 65535
 ; PPC64-NEXT:    slw r7, r7, r6
 ; PPC64-NEXT:    li r8, 1
-; PPC64-NEXT:    not     r7, r7
+; PPC64-NEXT:    not r7, r7
 ; PPC64-NEXT:    slw r6, r8, r6
 ; PPC64-NEXT:    and r5, r5, r7
 ; PPC64-NEXT:    or r5, r5, r6
 ; PPC64-NEXT:    stwcx. r5, 0, r4
+; PPC64-NEXT:  .LBB9_2: # %cmpxchg.failure
 ; PPC64-NEXT:    lwsync
 ; PPC64-NEXT:    blr
   %val = cmpxchg weak ptr %mem, i16 0, i16 1 acquire acquire
@@ -264,24 +260,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
 }
 define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
 ; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    mr      r4, r3
+; CHECK:       # %bb.0: # %cmpxchg.start
+; CHECK-NEXT:    mr r4, r3
 ; CHECK-NEXT:    lwarx r3, 0, r3
-; CHECK-NEXT:    cmplwi  r3, 0
-; CHECK-NEXT:    bne     cr0, .LBB10_4
-; CHECK-NEXT:  # %bb.1:                                # %cmpxchg.fencedstore
+; CHECK-NEXT:    cmplwi r3, 0
+; CHECK-NEXT:    bne- cr0, .LBB10_4
+; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; CHECK-NEXT:    li r5, 1
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:  .LBB10_2:                               # %cmpxchg.trystore
-; CHECK-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:  .LBB10_2: # %cmpxchg.trystore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. r5, 0, r4
-; CHECK-NEXT:    beq     cr0, .LBB10_4
-; CHECK-NEXT:  # %bb.3:                                # %cmpxchg.releasedload
-; CHECK-NEXT:                                          #   in Loop: Header=BB10_2 Depth=1
+; CHECK-NEXT:    beq+ cr0, .LBB10_4
+; CHECK-NEXT:  # %bb.3: # %cmpxchg.releasedload
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx r3, 0, r4
-; CHECK-NEXT:    cmplwi  r3, 0
-; CHECK-NEXT:    beq     cr0, .LBB10_2
-; CHECK-NEXT:  .LBB10_4:                               # %cmpxchg.nostore
+; CHECK-NEXT:    cmplwi r3, 0
+; CHECK-NEXT:    beq+ cr0, .LBB10_2
+; CHECK-NEXT:  .LBB10_4: # %cmpxchg.nostore
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
   %val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire
@@ -313,12 +309,12 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: cas_weak_i64_release_monotonic:
-; PPC64:       # %bb.0:
-; PPC64-NEXT:    mr      r4, r3
+; PPC64:       # %bb.0: # %cmpxchg.start
+; PPC64-NEXT:    mr r4, r3
 ; PPC64-NEXT:    ldarx r3, 0, r3
-; PPC64-NEXT:    cmpldi  r3, 0
-; PPC64-NEXT:    bnelr   cr0
-; PPC64-NEXT:  # %bb.1:                                # %cmpxchg.fencedstore
+; PPC64-NEXT:    cmpldi r3, 0
+; PPC64-NEXT:    bnelr- cr0
+; PPC64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64-NEXT:    li r5, 1
 ; PPC64-NEXT:    lwsync
 ; PPC64-NEXT:    stdcx. r5, 0, r4
diff --git a/llvm/test/CodeGen/PowerPC/loop-comment.ll b/llvm/test/CodeGen/PowerPC/loop-comment.ll
index 530e67b4804fb..b4ceb36768904 100644
--- a/llvm/test/CodeGen/PowerPC/loop-comment.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-comment.ll
@@ -6,16 +6,15 @@ define void @test(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB0_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB0_1
+; PPC64LE-NEXT:    bne- 0, .LBB0_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic

>From 98d946ac4e944c92eb4a83bbff4bd6bf05e99daa Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 7 Aug 2025 14:00:50 +0000
Subject: [PATCH 3/5] change function name from
 getTrueBranchHintWeightForAtomicCmpXchgg to
 getTrueBranchHintWeightForAtomicCmpXchg

---
 llvm/include/llvm/CodeGen/TargetLowering.h  | 2 +-
 llvm/lib/CodeGen/AtomicExpandPass.cpp       | 6 +++---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h   | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 027bcc5bc53ae..609453164fcc7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2334,7 +2334,7 @@ class LLVM_ABI TargetLoweringBase {
                                          AtomicOrdering Ord) const;
 
   virtual MDNode *
-  getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+  getTrueBranchHintWeightForAtomicCmpXchg(LLVMContext &Ctx) const {
     return nullptr;
   }
 
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index abaa8b6e841f6..64fc6359ac726 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1456,7 +1456,7 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   // jump straight past that fence instruction (if it exists).
   Builder.CreateCondBr(
       ShouldStore, ReleasingStoreBB, NoStoreBB,
-      TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+      TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
 
   Builder.SetInsertPoint(ReleasingStoreBB);
   if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
@@ -1476,7 +1476,7 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
   Builder.CreateCondBr(
       StoreSuccess, SuccessBB, CI->isWeak() ? FailureBB : RetryBB,
-      TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+      TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
 
   Builder.SetInsertPoint(ReleasedLoadBB);
   Value *SecondLoad;
@@ -1491,7 +1491,7 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
     // jump straight past that fence instruction (if it exists).
     Builder.CreateCondBr(
         ShouldStore, TryStoreBB, NoStoreBB,
-        TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+        TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
     // Update PHI node in TryStoreBB.
     LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
   } else
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b218532e56b6a..865d3f5465859 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12817,7 +12817,7 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
   return Builder.CreateXor(Call, Builder.getInt32(1));
 }
 
-MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(
+MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchg(
     LLVMContext &Ctx) const {
   return MDBuilder(Ctx).createLikelyBranchWeights();
 }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4892a3c603a6c..4b78955db9fbf 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -939,7 +939,7 @@ namespace llvm {
                                    AtomicOrdering Ord) const override;
 
     virtual MDNode *
-    getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
+    getTrueBranchHintWeightForAtomicCmpXchg(LLVMContext &Ctx) const override;
     bool shouldInlineQuadwordAtomics() const;
 
     TargetLowering::AtomicExpansionKind

>From 5d2dcdd75dba26fc36aa910d1ef5fdca00b84650 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 7 Aug 2025 17:24:01 +0000
Subject: [PATCH 4/5] use the target-independent code

---
 llvm/include/llvm/CodeGen/TargetLowering.h  |  6 ------
 llvm/lib/CodeGen/AtomicExpandPass.cpp       | 13 ++++++-------
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp |  6 ------
 llvm/lib/Target/PowerPC/PPCISelLowering.h   |  2 --
 llvm/test/CodeGen/AArch64/atomic-ops.ll     | 10 +++++-----
 5 files changed, 11 insertions(+), 26 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 609453164fcc7..cbdc1b6031680 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2332,12 +2332,6 @@ class LLVM_ABI TargetLoweringBase {
   virtual Instruction *emitTrailingFence(IRBuilderBase &Builder,
                                          Instruction *Inst,
                                          AtomicOrdering Ord) const;
-
-  virtual MDNode *
-  getTrueBranchHintWeightForAtomicCmpXchg(LLVMContext &Ctx) const {
-    return nullptr;
-  }
-
   /// @}
 
   // Emits code that executes when the comparison result in the ll/sc
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 64fc6359ac726..0d90cdb52a448 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1454,9 +1454,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
   // If the cmpxchg doesn't actually need any ordering when it fails, we can
   // jump straight past that fence instruction (if it exists).
-  Builder.CreateCondBr(
-      ShouldStore, ReleasingStoreBB, NoStoreBB,
-      TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
+  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
+                       MDBuilder(F->getContext()).createLikelyBranchWeights());
 
   Builder.SetInsertPoint(ReleasingStoreBB);
   if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
@@ -1474,9 +1473,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   StoreSuccess = Builder.CreateICmpEQ(
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
   BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
-  Builder.CreateCondBr(
-      StoreSuccess, SuccessBB, CI->isWeak() ? FailureBB : RetryBB,
-      TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
+  Builder.CreateCondBr(StoreSuccess, SuccessBB,
+                       CI->isWeak() ? FailureBB : RetryBB,
+                       MDBuilder(F->getContext()).createLikelyBranchWeights());
 
   Builder.SetInsertPoint(ReleasedLoadBB);
   Value *SecondLoad;
@@ -1491,7 +1490,7 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
     // jump straight past that fence instruction (if it exists).
     Builder.CreateCondBr(
         ShouldStore, TryStoreBB, NoStoreBB,
-        TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
+        MDBuilder(F->getContext()).createLikelyBranchWeights());
     // Update PHI node in TryStoreBB.
     LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
   } else
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 865d3f5465859..459525ed4ee9a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -68,7 +68,6 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsPowerPC.h"
-#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
@@ -12817,11 +12816,6 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
   return Builder.CreateXor(Call, Builder.getInt32(1));
 }
 
-MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchg(
-    LLVMContext &Ctx) const {
-  return MDBuilder(Ctx).createLikelyBranchWeights();
-}
-
 // The mappings for emitLeading/TrailingFence is taken from
 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4b78955db9fbf..124c7116dc3b5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -938,8 +938,6 @@ namespace llvm {
     Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                    AtomicOrdering Ord) const override;
 
-    virtual MDNode *
-    getTrueBranchHintWeightForAtomicCmpXchg(LLVMContext &Ctx) const override;
     bool shouldInlineQuadwordAtomics() const;
 
     TargetLowering::AtomicExpansionKind
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
index d8ac89f76b321..deeba7ef3ce2c 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -1090,18 +1090,18 @@ define dso_local void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; INLINE_ATOMICS-NEXT:    // =>This Inner Loop Header: Depth=1
 ; INLINE_ATOMICS-NEXT:    ldxr x8, [x9]
 ; INLINE_ATOMICS-NEXT:    cmp x8, x0
-; INLINE_ATOMICS-NEXT:    b.ne .LBB43_3
+; INLINE_ATOMICS-NEXT:    b.ne .LBB43_4
 ; INLINE_ATOMICS-NEXT:  // %bb.2: // %cmpxchg.trystore
 ; INLINE_ATOMICS-NEXT:    // in Loop: Header=BB43_1 Depth=1
 ; INLINE_ATOMICS-NEXT:    stxr w10, x1, [x9]
 ; INLINE_ATOMICS-NEXT:    cbnz w10, .LBB43_1
-; INLINE_ATOMICS-NEXT:    b .LBB43_4
-; INLINE_ATOMICS-NEXT:  .LBB43_3: // %cmpxchg.nostore
-; INLINE_ATOMICS-NEXT:    clrex
-; INLINE_ATOMICS-NEXT:  .LBB43_4: // %cmpxchg.end
+; INLINE_ATOMICS-NEXT:  .LBB43_3: // %cmpxchg.end
 ; INLINE_ATOMICS-NEXT:    adrp x9, var64
 ; INLINE_ATOMICS-NEXT:    str x8, [x9, :lo12:var64]
 ; INLINE_ATOMICS-NEXT:    ret
+; INLINE_ATOMICS-NEXT:  .LBB43_4: // %cmpxchg.nostore
+; INLINE_ATOMICS-NEXT:    clrex
+; INLINE_ATOMICS-NEXT:    b .LBB43_3
 ;
 ; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i64:
 ; OUTLINE_ATOMICS:       // %bb.0:

>From 70c1ccf4aba144516a8e8193b6e60bb9987ad37b Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Fri, 8 Aug 2025 18:43:53 +0000
Subject: [PATCH 5/5] change test case

---
 .../AArch64/GlobalISel/arm64-atomic.ll        |  60 ++--
 .../AArch64/GlobalISel/arm64-pcsections.ll    | 128 ++++----
 llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll  |  10 +-
 llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll   | 294 +++++++++---------
 llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll   | 294 +++++++++---------
 llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll   | 294 +++++++++---------
 llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll   | 294 +++++++++---------
 llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll   |  30 +-
 llvm/test/CodeGen/ARM/atomic-cmpxchg.ll       |  31 +-
 llvm/test/CodeGen/ARM/cmpxchg-idioms.ll       |  25 +-
 llvm/test/CodeGen/ARM/cmpxchg-weak.ll         |  17 +-
 .../CodeGen/Hexagon/atomic-opaque-basic.ll    |   1 -
 .../AtomicExpand/ARM/atomic-expansion-v7.ll   |  55 +---
 .../AtomicExpand/ARM/atomic-expansion-v8.ll   |  41 +--
 .../AtomicExpand/ARM/cmpxchg-weak.ll          |  16 +-
 15 files changed, 758 insertions(+), 832 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 5bc041aef88ba..e6bf3ab674717 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -6002,15 +6002,17 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ; CHECK-NOLSE-O1-NEXT:    b.ne LBB67_4
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
 ; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB67_1 Depth=1
-; CHECK-NOLSE-O1-NEXT:    stxrb w9, w2, [x8]
-; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB67_1
-; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
-; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT:    stxrb w10, w2, [x8]
+; CHECK-NOLSE-O1-NEXT:    mov w9, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB67_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT:    mov w1, w9
 ; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ; CHECK-NOLSE-O1-NEXT:  LBB67_4: ; %cmpxchg.nostore
-; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
+; CHECK-NOLSE-O1-NEXT:    mov w9, wzr
 ; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    mov w1, w9
 ; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
@@ -6108,15 +6110,17 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ; CHECK-NOLSE-O1-NEXT:    b.ne LBB68_4
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
 ; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB68_1 Depth=1
-; CHECK-NOLSE-O1-NEXT:    stxrh w9, w2, [x8]
-; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB68_1
-; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
-; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT:    stxrh w10, w2, [x8]
+; CHECK-NOLSE-O1-NEXT:    mov w9, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB68_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT:    mov w1, w9
 ; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ; CHECK-NOLSE-O1-NEXT:  LBB68_4: ; %cmpxchg.nostore
-; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
+; CHECK-NOLSE-O1-NEXT:    mov w9, wzr
 ; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    mov w1, w9
 ; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
@@ -6206,6 +6210,7 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) {
 ; CHECK-NOLSE-O1-LABEL: cmpxchg_i32:
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:    mov x8, x0
+; CHECK-NOLSE-O1-NEXT:    mov w9, #1 ; =0x1
 ; CHECK-NOLSE-O1-NEXT:  LBB69_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxr w0, [x8]
@@ -6213,15 +6218,16 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) {
 ; CHECK-NOLSE-O1-NEXT:    b.ne LBB69_4
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
 ; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB69_1 Depth=1
-; CHECK-NOLSE-O1-NEXT:    stxr w9, w2, [x8]
-; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB69_1
-; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
-; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT:    stxr w10, w2, [x8]
+; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB69_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT:    mov w1, w9
 ; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ; CHECK-NOLSE-O1-NEXT:  LBB69_4: ; %cmpxchg.nostore
-; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
+; CHECK-NOLSE-O1-NEXT:    mov w9, wzr
 ; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    mov w1, w9
 ; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
@@ -6306,6 +6312,7 @@ define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) {
 ; CHECK-NOLSE-O1-LABEL: cmpxchg_i64:
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:    mov x8, x0
+; CHECK-NOLSE-O1-NEXT:    mov w9, #1 ; =0x1
 ; CHECK-NOLSE-O1-NEXT:  LBB70_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxr x0, [x8]
@@ -6313,14 +6320,15 @@ define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) {
 ; CHECK-NOLSE-O1-NEXT:    b.ne LBB70_4
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
 ; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB70_1 Depth=1
-; CHECK-NOLSE-O1-NEXT:    stxr w9, x2, [x8]
-; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB70_1
-; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
-; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT:    stxr w10, x2, [x8]
+; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB70_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT:    mov w1, w9
 ; CHECK-NOLSE-O1-NEXT:    ret
 ; CHECK-NOLSE-O1-NEXT:  LBB70_4: ; %cmpxchg.nostore
-; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
+; CHECK-NOLSE-O1-NEXT:    mov w9, wzr
 ; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    mov w1, w9
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-OUTLINE-O1-LABEL: cmpxchg_i64:
@@ -6404,6 +6412,7 @@ define { ptr, i1 } @cmpxchg_ptr(ptr %ptr, ptr %desired, ptr %new) {
 ; CHECK-NOLSE-O1-LABEL: cmpxchg_ptr:
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:    mov x8, x0
+; CHECK-NOLSE-O1-NEXT:    mov w9, #1 ; =0x1
 ; CHECK-NOLSE-O1-NEXT:  LBB71_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxr x0, [x8]
@@ -6411,14 +6420,15 @@ define { ptr, i1 } @cmpxchg_ptr(ptr %ptr, ptr %desired, ptr %new) {
 ; CHECK-NOLSE-O1-NEXT:    b.ne LBB71_4
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %cmpxchg.trystore
 ; CHECK-NOLSE-O1-NEXT:    ; in Loop: Header=BB71_1 Depth=1
-; CHECK-NOLSE-O1-NEXT:    stxr w9, x2, [x8]
-; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB71_1
-; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
-; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT:    stxr w10, x2, [x8]
+; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB71_1
+; CHECK-NOLSE-O1-NEXT:  ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT:    mov w1, w9
 ; CHECK-NOLSE-O1-NEXT:    ret
 ; CHECK-NOLSE-O1-NEXT:  LBB71_4: ; %cmpxchg.nostore
-; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
+; CHECK-NOLSE-O1-NEXT:    mov w9, wzr
 ; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    mov w1, w9
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-OUTLINE-O1-LABEL: cmpxchg_ptr:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index 4a85d8490d2e9..cab2741be9929 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -9,7 +9,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
-  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
@@ -17,7 +17,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
-  ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRW renamable $w2, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
@@ -49,7 +49,7 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
   ; CHECK-NEXT:   renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
-  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $x0, $x9
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
@@ -57,7 +57,7 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
-  ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $x0, $x8, $x9
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w10 = STXRW renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
@@ -88,7 +88,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
-  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
@@ -96,7 +96,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
-  ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STLXRW renamable $w2, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
@@ -126,7 +126,7 @@ define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) {
   ; CHECK-NEXT:   liveins: $x0, $x1, $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
-  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $x0, $x1, $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $x8 = LDXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p)
@@ -134,7 +134,7 @@ define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) {
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
-  ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
   ; CHECK-NEXT:   liveins: $x0, $x1, $x2, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRX renamable $x2, renamable $x0, pcsections !0 :: (volatile store (s64) into %ir.p)
@@ -164,7 +164,7 @@ define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new)
   ; CHECK-NEXT:   liveins: $x0, $x1, $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
-  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $x0, $x1, $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $x8 = LDAXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p)
@@ -172,7 +172,7 @@ define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new)
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
-  ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
   ; CHECK-NEXT:   liveins: $x0, $x1, $x2, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STLXRX renamable $x2, renamable $x0, pcsections !0 :: (volatile store (s64) into %ir.p)
@@ -202,7 +202,7 @@ define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new)
   ; CHECK-NEXT:   liveins: $x0, $x1, $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
-  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $x0, $x1, $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $x8 = LDAXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p)
@@ -210,7 +210,7 @@ define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new)
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
-  ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
   ; CHECK-NEXT:   liveins: $x0, $x1, $x2, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STLXRX renamable $x2, renamable $x0, pcsections !0 :: (volatile store (s64) into %ir.p)
@@ -240,7 +240,7 @@ define i32 @fetch_and_nand(ptr %p) {
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
@@ -265,7 +265,7 @@ define i64 @fetch_and_nand_64(ptr %p) {
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $x8 = LDAXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p)
@@ -292,7 +292,7 @@ define i32 @fetch_and_or(ptr %p) {
   ; CHECK-NEXT:   renamable $w9 = MOVZWi 5, 0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
@@ -316,7 +316,7 @@ define i64 @fetch_and_or_64(ptr %p) {
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $x8 = LDXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p)
@@ -723,7 +723,7 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -747,7 +747,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -770,7 +770,7 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -794,7 +794,7 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -818,7 +818,7 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -842,7 +842,7 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -866,7 +866,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -892,7 +892,7 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -920,7 +920,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   renamable $w9 = ANDWri killed renamable $w1, 7
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -948,7 +948,7 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   renamable $w9 = ANDWri killed renamable $w1, 7
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -974,7 +974,7 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -998,7 +998,7 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1021,7 +1021,7 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1045,7 +1045,7 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1069,7 +1069,7 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1093,7 +1093,7 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1117,7 +1117,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1143,7 +1143,7 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1171,7 +1171,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   renamable $w9 = ANDWri killed renamable $w1, 15
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1199,7 +1199,7 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   renamable $w9 = ANDWri killed renamable $w1, 15
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
-  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1227,34 +1227,35 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
   ; CHECK-NEXT:   $x8 = ORRXrs $xzr, $x0, 0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
-  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.4(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w0 = LDXRB renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
-  ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   early-clobber renamable $w9 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr)
-  ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = MOVZWi 1, 0
+  ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1
+  ; CHECK-NEXT:   B %bb.4
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT: bb.3.cmpxchg.nostore:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w1 = MOVZWi 1, 0
-  ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
-  ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
+  ; CHECK-NEXT:   $w9 = ORRWrs $wzr, $wzr, 0
+  ; CHECK-NEXT:   CLREX 15, pcsections !0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.cmpxchg.nostore:
-  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: bb.4.cmpxchg.end:
+  ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w1 = ORRWrs $wzr, $wzr, 0
-  ; CHECK-NEXT:   CLREX 15, pcsections !0
   ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
+  ; CHECK-NEXT:   $w1 = ORRWrs $wzr, killed $w9, 0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
   %res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic, !pcsections !0
   ret { i8, i1 } %res
@@ -1269,35 +1270,36 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
   ; CHECK-NEXT:   $x8 = ORRXrs $xzr, $x0, 0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
-  ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.4(0x04000000)
+  ; CHECK-NEXT:   successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w0 = LDXRH renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
+  ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
-  ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT:   successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   early-clobber renamable $w9 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr)
-  ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = MOVZWi 1, 0
+  ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1
+  ; CHECK-NEXT:   B %bb.4
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT: bb.3.cmpxchg.nostore:
+  ; CHECK-NEXT:   successors: %bb.4(0x80000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w1 = MOVZWi 1, 0
-  ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
-  ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
+  ; CHECK-NEXT:   $w9 = ORRWrs $wzr, $wzr, 0
+  ; CHECK-NEXT:   CLREX 15, pcsections !0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.4.cmpxchg.nostore:
-  ; CHECK-NEXT:   liveins: $x0
+  ; CHECK-NEXT: bb.4.cmpxchg.end:
+  ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w1 = ORRWrs $wzr, $wzr, 0
-  ; CHECK-NEXT:   CLREX 15, pcsections !0
   ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
-  ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
+  ; CHECK-NEXT:   $w1 = ORRWrs $wzr, killed $w9, 0
+  ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
   %res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic, !pcsections !0
   ret { i16, i1 } %res
 }
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
index 42cb3d4e9589d..bf78429da52f3 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
@@ -850,18 +850,18 @@ define dso_local void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldxr x8, [x9]
 ; CHECK-NEXT:    cmp x8, x0
-; CHECK-NEXT:    b.ne .LBB43_3
+; CHECK-NEXT:    b.ne .LBB43_4
 ; CHECK-NEXT:  // %bb.2: // %cmpxchg.trystore
 ; CHECK-NEXT:    // in Loop: Header=BB43_1 Depth=1
 ; CHECK-NEXT:    stxr w10, x1, [x9]
 ; CHECK-NEXT:    cbnz w10, .LBB43_1
-; CHECK-NEXT:    b .LBB43_4
-; CHECK-NEXT:  .LBB43_3: // %cmpxchg.nostore
-; CHECK-NEXT:    clrex
-; CHECK-NEXT:  .LBB43_4: // %cmpxchg.end
+; CHECK-NEXT:  .LBB43_3: // %cmpxchg.end
 ; CHECK-NEXT:    adrp x9, var64
 ; CHECK-NEXT:    str x8, [x9, :lo12:var64]
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB43_4: // %cmpxchg.nostore
+; CHECK-NEXT:    clrex
+; CHECK-NEXT:    b .LBB43_3
    %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new monotonic monotonic
    %old = extractvalue { i64, i1 } %pair, 0
    store i64 %old, ptr @var64
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
index 21729b9dfd101..24a6c3c440e18 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -49,15 +49,9 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
 ; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB0_2
-; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB0_6
-; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w22, w0
 ; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
@@ -68,19 +62,25 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB0_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB0_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_1
+; SOFTFP-NOLSE-NEXT:    b .LBB0_6
+; SOFTFP-NOLSE-NEXT:  .LBB0_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_1
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -137,15 +137,9 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
 ; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB1_2
-; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB1_6
-; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w22, w0
 ; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
@@ -156,19 +150,25 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB1_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB1_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_1
+; SOFTFP-NOLSE-NEXT:    b .LBB1_6
+; SOFTFP-NOLSE-NEXT:  .LBB1_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_1
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -236,34 +236,34 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT:    b .LBB2_2
-; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB2_6
-; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB2_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB2_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_1
+; SOFTFP-NOLSE-NEXT:    b .LBB2_6
+; SOFTFP-NOLSE-NEXT:  .LBB2_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_1
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -330,34 +330,34 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT:    b .LBB3_2
-; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB3_6
-; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB3_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB3_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_1
+; SOFTFP-NOLSE-NEXT:    b .LBB3_6
+; SOFTFP-NOLSE-NEXT:  .LBB3_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_1
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -406,32 +406,32 @@ define float @test_atomicrmw_fadd_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldr w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
-; SOFTFP-NOLSE-NEXT:    b .LBB4_2
-; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB4_6
-; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB4_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB4_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_1
+; SOFTFP-NOLSE-NEXT:    b .LBB4_6
+; SOFTFP-NOLSE-NEXT:  .LBB4_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_1
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -480,32 +480,32 @@ define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldr x0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x20, x1
-; SOFTFP-NOLSE-NEXT:    b .LBB5_2
-; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB5_6
-; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov x1, x20
 ; SOFTFP-NOLSE-NEXT:    mov x21, x0
 ; SOFTFP-NOLSE-NEXT:    bl __adddf3
 ; SOFTFP-NOLSE-NEXT:    mov x8, x0
-; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr x0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp x0, x21
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, x8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB5_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_1
+; SOFTFP-NOLSE-NEXT:    b .LBB5_6
+; SOFTFP-NOLSE-NEXT:  .LBB5_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_1
 ; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
@@ -701,16 +701,9 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov w19, w2
 ; SOFTFP-NOLSE-NEXT:    mov x20, x0
-; SOFTFP-NOLSE-NEXT:    b .LBB7_2
-; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_6
-; SOFTFP-NOLSE-NEXT:  .LBB7_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB7_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB7_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    and w0, w19, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT:    mov w24, w0
@@ -731,20 +724,27 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT:    mov w8, w22
 ; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
 ; SOFTFP-NOLSE-NEXT:    bfi w8, w23, #16, #16
-; SOFTFP-NOLSE-NEXT:  .LBB7_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB7_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB7_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x20]
 ; SOFTFP-NOLSE-NEXT:    cmp w22, w8
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB7_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, w0, [x20]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB7_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB7_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB7_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_1
+; SOFTFP-NOLSE-NEXT:    b .LBB7_6
+; SOFTFP-NOLSE-NEXT:  .LBB7_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_1
 ; SOFTFP-NOLSE-NEXT:  .LBB7_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    mov w1, w23
@@ -817,16 +817,9 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; SOFTFP-NOLSE-NEXT:    lsl w21, w8, #16
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB8_2
-; SOFTFP-NOLSE-NEXT:  .LBB8_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_6
-; SOFTFP-NOLSE-NEXT:  .LBB8_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB8_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB8_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB8_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    lsl w23, w1, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    mov w0, w23
@@ -839,20 +832,27 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    bfxil w23, w22, #0, #16
 ; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT:  .LBB8_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB8_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB8_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w22, w23
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB8_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w8, w0, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB8_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_1
+; SOFTFP-NOLSE-NEXT:    b .LBB8_6
+; SOFTFP-NOLSE-NEXT:  .LBB8_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_1
 ; SOFTFP-NOLSE-NEXT:  .LBB8_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
@@ -906,16 +906,9 @@ define <2 x float> @test_atomicrmw_fadd_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov w19, w2
 ; SOFTFP-NOLSE-NEXT:    mov x20, x0
-; SOFTFP-NOLSE-NEXT:    b .LBB9_2
-; SOFTFP-NOLSE-NEXT:  .LBB9_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB9_6
-; SOFTFP-NOLSE-NEXT:  .LBB9_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB9_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB9_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB9_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w0, w23
 ; SOFTFP-NOLSE-NEXT:    mov w1, w19
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
@@ -928,20 +921,27 @@ define <2 x float> @test_atomicrmw_fadd_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
-; SOFTFP-NOLSE-NEXT:  .LBB9_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB9_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB9_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB9_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr x22, [x20]
 ; SOFTFP-NOLSE-NEXT:    cmp x22, x9
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB9_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB9_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w10, x8, [x20]
-; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB9_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB9_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB9_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB9_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB9_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB9_1
+; SOFTFP-NOLSE-NEXT:    b .LBB9_6
+; SOFTFP-NOLSE-NEXT:  .LBB9_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB9_1
 ; SOFTFP-NOLSE-NEXT:  .LBB9_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    mov w1, w23
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
index e3e18a1f91c6d..16825c9dcd178 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
@@ -51,15 +51,9 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
 ; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB0_2
-; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB0_6
-; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w22, w0
 ; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
@@ -70,19 +64,25 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    bl fmaxf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB0_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB0_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_1
+; SOFTFP-NOLSE-NEXT:    b .LBB0_6
+; SOFTFP-NOLSE-NEXT:  .LBB0_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_1
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -139,15 +139,9 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
 ; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB1_2
-; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB1_6
-; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w22, w0
 ; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
@@ -158,19 +152,25 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    bl fmaxf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB1_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB1_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_1
+; SOFTFP-NOLSE-NEXT:    b .LBB1_6
+; SOFTFP-NOLSE-NEXT:  .LBB1_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_1
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -238,34 +238,34 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT:    b .LBB2_2
-; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB2_6
-; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    bl fmaxf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB2_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB2_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_1
+; SOFTFP-NOLSE-NEXT:    b .LBB2_6
+; SOFTFP-NOLSE-NEXT:  .LBB2_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_1
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -332,34 +332,34 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT:    b .LBB3_2
-; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB3_6
-; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    bl fmaxf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB3_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB3_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_1
+; SOFTFP-NOLSE-NEXT:    b .LBB3_6
+; SOFTFP-NOLSE-NEXT:  .LBB3_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_1
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -408,32 +408,32 @@ define float @test_atomicrmw_fmax_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldr w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
-; SOFTFP-NOLSE-NEXT:    b .LBB4_2
-; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB4_6
-; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    bl fmaxf
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB4_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB4_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_1
+; SOFTFP-NOLSE-NEXT:    b .LBB4_6
+; SOFTFP-NOLSE-NEXT:  .LBB4_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_1
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -482,32 +482,32 @@ define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) #
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldr x0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x20, x1
-; SOFTFP-NOLSE-NEXT:    b .LBB5_2
-; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB5_6
-; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov x1, x20
 ; SOFTFP-NOLSE-NEXT:    mov x21, x0
 ; SOFTFP-NOLSE-NEXT:    bl fmax
 ; SOFTFP-NOLSE-NEXT:    mov x8, x0
-; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr x0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp x0, x21
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, x8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB5_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_1
+; SOFTFP-NOLSE-NEXT:    b .LBB5_6
+; SOFTFP-NOLSE-NEXT:  .LBB5_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_1
 ; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
@@ -581,16 +581,9 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov w19, w2
 ; SOFTFP-NOLSE-NEXT:    mov x20, x0
-; SOFTFP-NOLSE-NEXT:    b .LBB6_2
-; SOFTFP-NOLSE-NEXT:  .LBB6_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB6_6
-; SOFTFP-NOLSE-NEXT:  .LBB6_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB6_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB6_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB6_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    and w0, w19, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT:    mov w24, w0
@@ -611,20 +604,27 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT:    mov w8, w22
 ; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
 ; SOFTFP-NOLSE-NEXT:    bfi w8, w23, #16, #16
-; SOFTFP-NOLSE-NEXT:  .LBB6_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB6_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB6_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB6_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x20]
 ; SOFTFP-NOLSE-NEXT:    cmp w22, w8
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB6_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB6_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, w0, [x20]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB6_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB6_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB6_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB6_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB6_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB6_1
+; SOFTFP-NOLSE-NEXT:    b .LBB6_6
+; SOFTFP-NOLSE-NEXT:  .LBB6_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB6_1
 ; SOFTFP-NOLSE-NEXT:  .LBB6_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    mov w1, w23
@@ -725,16 +725,9 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; SOFTFP-NOLSE-NEXT:    lsl w21, w8, #16
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB7_2
-; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_6
-; SOFTFP-NOLSE-NEXT:  .LBB7_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB7_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB7_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    lsl w23, w1, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    mov w0, w23
@@ -747,20 +740,27 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    bfxil w23, w22, #0, #16
 ; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT:  .LBB7_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB7_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB7_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w22, w23
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB7_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w8, w0, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB7_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_1
+; SOFTFP-NOLSE-NEXT:    b .LBB7_6
+; SOFTFP-NOLSE-NEXT:  .LBB7_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_1
 ; SOFTFP-NOLSE-NEXT:  .LBB7_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
@@ -814,16 +814,9 @@ define <2 x float> @test_atomicrmw_fmax_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov w19, w2
 ; SOFTFP-NOLSE-NEXT:    mov x20, x0
-; SOFTFP-NOLSE-NEXT:    b .LBB8_2
-; SOFTFP-NOLSE-NEXT:  .LBB8_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_6
-; SOFTFP-NOLSE-NEXT:  .LBB8_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB8_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB8_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB8_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w0, w23
 ; SOFTFP-NOLSE-NEXT:    mov w1, w19
 ; SOFTFP-NOLSE-NEXT:    bl fmaxf
@@ -836,20 +829,27 @@ define <2 x float> @test_atomicrmw_fmax_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
-; SOFTFP-NOLSE-NEXT:  .LBB8_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB8_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB8_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr x22, [x20]
 ; SOFTFP-NOLSE-NEXT:    cmp x22, x9
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB8_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w10, x8, [x20]
-; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB8_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB8_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB8_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_1
+; SOFTFP-NOLSE-NEXT:    b .LBB8_6
+; SOFTFP-NOLSE-NEXT:  .LBB8_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_1
 ; SOFTFP-NOLSE-NEXT:  .LBB8_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    mov w1, w23
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
index 10de6777bd285..314075c619103 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
@@ -51,15 +51,9 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
 ; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB0_2
-; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB0_6
-; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w22, w0
 ; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
@@ -70,19 +64,25 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    bl fminf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB0_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB0_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_1
+; SOFTFP-NOLSE-NEXT:    b .LBB0_6
+; SOFTFP-NOLSE-NEXT:  .LBB0_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_1
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -139,15 +139,9 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
 ; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB1_2
-; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB1_6
-; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w22, w0
 ; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
@@ -158,19 +152,25 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    bl fminf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB1_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB1_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_1
+; SOFTFP-NOLSE-NEXT:    b .LBB1_6
+; SOFTFP-NOLSE-NEXT:  .LBB1_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_1
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -238,34 +238,34 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT:    b .LBB2_2
-; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB2_6
-; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    bl fminf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB2_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB2_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_1
+; SOFTFP-NOLSE-NEXT:    b .LBB2_6
+; SOFTFP-NOLSE-NEXT:  .LBB2_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_1
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -332,34 +332,34 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT:    b .LBB3_2
-; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB3_6
-; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    bl fminf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB3_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB3_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_1
+; SOFTFP-NOLSE-NEXT:    b .LBB3_6
+; SOFTFP-NOLSE-NEXT:  .LBB3_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_1
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -408,32 +408,32 @@ define float @test_atomicrmw_fmin_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldr w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
-; SOFTFP-NOLSE-NEXT:    b .LBB4_2
-; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB4_6
-; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    bl fminf
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB4_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB4_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_1
+; SOFTFP-NOLSE-NEXT:    b .LBB4_6
+; SOFTFP-NOLSE-NEXT:  .LBB4_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_1
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -482,32 +482,32 @@ define double @test_atomicrmw_fmin_f32_seq_cst_align8(ptr %ptr, double %value) #
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldr x0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x20, x1
-; SOFTFP-NOLSE-NEXT:    b .LBB5_2
-; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB5_6
-; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov x1, x20
 ; SOFTFP-NOLSE-NEXT:    mov x21, x0
 ; SOFTFP-NOLSE-NEXT:    bl fmin
 ; SOFTFP-NOLSE-NEXT:    mov x8, x0
-; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr x0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp x0, x21
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, x8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB5_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_1
+; SOFTFP-NOLSE-NEXT:    b .LBB5_6
+; SOFTFP-NOLSE-NEXT:  .LBB5_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_1
 ; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
@@ -581,16 +581,9 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov w19, w2
 ; SOFTFP-NOLSE-NEXT:    mov x20, x0
-; SOFTFP-NOLSE-NEXT:    b .LBB6_2
-; SOFTFP-NOLSE-NEXT:  .LBB6_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB6_6
-; SOFTFP-NOLSE-NEXT:  .LBB6_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB6_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB6_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB6_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    and w0, w19, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT:    mov w24, w0
@@ -611,20 +604,27 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT:    mov w8, w22
 ; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
 ; SOFTFP-NOLSE-NEXT:    bfi w8, w23, #16, #16
-; SOFTFP-NOLSE-NEXT:  .LBB6_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB6_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB6_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB6_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x20]
 ; SOFTFP-NOLSE-NEXT:    cmp w22, w8
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB6_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB6_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, w0, [x20]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB6_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB6_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB6_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB6_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB6_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB6_1
+; SOFTFP-NOLSE-NEXT:    b .LBB6_6
+; SOFTFP-NOLSE-NEXT:  .LBB6_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB6_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB6_1
 ; SOFTFP-NOLSE-NEXT:  .LBB6_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    mov w1, w23
@@ -725,16 +725,9 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; SOFTFP-NOLSE-NEXT:    lsl w21, w8, #16
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB7_2
-; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_6
-; SOFTFP-NOLSE-NEXT:  .LBB7_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB7_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB7_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    lsl w23, w1, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    mov w0, w23
@@ -747,20 +740,27 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    bfxil w23, w22, #0, #16
 ; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT:  .LBB7_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB7_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB7_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w22, w23
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB7_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w8, w0, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB7_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_1
+; SOFTFP-NOLSE-NEXT:    b .LBB7_6
+; SOFTFP-NOLSE-NEXT:  .LBB7_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_1
 ; SOFTFP-NOLSE-NEXT:  .LBB7_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
@@ -814,16 +814,9 @@ define <2 x float> @test_atomicrmw_fmin_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov w19, w2
 ; SOFTFP-NOLSE-NEXT:    mov x20, x0
-; SOFTFP-NOLSE-NEXT:    b .LBB8_2
-; SOFTFP-NOLSE-NEXT:  .LBB8_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_6
-; SOFTFP-NOLSE-NEXT:  .LBB8_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB8_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB8_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB8_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w0, w23
 ; SOFTFP-NOLSE-NEXT:    mov w1, w19
 ; SOFTFP-NOLSE-NEXT:    bl fminf
@@ -836,20 +829,27 @@ define <2 x float> @test_atomicrmw_fmin_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
-; SOFTFP-NOLSE-NEXT:  .LBB8_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB8_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB8_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr x22, [x20]
 ; SOFTFP-NOLSE-NEXT:    cmp x22, x9
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB8_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w10, x8, [x20]
-; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB8_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB8_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB8_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_1
+; SOFTFP-NOLSE-NEXT:    b .LBB8_6
+; SOFTFP-NOLSE-NEXT:  .LBB8_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_1
 ; SOFTFP-NOLSE-NEXT:  .LBB8_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    mov w1, w23
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
index 82e0f14e68e26..6bb541684c2bd 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
@@ -49,15 +49,9 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
 ; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB0_2
-; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB0_6
-; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB0_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w22, w0
 ; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
@@ -68,19 +62,25 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    bl __subsf3
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB0_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB0_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB0_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB0_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB0_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB0_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_1
+; SOFTFP-NOLSE-NEXT:    b .LBB0_6
+; SOFTFP-NOLSE-NEXT:  .LBB0_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB0_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_1
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -137,15 +137,9 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
 ; SOFTFP-NOLSE-NEXT:    stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB1_2
-; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB1_6
-; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB1_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB1_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w22, w0
 ; SOFTFP-NOLSE-NEXT:    and w0, w20, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
@@ -156,19 +150,25 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    bl __subsf3
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfhf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB1_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB1_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB1_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB1_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB1_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB1_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_1
+; SOFTFP-NOLSE-NEXT:    b .LBB1_6
+; SOFTFP-NOLSE-NEXT:  .LBB1_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB1_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_1
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -236,34 +236,34 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT:    b .LBB2_2
-; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB2_6
-; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB2_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB2_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    bl __subsf3
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB2_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB2_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB2_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB2_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB2_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB2_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_1
+; SOFTFP-NOLSE-NEXT:    b .LBB2_6
+; SOFTFP-NOLSE-NEXT:  .LBB2_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB2_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_1
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -330,34 +330,34 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldrh w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT:    b .LBB3_2
-; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB3_6
-; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB3_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB3_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    lsl w0, w0, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    bl __subsf3
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB3_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB3_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB3_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxrh w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB3_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB3_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB3_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_1
+; SOFTFP-NOLSE-NEXT:    b .LBB3_6
+; SOFTFP-NOLSE-NEXT:  .LBB3_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB3_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_1
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -406,32 +406,32 @@ define float @test_atomicrmw_fsub_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldr w0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov w20, w1
-; SOFTFP-NOLSE-NEXT:    b .LBB4_2
-; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB4_6
-; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB4_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB4_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    mov w21, w0
 ; SOFTFP-NOLSE-NEXT:    bl __subsf3
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
-; SOFTFP-NOLSE-NEXT:  .LBB4_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB4_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB4_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w0, w21
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB4_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB4_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB4_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_1
+; SOFTFP-NOLSE-NEXT:    b .LBB4_6
+; SOFTFP-NOLSE-NEXT:  .LBB4_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB4_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_1
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
@@ -480,32 +480,32 @@ define double @test_atomicrmw_fsub_f32_seq_cst_align8(ptr %ptr, double %value) #
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    ldr x0, [x0]
 ; SOFTFP-NOLSE-NEXT:    mov x20, x1
-; SOFTFP-NOLSE-NEXT:    b .LBB5_2
-; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB5_6
-; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB5_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB5_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov x1, x20
 ; SOFTFP-NOLSE-NEXT:    mov x21, x0
 ; SOFTFP-NOLSE-NEXT:    bl __subdf3
 ; SOFTFP-NOLSE-NEXT:    mov x8, x0
-; SOFTFP-NOLSE-NEXT:  .LBB5_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB5_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB5_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr x0, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp x0, x21
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB5_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, x8, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB5_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_1
+; SOFTFP-NOLSE-NEXT:    b .LBB5_6
+; SOFTFP-NOLSE-NEXT:  .LBB5_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB5_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB5_1
 ; SOFTFP-NOLSE-NEXT:  .LBB5_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
@@ -701,16 +701,9 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov w19, w2
 ; SOFTFP-NOLSE-NEXT:    mov x20, x0
-; SOFTFP-NOLSE-NEXT:    b .LBB7_2
-; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB7_6
-; SOFTFP-NOLSE-NEXT:  .LBB7_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB7_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB7_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB7_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    and w0, w19, #0xffff
 ; SOFTFP-NOLSE-NEXT:    bl __extendhfsf2
 ; SOFTFP-NOLSE-NEXT:    mov w24, w0
@@ -731,20 +724,27 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; SOFTFP-NOLSE-NEXT:    mov w8, w22
 ; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
 ; SOFTFP-NOLSE-NEXT:    bfi w8, w23, #16, #16
-; SOFTFP-NOLSE-NEXT:  .LBB7_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB7_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB7_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x20]
 ; SOFTFP-NOLSE-NEXT:    cmp w22, w8
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB7_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w9, w0, [x20]
-; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB7_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w9, .LBB7_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB7_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_1
+; SOFTFP-NOLSE-NEXT:    b .LBB7_6
+; SOFTFP-NOLSE-NEXT:  .LBB7_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB7_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB7_1
 ; SOFTFP-NOLSE-NEXT:  .LBB7_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    mov w1, w23
@@ -817,16 +817,9 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; SOFTFP-NOLSE-NEXT:    lsl w21, w8, #16
 ; SOFTFP-NOLSE-NEXT:    mov x19, x0
 ; SOFTFP-NOLSE-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT:    b .LBB8_2
-; SOFTFP-NOLSE-NEXT:  .LBB8_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_6
-; SOFTFP-NOLSE-NEXT:  .LBB8_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB8_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB8_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB8_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    lsl w23, w1, #16
 ; SOFTFP-NOLSE-NEXT:    mov w1, w20
 ; SOFTFP-NOLSE-NEXT:    mov w0, w23
@@ -839,20 +832,27 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; SOFTFP-NOLSE-NEXT:    bl __truncsfbf2
 ; SOFTFP-NOLSE-NEXT:    bfxil w23, w22, #0, #16
 ; SOFTFP-NOLSE-NEXT:    bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT:  .LBB8_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB8_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB8_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr w22, [x19]
 ; SOFTFP-NOLSE-NEXT:    cmp w22, w23
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB8_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w8, w0, [x19]
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB8_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_1
+; SOFTFP-NOLSE-NEXT:    b .LBB8_6
+; SOFTFP-NOLSE-NEXT:  .LBB8_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB8_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB8_1
 ; SOFTFP-NOLSE-NEXT:  .LBB8_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
@@ -906,16 +906,9 @@ define <2 x float> @test_atomicrmw_fsub_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
 ; SOFTFP-NOLSE-NEXT:    mov w19, w2
 ; SOFTFP-NOLSE-NEXT:    mov x20, x0
-; SOFTFP-NOLSE-NEXT:    b .LBB9_2
-; SOFTFP-NOLSE-NEXT:  .LBB9_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_2 Depth=1
-; SOFTFP-NOLSE-NEXT:    mov w8, wzr
-; SOFTFP-NOLSE-NEXT:    clrex
-; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT:    cbnz w8, .LBB9_6
-; SOFTFP-NOLSE-NEXT:  .LBB9_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT:  .LBB9_1: // %atomicrmw.start
 ; SOFTFP-NOLSE-NEXT:    // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT:    // Child Loop BB9_3 Depth 2
+; SOFTFP-NOLSE-NEXT:    // Child Loop BB9_2 Depth 2
 ; SOFTFP-NOLSE-NEXT:    mov w0, w23
 ; SOFTFP-NOLSE-NEXT:    mov w1, w19
 ; SOFTFP-NOLSE-NEXT:    bl __subsf3
@@ -928,20 +921,27 @@ define <2 x float> @test_atomicrmw_fsub_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
-; SOFTFP-NOLSE-NEXT:  .LBB9_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT:    // Parent Loop BB9_2 Depth=1
+; SOFTFP-NOLSE-NEXT:  .LBB9_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT:    // Parent Loop BB9_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    // => This Inner Loop Header: Depth=2
 ; SOFTFP-NOLSE-NEXT:    ldaxr x22, [x20]
 ; SOFTFP-NOLSE-NEXT:    cmp x22, x9
-; SOFTFP-NOLSE-NEXT:    b.ne .LBB9_1
-; SOFTFP-NOLSE-NEXT:  // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_3 Depth=2
+; SOFTFP-NOLSE-NEXT:    b.ne .LBB9_5
+; SOFTFP-NOLSE-NEXT:  // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_2 Depth=2
 ; SOFTFP-NOLSE-NEXT:    stlxr w10, x8, [x20]
-; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB9_3
-; SOFTFP-NOLSE-NEXT:  // %bb.5: // in Loop: Header=BB9_2 Depth=1
+; SOFTFP-NOLSE-NEXT:    cbnz w10, .LBB9_2
+; SOFTFP-NOLSE-NEXT:  // %bb.4: // in Loop: Header=BB9_1 Depth=1
 ; SOFTFP-NOLSE-NEXT:    mov w8, #1 // =0x1
 ; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB9_2
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB9_1
+; SOFTFP-NOLSE-NEXT:    b .LBB9_6
+; SOFTFP-NOLSE-NEXT:  .LBB9_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT:    // in Loop: Header=BB9_1 Depth=1
+; SOFTFP-NOLSE-NEXT:    mov w8, wzr
+; SOFTFP-NOLSE-NEXT:    clrex
+; SOFTFP-NOLSE-NEXT:    lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB9_1
 ; SOFTFP-NOLSE-NEXT:  .LBB9_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    mov w0, w22
 ; SOFTFP-NOLSE-NEXT:    mov w1, w23
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index b7817ebe59b9b..3f4dd116d91f8 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -181,41 +181,41 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldaxr w8, [x19]
 ; CHECK-NEXT:    cmp w8, w21
-; CHECK-NEXT:    b.ne LBB3_4
+; CHECK-NEXT:    b.ne LBB3_9
 ; CHECK-NEXT:  ; %bb.2: ; %cmpxchg.trystore
 ; CHECK-NEXT:    ; in Loop: Header=BB3_1 Depth=1
 ; CHECK-NEXT:    stlxr w8, w20, [x19]
 ; CHECK-NEXT:    cbnz w8, LBB3_1
 ; CHECK-NEXT:  ; %bb.3:
 ; CHECK-NEXT:    mov w8, #1 ; =0x1
-; CHECK-NEXT:    b LBB3_5
-; CHECK-NEXT:  LBB3_4: ; %cmpxchg.nostore
-; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    clrex
-; CHECK-NEXT:  LBB3_5: ; %for.cond.preheader
+; CHECK-NEXT:  LBB3_4: ; %for.cond.preheader
 ; CHECK-NEXT:    mov w22, #2 ; =0x2
-; CHECK-NEXT:  LBB3_6: ; %for.cond
+; CHECK-NEXT:  LBB3_5: ; %for.cond
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    cbz w22, LBB3_9
-; CHECK-NEXT:  ; %bb.7: ; %for.body
-; CHECK-NEXT:    ; in Loop: Header=BB3_6 Depth=1
+; CHECK-NEXT:    cbz w22, LBB3_8
+; CHECK-NEXT:  ; %bb.6: ; %for.body
+; CHECK-NEXT:    ; in Loop: Header=BB3_5 Depth=1
 ; CHECK-NEXT:    sub w22, w22, #1
 ; CHECK-NEXT:    orr w9, w21, w20
 ; CHECK-NEXT:    ldr w10, [x19, w22, sxtw #2]
 ; CHECK-NEXT:    cmp w9, w10
-; CHECK-NEXT:    b.eq LBB3_6
-; CHECK-NEXT:  ; %bb.8: ; %if.then
-; CHECK-NEXT:    ; in Loop: Header=BB3_6 Depth=1
+; CHECK-NEXT:    b.eq LBB3_5
+; CHECK-NEXT:  ; %bb.7: ; %if.then
+; CHECK-NEXT:    ; in Loop: Header=BB3_5 Depth=1
 ; CHECK-NEXT:    str w9, [x19, w22, sxtw #2]
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    b LBB3_6
-; CHECK-NEXT:  LBB3_9: ; %for.cond.cleanup
+; CHECK-NEXT:    b LBB3_5
+; CHECK-NEXT:  LBB3_8: ; %for.cond.cleanup
 ; CHECK-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x22, x21, [sp], #48 ; 16-byte Folded Reload
 ; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB3_9: ; %cmpxchg.nostore
+; CHECK-NEXT:    mov w8, wzr
+; CHECK-NEXT:    clrex
+; CHECK-NEXT:    b LBB3_4
 ;
 ; OUTLINE-ATOMICS-LABEL: test_conditional2:
 ; OUTLINE-ATOMICS:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
index 4bf42d4ac9629..c37a21be5ca4d 100644
--- a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -36,16 +36,18 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
 ; CHECK-THUMB-NEXT:    bx r1
 ;
 ; CHECK-ARMV6-LABEL: test_cmpxchg_res_i8:
-; CHECK-ARMV6:         uxtb r1, r1
+; CHECK-ARMV6:         .fnstart
+; CHECK-ARMV6-NEXT:    uxtb r12, r1
 ; CHECK-ARMV6-NEXT:  .LBB0_1:
-; CHECK-ARMV6-NEXT:    ldrexb r3, [r0]
-; CHECK-ARMV6-NEXT:    cmp r3, r1
+; CHECK-ARMV6-NEXT:    ldrexb r1, [r0]
+; CHECK-ARMV6-NEXT:    cmp r1, r12
 ; CHECK-ARMV6-NEXT:    movne r0, #0
 ; CHECK-ARMV6-NEXT:    bxne lr
 ; CHECK-ARMV6-NEXT:  .LBB0_2:
 ; CHECK-ARMV6-NEXT:    strexb r3, r2, [r0]
+; CHECK-ARMV6-NEXT:    mov r1, #1
 ; CHECK-ARMV6-NEXT:    cmp r3, #0
-; CHECK-ARMV6-NEXT:    moveq r0, #1
+; CHECK-ARMV6-NEXT:    moveq r0, r1
 ; CHECK-ARMV6-NEXT:    bxeq lr
 ; CHECK-ARMV6-NEXT:    b .LBB0_1
 ;
@@ -61,19 +63,22 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
 ; CHECK-THUMBV6-NEXT:    pop {r4, pc}
 ;
 ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
-; CHECK-ARMV7:         uxtb r1, r1
+; CHECK-ARMV7:         .fnstart
+; CHECK-ARMV7-NEXT:    uxtb r12, r1
 ; CHECK-ARMV7-NEXT:  .LBB0_1:
-; CHECK-ARMV7-NEXT:    ldrexb r3, [r0]
-; CHECK-ARMV7-NEXT:    cmp r3, r1
-; CHECK-ARMV7-NEXT:    bne .LBB0_3
+; CHECK-ARMV7-NEXT:    ldrexb r1, [r0]
+; CHECK-ARMV7-NEXT:    cmp r1, r12
+; CHECK-ARMV7-NEXT:    bne .LBB0_4
 ; CHECK-ARMV7-NEXT:    strexb r3, r2, [r0]
+; CHECK-ARMV7-NEXT:    mov r1, #1
 ; CHECK-ARMV7-NEXT:    cmp r3, #0
-; CHECK-ARMV7-NEXT:    moveq r0, #1
-; CHECK-ARMV7-NEXT:    bxeq lr
-; CHECK-ARMV7-NEXT:    b .LBB0_1
-; CHECK-ARMV7-NEXT:  .LBB0_3:
-; CHECK-ARMV7-NEXT:    mov r0, #0
+; CHECK-ARMV7-NEXT:    bne .LBB0_1
+; CHECK-ARMV7-NEXT:    mov r0, r1
+; CHECK-ARMV7-NEXT:    bx lr
+; CHECK-ARMV7-NEXT:  .LBB0_4:
+; CHECK-ARMV7-NEXT:    mov r1, #0
 ; CHECK-ARMV7-NEXT:    clrex
+; CHECK-ARMV7-NEXT:    mov r0, r1
 ; CHECK-ARMV7-NEXT:    bx lr
 ;
 ; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
index 4ff71b42d5db0..d59f282314b5c 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
@@ -4,14 +4,14 @@ define i32 @test_return(ptr %p, i32 %oldval, i32 %newval) {
 ; CHECK-LABEL: test_return:
 
 ; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
-; CHECK: cmp [[LOADED]], r1
+; CHECK: cmp [[LOADED]], r1
 ; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
 
 ; CHECK: dmb ishst
 
 ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
 ; CHECK: strex [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
-; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+; CHECK: cmp [[STATUS]], #0
 
 ; CHECK: ldrex [[LOADED]], [r0]
 ; CHECK: cmp [[LOADED]], r1
@@ -22,12 +22,6 @@ define i32 @test_return(ptr %p, i32 %oldval, i32 %newval) {
 ; CHECK: clrex
 ; CHECK: movs r0, #0
 ; CHECK: dmb ish
-; CHECK: bx lr
-
-; CHECK: [[SUCCESS]]:
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: movs r0, #1
-; CHECK: dmb ish
 ; CHECK: bx lr
 
   %pair = cmpxchg ptr %p, i32 %oldval, i32 %newval seq_cst seq_cst
@@ -49,7 +43,7 @@ define i1 @test_return_bool(ptr %value, i8 %oldValue, i8 %newValue) {
 
 ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
 ; CHECK: strexb [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
-; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+; CHECK: cmp [[STATUS]], #0
 
 ; CHECK: ldrexb [[LOADED]], [r0]
 ; CHECK: cmp [[LOADED]], [[OLDBYTE]]
@@ -63,12 +57,6 @@ define i1 @test_return_bool(ptr %value, i8 %oldValue, i8 %newValue) {
 ; CHECK: eor r0, [[TMP]], #1
 ; CHECK: bx lr
 
-; CHECK: [[SUCCESS]]:
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: movs [[TMP:r[0-9]+]], #1
-; CHECK: eor r0, [[TMP]], #1
-; CHECK: bx lr
-
 
   %pair = cmpxchg ptr %value, i8 %oldValue, i8 %newValue acq_rel monotonic
   %success = extractvalue { i8, i1 } %pair, 1
@@ -87,7 +75,7 @@ define void @test_conditional(ptr %p, i32 %oldval, i32 %newval) {
 
 ; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
 ; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
-; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+; CHECK: cmp [[STATUS]], #0
 
 ; CHECK: ldrex [[LOADED]], [r0]
 ; CHECK: cmp [[LOADED]], r1
@@ -99,11 +87,6 @@ define void @test_conditional(ptr %p, i32 %oldval, i32 %newval) {
 ; CHECK: dmb ish
 ; CHECK: b.w _baz
 
-; CHECK: [[SUCCESS]]:
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: dmb ish
-; CHECK: b.w _bar
-
   %pair = cmpxchg ptr %p, i32 %oldval, i32 %newval seq_cst seq_cst
   %success = extractvalue { i32, i1 } %pair, 1
   br i1 %success, label %true, label %false
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
index 9963f2d08ba52..b33eea9975740 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
@@ -10,16 +10,14 @@ define void @test_cmpxchg_weak(ptr %addr, i32 %desired, i32 %new) {
 ; CHECK-NEXT:     dmb ish
 ; CHECK-NEXT:     strex   [[SUCCESS:r[0-9]+]], r2, [r0]
 ; CHECK-NEXT:     cmp     [[SUCCESS]], #0
-; CHECK-NEXT:     beq     [[SUCCESSBB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT:     bne     [[FAILBB:LBB[0-9]+_[0-9]+]]
 ; CHECK-NEXT: %bb.2:
+; CHECK-NEXT:     dmb     ish
 ; CHECK-NEXT:     str     r3, [r0]
 ; CHECK-NEXT:     bx      lr
 ; CHECK-NEXT: [[LDFAILBB]]:
 ; CHECK-NEXT:     clrex
-; CHECK-NEXT:     str     r3, [r0]
-; CHECK-NEXT:     bx      lr
-; CHECK-NEXT: [[SUCCESSBB]]:
-; CHECK-NEXT:     dmb     ish
+; CHECK-NEXT: [[FAILBB]]:
 ; CHECK-NEXT:     str     r3, [r0]
 ; CHECK-NEXT:     bx      lr
 ;
@@ -37,19 +35,20 @@ define i1 @test_cmpxchg_weak_to_bool(i32, ptr %addr, i32 %desired, i32 %new) {
 ; CHECK-NEXT:     bne     [[LDFAILBB:LBB[0-9]+_[0-9]+]]
 ; CHECK-NEXT: %bb.1:
 ; CHECK-NEXT:     dmb ish
-; CHECK-NEXT:     mov     r0, #0
 ; CHECK-NEXT:     strex   [[SUCCESS:r[0-9]+]], r3, [r1]
 ; CHECK-NEXT:     cmp     [[SUCCESS]], #0
-; CHECK-NEXT:     bxne    lr
-; CHECK-NEXT: LBB1_2:
+; CHECK-NEXT:     bne     [[FAILBB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: %bb.2:
 ; CHECK-NEXT:     mov     r0, #1
 ; CHECK-NEXT:     dmb     ish
 ; CHECK-NEXT:     bx      lr
 ; CHECK-NEXT: [[LDFAILBB]]:
-; CHECK-NEXT:     mov     r0, #0
 ; CHECK-NEXT:     clrex
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT:     mov     r0, #0
 ; CHECK-NEXT:     bx      lr
 ;
+
   %pair = cmpxchg weak ptr %addr, i32 %desired, i32 %new seq_cst monotonic
   %success = extractvalue { i32, i1 } %pair, 1
   ret i1 %success
diff --git a/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll b/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll
index 4372cad3f87c6..0b0399cbf4661 100644
--- a/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll
+++ b/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll
@@ -79,7 +79,6 @@ define void @f1() #0 {
 ; CHECK-NEXT:    {
 ; CHECK-NEXT:     r4 = sub(#-1,r4)
 ; CHECK-NEXT:    }
-; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB1_1: // %cmpxchg.start
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    {
diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
index 2e72d26ed4566..585b4c7538246 100644
--- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
+++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -passes=atomic-expand -codegen-opt-level=1 %s | FileCheck %s
 
 define i8 @test_atomic_xchg_i8(ptr %ptr, i8 %xchgend) {
@@ -221,49 +222,40 @@ define i8 @test_atomic_umin_i8(ptr %ptr, i8 %uminend) {
 define i8 @test_cmpxchg_i8_seqcst_seqcst(ptr %ptr, i8 %desired, i8 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
 ; CHECK: br label %[[START:.*]]
-
 ; CHECK: [[START]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i8) %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
 ; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
 ; CHECK: [[FENCED_STORE]]:
 ; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
-
 ; CHECK: [[LOOP]]:
 ; CHECK: [[LOADED_LOOP:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0(i32 [[NEWVAL32]], ptr elementtype(i8) %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD]]
-
 ; CHECK: [[RELEASED_LOAD]]:
 ; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i8) %ptr)
 ; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i8
 ; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i8 [[OLDVAL_LOOP]], %desired
-; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
-
+; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
 ; CHECK: [[SUCCESS_BB]]:
 ; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE:.*]]
-
 ; CHECK: [[NO_STORE_BB]]:
 ; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i8 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
 ; CHECK: [[FAILURE_BB]]:
 ; CHECK: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
 ; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE]]
-
 ; CHECK: [[DONE]]:
 ; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i8 [[LOADED]]
-
   %pairold = cmpxchg ptr %ptr, i8 %desired, i8 %newval seq_cst seq_cst
   %old = extractvalue { i8, i1 } %pairold, 0
   ret i8 %old
@@ -272,49 +264,40 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(ptr %ptr, i8 %desired, i8 %newval) {
 define i16 @test_cmpxchg_i16_seqcst_monotonic(ptr %ptr, i16 %desired, i16 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
 ; CHECK: br label %[[LOOP:.*]]
-
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i16) %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
 ; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
 ; CHECK: [[FENCED_STORE]]:
 ; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[LOOP:.*]]
-
 ; CHECK: [[LOOP]]:
 ; CHECK: [[LOADED_LOOP:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0(i32 [[NEWVAL32]], ptr elementtype(i16) %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD:.*]]
-
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD:cmpxchg\.releasedload]]
 ; CHECK: [[RELEASED_LOAD]]:
 ; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i16) %ptr)
 ; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i16
 ; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i16 [[OLDVAL_LOOP]], %desired
-; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
-
+; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
 ; CHECK: [[SUCCESS_BB]]:
 ; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE:.*]]
-
 ; CHECK: [[NO_STORE_BB]]:
 ; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i16 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
 ; CHECK: [[FAILURE_BB]]:
 ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: dmb
 ; CHECK: br label %[[DONE]]
-
 ; CHECK: [[DONE]]:
 ; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i16 [[LOADED]]
-
   %pairold = cmpxchg ptr %ptr, i16 %desired, i16 %newval seq_cst monotonic
   %old = extractvalue { i16, i1 } %pairold, 0
   ret i16 %old
@@ -324,40 +307,32 @@ define i32 @test_cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %desired, i32 %newval
 ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
 ; CHECK-NOT: dmb
 ; CHECK: br label %[[LOOP:.*]]
-
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) %ptr)
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
 ; CHECK: [[FENCED_STORE]]:
 ; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0(i32 %newval, ptr elementtype(i32) %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
 ; CHECK: [[SUCCESS_BB]]:
 ; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE:.*]]
-
 ; CHECK: [[NO_STORE_BB]]:
 ; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
 ; CHECK: [[FAILURE_BB]]:
 ; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
 ; CHECK: call void @llvm.arm.dmb(i32 11)
 ; CHECK: br label %[[DONE]]
-
 ; CHECK: [[DONE]]:
 ; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i32 [[LOADED_EXIT]]
-
   %pairold = cmpxchg ptr %ptr, i32 %desired, i32 %newval acquire acquire
   %old = extractvalue { i32, i1 } %pairold, 0
   ret i32 %old
@@ -367,7 +342,6 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
 ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
 ; CHECK-NOT: dmb
 ; CHECK: br label %[[LOOP:.*]]
-
 ; CHECK: [[LOOP]]:
 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(ptr %ptr)
 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
@@ -377,11 +351,9 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
 ; CHECK: [[FENCED_STORE]]:
 ; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -390,26 +362,21 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], ptr %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
 ; CHECK: [[SUCCESS_BB]]:
 ; CHECK-NOT: dmb
 ; CHECK: br label %[[DONE:.*]]
-
 ; CHECK: [[NO_STORE_BB]]:
 ; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
 ; CHECK: [[FAILURE_BB]]:
 ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: dmb
 ; CHECK: br label %[[DONE]]
-
 ; CHECK: [[DONE]]:
 ; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i64 [[LOADED_EXIT]]
-
   %pairold = cmpxchg ptr %ptr, i64 %desired, i64 %newval monotonic monotonic
   %old = extractvalue { i64, i1 } %pairold, 0
   ret i64 %old
@@ -419,40 +386,32 @@ define i32 @test_cmpxchg_minsize(ptr %addr, i32 %desired, i32 %new) minsize {
 ; CHECK-LABEL: @test_cmpxchg_minsize
 ; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[START:.*]]
-
 ; CHECK: [[START]]:
 ; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) %addr)
 ; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK:     br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
 ; CHECK: [[FENCED_STORE]]:
 ; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
 ; CHECK: [[TRY_STORE]]:
 ; CHECK:     [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ]
 ; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0(i32 %new, ptr elementtype(i32) %addr)
 ; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
 ; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[START]]
-
 ; CHECK: [[SUCCESS_BB]]:
 ; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[END:.*]]
-
 ; CHECK: [[NO_STORE_BB]]:
 ; CHECK:     [[LOADED_NO_STORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ]
 ; CHECK:     call void @llvm.arm.clrex()
 ; CHECK:     br label %[[FAILURE_BB]]
-
 ; CHECK: [[FAILURE_BB]]:
 ; CHECK:     [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
 ; CHECK:     call void @llvm.arm.dmb(i32 11)
 ; CHECK:     br label %[[END]]
-
 ; CHECK: [[END]]:
 ; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK:     ret i32 [[LOADED_EXIT]]
-
   %pair = cmpxchg ptr %addr, i32 %desired, i32 %new seq_cst seq_cst
   %oldval = extractvalue { i32, i1 } %pair, 0
   ret i32 %oldval
diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
index 10073e23f5d46..98539ffcde32a 100644
--- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
+++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -passes=atomic-expand %s -codegen-opt-level=1 | FileCheck %s
 
 define i8 @test_atomic_xchg_i8(ptr %ptr, i8 %xchgend) {
@@ -84,42 +85,34 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(ptr %ptr, i8 %desired, i8 %newval) {
 ; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
 ; CHECK-NOT: fence
 ; CHECK: br label %[[LOOP:.*]]
-
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0(ptr elementtype(i8) %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
 ; CHECK: [[FENCED_STORE]]:
 ; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.stlex.p0(i32 [[NEWVAL32]], ptr elementtype(i8) %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
 ; CHECK: [[SUCCESS_BB]]:
 ; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE:.*]]
-
 ; CHECK: [[NO_STORE_BB]]:
 ; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
 ; CHECK: [[FAILURE_BB]]:
 ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE]]
-
 ; CHECK: [[DONE]]:
 ; CHECK: [[LOADED_EXIT:%.*]] = phi i8 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i8 [[LOADED_EXIT]]
-
   %pairold = cmpxchg ptr %ptr, i8 %desired, i8 %newval seq_cst seq_cst
   %old = extractvalue { i8, i1 } %pairold, 0
   ret i8 %old
@@ -129,43 +122,35 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(ptr %ptr, i16 %desired, i16 %newva
 ; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
 ; CHECK-NOT: fence
 ; CHECK: br label %[[LOOP:.*]]
-
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0(ptr elementtype(i16) %ptr)
 ; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
 ; CHECK: [[FENCED_STORE]]:
 ; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.stlex.p0(i32 [[NEWVAL32]], ptr elementtype(i16) %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
 ; CHECK: [[SUCCESS_BB]]:
 ; CHECK-NOT: fence
 ; CHECK: br label %[[DONE:.*]]
-
 ; CHECK: [[NO_STORE_BB]]:
 ; The PHI is not required.
 ; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
 ; CHECK: [[FAILURE_BB]]:
 ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: fence
 ; CHECK: br label %[[DONE]]
-
 ; CHECK: [[DONE]]:
 ; CHECK: [[LOADED_EXIT:%.*]] = phi i16 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i16 [[LOADED_EXIT]]
-
   %pairold = cmpxchg ptr %ptr, i16 %desired, i16 %newval seq_cst monotonic
   %old = extractvalue { i16, i1 } %pairold, 0
   ret i16 %old
@@ -175,40 +160,32 @@ define i32 @test_cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %desired, i32 %newval
 ; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
 ; CHECK-NOT: fence
 ; CHECK: br label %[[LOOP:.*]]
-
 ; CHECK: [[LOOP]]:
 ; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0(ptr elementtype(i32) %ptr)
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
 ; CHECK: [[FENCED_STORE]]:
 ; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0(i32 %newval, ptr elementtype(i32) %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
 ; CHECK: [[SUCCESS_BB]]:
 ; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE:.*]]
-
 ; CHECK: [[NO_STORE_BB]]:
 ; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
 ; CHECK: [[FAILURE_BB]]:
 ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE]]
-
 ; CHECK: [[DONE]]:
 ; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i32 [[LOADED_EXIT]]
-
   %pairold = cmpxchg ptr %ptr, i32 %desired, i32 %newval acquire acquire
   %old = extractvalue { i32, i1 } %pairold, 0
   ret i32 %old
@@ -218,7 +195,6 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
 ; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
 ; CHECK-NOT: fence
 ; CHECK: br label %[[LOOP:.*]]
-
 ; CHECK: [[LOOP]]:
 ; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(ptr %ptr)
 ; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
@@ -228,11 +204,9 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
 ; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
 ; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
 ; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
 ; CHECK: [[FENCED_STORE]]:
 ; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
 ; CHECK: [[TRY_STORE]]:
 ; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ]
 ; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -241,26 +215,21 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
 ; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], ptr %ptr)
 ; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
 ; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
 ; CHECK: [[SUCCESS_BB]]:
 ; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE:.*]]
-
 ; CHECK: [[NO_STORE_BB]]:
 ; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ]
 ; CHECK-NEXT: call void @llvm.arm.clrex()
 ; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
 ; CHECK: [[FAILURE_BB]]:
 ; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
 ; CHECK-NOT: fence_cst
 ; CHECK: br label %[[DONE]]
-
 ; CHECK: [[DONE]]:
 ; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
 ; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
 ; CHECK: ret i64 [[LOADED_EXIT]]
-
   %pairold = cmpxchg ptr %ptr, i64 %desired, i64 %newval monotonic monotonic
   %old = extractvalue { i64, i1 } %pairold, 0
   ret i64 %old
diff --git a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
index 8195a5b6145e3..aff4196815e21 100644
--- a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
+++ b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
@@ -9,7 +9,7 @@ define i32 @test_cmpxchg_seq_cst(ptr %addr, i32 %desired, i32 %new) {
 ; CHECK:       [[CMPXCHG_START]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]])
 ; CHECK-NEXT:    [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]]
-; CHECK-NEXT:    br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]]
+; CHECK-NEXT:    br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]]
 ; CHECK:       [[CMPXCHG_FENCEDSTORE]]:
 ; CHECK-NEXT:    call void @llvm.arm.dmb(i32 10)
 ; CHECK-NEXT:    br label %[[CMPXCHG_TRYSTORE:.*]]
@@ -17,7 +17,7 @@ define i32 @test_cmpxchg_seq_cst(ptr %addr, i32 %desired, i32 %new) {
 ; CHECK-NEXT:    [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]])
 ; CHECK-NEXT:    [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]]
+; CHECK-NEXT:    br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]]
 ; CHECK:       [[CMPXCHG_RELEASEDLOAD:.*:]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       [[CMPXCHG_SUCCESS]]:
@@ -48,7 +48,7 @@ define i1 @test_cmpxchg_weak_fail(ptr %addr, i32 %desired, i32 %new) {
 ; CHECK:       [[CMPXCHG_START]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]])
 ; CHECK-NEXT:    [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]]
-; CHECK-NEXT:    br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]]
+; CHECK-NEXT:    br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]]
 ; CHECK:       [[CMPXCHG_FENCEDSTORE]]:
 ; CHECK-NEXT:    call void @llvm.arm.dmb(i32 10)
 ; CHECK-NEXT:    br label %[[CMPXCHG_TRYSTORE:.*]]
@@ -56,7 +56,7 @@ define i1 @test_cmpxchg_weak_fail(ptr %addr, i32 %desired, i32 %new) {
 ; CHECK-NEXT:    [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]])
 ; CHECK-NEXT:    [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]]
+; CHECK-NEXT:    br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]]
 ; CHECK:       [[CMPXCHG_RELEASEDLOAD:.*:]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       [[CMPXCHG_SUCCESS]]:
@@ -86,14 +86,14 @@ define i32 @test_cmpxchg_monotonic(ptr %addr, i32 %desired, i32 %new) {
 ; CHECK:       [[CMPXCHG_START]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]])
 ; CHECK-NEXT:    [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]]
-; CHECK-NEXT:    br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]]
+; CHECK-NEXT:    br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]]
 ; CHECK:       [[CMPXCHG_FENCEDSTORE]]:
 ; CHECK-NEXT:    br label %[[CMPXCHG_TRYSTORE:.*]]
 ; CHECK:       [[CMPXCHG_TRYSTORE]]:
 ; CHECK-NEXT:    [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]])
 ; CHECK-NEXT:    [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]]
+; CHECK-NEXT:    br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]]
 ; CHECK:       [[CMPXCHG_RELEASEDLOAD:.*:]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       [[CMPXCHG_SUCCESS]]:
@@ -122,7 +122,7 @@ define i32 @test_cmpxchg_seq_cst_minsize(ptr %addr, i32 %desired, i32 %new) mins
 ; CHECK:       [[CMPXCHG_START]]:
 ; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]])
 ; CHECK-NEXT:    [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]]
-; CHECK-NEXT:    br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]]
+; CHECK-NEXT:    br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]]
 ; CHECK:       [[CMPXCHG_FENCEDSTORE]]:
 ; CHECK-NEXT:    call void @llvm.arm.dmb(i32 10)
 ; CHECK-NEXT:    br label %[[CMPXCHG_TRYSTORE:.*]]
@@ -130,7 +130,7 @@ define i32 @test_cmpxchg_seq_cst_minsize(ptr %addr, i32 %desired, i32 %new) mins
 ; CHECK-NEXT:    [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]])
 ; CHECK-NEXT:    [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]]
+; CHECK-NEXT:    br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]]
 ; CHECK:       [[CMPXCHG_RELEASEDLOAD:.*:]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       [[CMPXCHG_SUCCESS]]:



More information about the llvm-commits mailing list