[llvm] [PowerPC] support branch hint for AtomicExpandImpl::expandAtomicCmpXchg (PR #152366)

zhijian lin via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 6 12:11:40 PDT 2025


https://github.com/diggerlin created https://github.com/llvm/llvm-project/pull/152366

This patch adds branch hints to the branches emitted by AtomicExpandImpl::expandAtomicCmpXchg. On PowerPC, the compare-and-swap loop can carry static branch hints, for example:

```
loop:
    lwarx r6,0,r3   # load and reserve
    cmpw r4,r6      # 1st 2 operands equal?
    bne- exit       # skip if not
    stwcx. r5,0,r3  # store new value if still reserved
    bne- loop       # loop if lost reservation
exit:
    mr  r4,r6       # return value from storage
```

`-` hints that the branch is not taken.
`+` hints that the branch is taken.
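
For reference, here is a minimal C++ sketch (not part of the patch; the helper names `likelyCmpXchgWeights` and `emitHintedBranch` are illustrative) of the mechanism being used: the target hook returns a branch-weights metadata node, and AtomicExpand passes that node to `CreateCondBr`, marking the store/success path as likely so the PowerPC backend can emit the `+`/`-` static hints shown above.

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"

using namespace llvm;

// Illustrative stand-in for the PPC override added by the patch: a
// branch-weights node that marks the true edge as strongly likely.
static MDNode *likelyCmpXchgWeights(LLVMContext &Ctx) {
  return MDBuilder(Ctx).createLikelyBranchWeights();
}

// Illustrative helper showing how the weights get attached: the !prof
// node goes on the conditional branch, with the first successor (the
// store path) treated as the likely one.
static void emitHintedBranch(IRBuilderBase &Builder, Value *ShouldStore,
                             BasicBlock *TryStoreBB, BasicBlock *NoStoreBB) {
  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB,
                       likelyCmpXchgWeights(Builder.getContext()));
}
```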

>From 5afd4560e10021571218fc10ef3131527d75e85a Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 16 Jul 2025 18:57:19 +0000
Subject: [PATCH 1/2] add branch hint

---
 llvm/include/llvm/CodeGen/TargetLowering.h  | 5 +++++
 llvm/lib/CodeGen/AtomicExpandPass.cpp       | 9 ++++++---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 +++++
 llvm/lib/Target/PowerPC/PPCISelLowering.h   | 1 +
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cbdc1b6031680..e7fb34b036e8b 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2332,6 +2332,11 @@ class LLVM_ABI TargetLoweringBase {
   virtual Instruction *emitTrailingFence(IRBuilderBase &Builder,
                                          Instruction *Inst,
                                          AtomicOrdering Ord) const;
+
+  virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+    return nullptr;
+  }
+
   /// @}
 
   // Emits code that executes when the comparison result in the ll/sc
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 3f3d5dc90711f..a5319f3a2440a 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1454,7 +1454,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
   // If the cmpxchg doesn't actually need any ordering when it fails, we can
   // jump straight past that fence instruction (if it exists).
-  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
+  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
+                       TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
 
   Builder.SetInsertPoint(ReleasingStoreBB);
   if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
@@ -1473,7 +1474,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
   BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
   Builder.CreateCondBr(StoreSuccess, SuccessBB,
-                       CI->isWeak() ? FailureBB : RetryBB);
+                       CI->isWeak() ? FailureBB : RetryBB,
+                       TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
 
   Builder.SetInsertPoint(ReleasedLoadBB);
   Value *SecondLoad;
@@ -1486,7 +1488,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
     // If the cmpxchg doesn't actually need any ordering when it fails, we can
     // jump straight past that fence instruction (if it exists).
-    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB,
+                         TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
     // Update PHI node in TryStoreBB.
     LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
   } else
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 459525ed4ee9a..853923ac4125a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -68,6 +68,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
@@ -12816,6 +12817,10 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
   return Builder.CreateXor(Call, Builder.getInt32(1));
 }
 
+MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+  return MDBuilder(Ctx).createLikelyBranchWeights();
+}
+
 // The mappings for emitLeading/TrailingFence is taken from
 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 124c7116dc3b5..9f73c5587805a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -938,6 +938,7 @@ namespace llvm {
     Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                    AtomicOrdering Ord) const override;
 
+    virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
     bool shouldInlineQuadwordAtomics() const;
 
     TargetLowering::AtomicExpansionKind

>From b6976bf482cda0e2b74102847f68efd02d28c26c Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 24 Jul 2025 13:56:14 +0000
Subject: [PATCH 2/2] modify test case

---
 llvm/include/llvm/CodeGen/TargetLowering.h    |   3 +-
 llvm/lib/CodeGen/AtomicExpandPass.cpp         |  16 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |   3 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |   3 +-
 .../CodeGen/PowerPC/PR35812-neg-cmpxchg.ll    |  72 +--
 llvm/test/CodeGen/PowerPC/all-atomics.ll      | 211 +++----
 .../PowerPC/atomic-compare-exchange-weak.ll   |  34 +-
 llvm/test/CodeGen/PowerPC/atomic-float.ll     |  90 ++-
 .../PowerPC/atomicrmw-cond-sub-clamp.ll       | 466 ++++++++-------
 .../PowerPC/atomicrmw-uinc-udec-wrap.ll       | 462 ++++++++-------
 .../CodeGen/PowerPC/atomics-regression.ll     | 544 ++++++++----------
 llvm/test/CodeGen/PowerPC/atomics.ll          | 114 ++--
 llvm/test/CodeGen/PowerPC/loop-comment.ll     |   5 +-
 13 files changed, 991 insertions(+), 1032 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e7fb34b036e8b..027bcc5bc53ae 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2333,7 +2333,8 @@ class LLVM_ABI TargetLoweringBase {
                                          Instruction *Inst,
                                          AtomicOrdering Ord) const;
 
-  virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+  virtual MDNode *
+  getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
     return nullptr;
   }
 
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a5319f3a2440a..abaa8b6e841f6 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1454,8 +1454,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
   // If the cmpxchg doesn't actually need any ordering when it fails, we can
   // jump straight past that fence instruction (if it exists).
-  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
-                       TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+  Builder.CreateCondBr(
+      ShouldStore, ReleasingStoreBB, NoStoreBB,
+      TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
 
   Builder.SetInsertPoint(ReleasingStoreBB);
   if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
@@ -1473,9 +1474,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
   StoreSuccess = Builder.CreateICmpEQ(
       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
   BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
-  Builder.CreateCondBr(StoreSuccess, SuccessBB,
-                       CI->isWeak() ? FailureBB : RetryBB,
-                       TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+  Builder.CreateCondBr(
+      StoreSuccess, SuccessBB, CI->isWeak() ? FailureBB : RetryBB,
+      TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
 
   Builder.SetInsertPoint(ReleasedLoadBB);
   Value *SecondLoad;
@@ -1488,8 +1489,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
 
     // If the cmpxchg doesn't actually need any ordering when it fails, we can
     // jump straight past that fence instruction (if it exists).
-    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB,
-                         TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+    Builder.CreateCondBr(
+        ShouldStore, TryStoreBB, NoStoreBB,
+        TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
     // Update PHI node in TryStoreBB.
     LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
   } else
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 853923ac4125a..b218532e56b6a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12817,7 +12817,8 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
   return Builder.CreateXor(Call, Builder.getInt32(1));
 }
 
-MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(
+    LLVMContext &Ctx) const {
   return MDBuilder(Ctx).createLikelyBranchWeights();
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 9f73c5587805a..4892a3c603a6c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -938,7 +938,8 @@ namespace llvm {
     Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                    AtomicOrdering Ord) const override;
 
-    virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
+    virtual MDNode *
+    getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
     bool shouldInlineQuadwordAtomics() const;
 
     TargetLowering::AtomicExpansionKind
diff --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
index b7852c3c3e6e0..2d8e0e869a860 100644
--- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
+++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -19,51 +19,53 @@ define signext i32 @main() nounwind {
 ; CHECK-NEXT:    addi 3, 1, 46
 ; CHECK-NEXT:    lharx 4, 0, 3
 ; CHECK-NEXT:    cmplwi 4, 33059
-; CHECK-NEXT:    bne 0, .LBB0_4
+; CHECK-NEXT:    bne- 0, .LBB0_4
 ; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    li 4, 234
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_2: # %cmpxchg.trystore
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    sthcx. 4, 0, 3
-; CHECK-NEXT:    beq 0, .LBB0_7
+; CHECK-NEXT:    beq+ 0, .LBB0_5
 ; CHECK-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 5, 0, 3
 ; CHECK-NEXT:    cmplwi 5, 33059
-; CHECK-NEXT:    beq 0, .LBB0_2
+; CHECK-NEXT:    beq+ 0, .LBB0_2
 ; CHECK-NEXT:  .LBB0_4: # %cmpxchg.nostore
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    b .LBB0_8
-; CHECK-NEXT:  .LBB0_5: # %L.B0000
+; CHECK-NEXT:    crxor 20, 20, 20
+; CHECK-NEXT:    b .LBB0_6
+; CHECK-NEXT:  .LBB0_5: # %cmpxchg.success
+; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:  .LBB0_6: # %cmpxchg.end
+; CHECK-NEXT:    bc 4, 20, .LBB0_9
+; CHECK-NEXT:  # %bb.7: # %L.B0000
 ; CHECK-NEXT:    lhz 3, 46(1)
 ; CHECK-NEXT:    cmplwi 3, 234
-; CHECK-NEXT:    bne 0, .LBB0_9
-; CHECK-NEXT:  # %bb.6: # %L.B0001
+; CHECK-NEXT:    bne 0, .LBB0_10
+; CHECK-NEXT:  # %bb.8: # %L.B0001
 ; CHECK-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-NEXT:    bl puts
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    b .LBB0_11
-; CHECK-NEXT:  .LBB0_7: # %cmpxchg.success
-; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    b .LBB0_5
-; CHECK-NEXT:  .LBB0_8: # %L.B0003
+; CHECK-NEXT:    b .LBB0_12
+; CHECK-NEXT:  .LBB0_9: # %L.B0003
 ; CHECK-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-NEXT:    addi 3, 3, 16
-; CHECK-NEXT:    b .LBB0_10
-; CHECK-NEXT:  .LBB0_9: # %L.B0005
+; CHECK-NEXT:    b .LBB0_11
+; CHECK-NEXT:  .LBB0_10: # %L.B0005
 ; CHECK-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-NEXT:    addi 3, 3, 64
-; CHECK-NEXT:  .LBB0_10: # %L.B0003
+; CHECK-NEXT:  .LBB0_11: # %L.B0003
 ; CHECK-NEXT:    bl puts
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:  .LBB0_11: # %L.B0003
+; CHECK-NEXT:  .LBB0_12: # %L.B0003
 ; CHECK-NEXT:    addi 1, 1, 48
 ; CHECK-NEXT:    ld 0, 16(1)
 ; CHECK-NEXT:    mtlr 0
@@ -83,7 +85,7 @@ define signext i32 @main() nounwind {
 ; CHECK-P7-NEXT:    srw 6, 5, 4
 ; CHECK-P7-NEXT:    clrlwi 6, 6, 16
 ; CHECK-P7-NEXT:    cmplwi 6, 33059
-; CHECK-P7-NEXT:    bne 0, .LBB0_4
+; CHECK-P7-NEXT:    bne- 0, .LBB0_4
 ; CHECK-P7-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; CHECK-P7-NEXT:    lis 6, 0
 ; CHECK-P7-NEXT:    li 7, 234
@@ -92,51 +94,53 @@ define signext i32 @main() nounwind {
 ; CHECK-P7-NEXT:    slw 7, 7, 4
 ; CHECK-P7-NEXT:    slw 6, 6, 4
 ; CHECK-P7-NEXT:    not 6, 6
-; CHECK-P7-NEXT:    .p2align 4
 ; CHECK-P7-NEXT:  .LBB0_2: # %cmpxchg.trystore
 ; CHECK-P7-NEXT:    #
 ; CHECK-P7-NEXT:    and 5, 5, 6
 ; CHECK-P7-NEXT:    or 5, 5, 7
 ; CHECK-P7-NEXT:    stwcx. 5, 0, 3
-; CHECK-P7-NEXT:    beq 0, .LBB0_7
+; CHECK-P7-NEXT:    beq+ 0, .LBB0_5
 ; CHECK-P7-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; CHECK-P7-NEXT:    #
 ; CHECK-P7-NEXT:    lwarx 5, 0, 3
 ; CHECK-P7-NEXT:    srw 8, 5, 4
 ; CHECK-P7-NEXT:    clrlwi 8, 8, 16
 ; CHECK-P7-NEXT:    cmplwi 8, 33059
-; CHECK-P7-NEXT:    beq 0, .LBB0_2
+; CHECK-P7-NEXT:    beq+ 0, .LBB0_2
 ; CHECK-P7-NEXT:  .LBB0_4: # %cmpxchg.nostore
+; CHECK-P7-NEXT:    crxor 20, 20, 20
 ; CHECK-P7-NEXT:    lwsync
-; CHECK-P7-NEXT:    b .LBB0_8
-; CHECK-P7-NEXT:  .LBB0_5: # %L.B0000
+; CHECK-P7-NEXT:    b .LBB0_6
+; CHECK-P7-NEXT:  .LBB0_5: # %cmpxchg.success
+; CHECK-P7-NEXT:    lwsync
+; CHECK-P7-NEXT:    creqv 20, 20, 20
+; CHECK-P7-NEXT:  .LBB0_6: # %cmpxchg.end
+; CHECK-P7-NEXT:    bc 4, 20, .LBB0_9
+; CHECK-P7-NEXT:  # %bb.7: # %L.B0000
 ; CHECK-P7-NEXT:    lhz 3, 46(1)
 ; CHECK-P7-NEXT:    cmplwi 3, 234
-; CHECK-P7-NEXT:    bne 0, .LBB0_9
-; CHECK-P7-NEXT:  # %bb.6: # %L.B0001
+; CHECK-P7-NEXT:    bne 0, .LBB0_10
+; CHECK-P7-NEXT:  # %bb.8: # %L.B0001
 ; CHECK-P7-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-P7-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-P7-NEXT:    bl puts
 ; CHECK-P7-NEXT:    nop
 ; CHECK-P7-NEXT:    li 3, 0
-; CHECK-P7-NEXT:    b .LBB0_11
-; CHECK-P7-NEXT:  .LBB0_7: # %cmpxchg.success
-; CHECK-P7-NEXT:    lwsync
-; CHECK-P7-NEXT:    b .LBB0_5
-; CHECK-P7-NEXT:  .LBB0_8: # %L.B0003
+; CHECK-P7-NEXT:    b .LBB0_12
+; CHECK-P7-NEXT:  .LBB0_9: # %L.B0003
 ; CHECK-P7-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-P7-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-P7-NEXT:    addi 3, 3, 16
-; CHECK-P7-NEXT:    b .LBB0_10
-; CHECK-P7-NEXT:  .LBB0_9: # %L.B0005
+; CHECK-P7-NEXT:    b .LBB0_11
+; CHECK-P7-NEXT:  .LBB0_10: # %L.B0005
 ; CHECK-P7-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-P7-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-P7-NEXT:    addi 3, 3, 64
-; CHECK-P7-NEXT:  .LBB0_10: # %L.B0003
+; CHECK-P7-NEXT:  .LBB0_11: # %L.B0003
 ; CHECK-P7-NEXT:    bl puts
 ; CHECK-P7-NEXT:    nop
 ; CHECK-P7-NEXT:    li 3, 1
-; CHECK-P7-NEXT:  .LBB0_11: # %L.B0003
+; CHECK-P7-NEXT:  .LBB0_12: # %L.B0003
 ; CHECK-P7-NEXT:    addi 1, 1, 48
 ; CHECK-P7-NEXT:    ld 0, 16(1)
 ; CHECK-P7-NEXT:    mtlr 0
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 07afea75aec67..7e892fc4ae6eb 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -4347,19 +4347,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 8, sc at toc@l(4)
 ; CHECK-NEXT:    lbarx 5, 0, 6
 ; CHECK-NEXT:    cmplw 5, 7
-; CHECK-NEXT:    bne 0, .LBB3_4
+; CHECK-NEXT:    bne- 0, .LBB3_4
 ; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore276
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_2: # %cmpxchg.trystore275
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stbcx. 8, 0, 6
-; CHECK-NEXT:    beq 0, .LBB3_4
+; CHECK-NEXT:    beq+ 0, .LBB3_4
 ; CHECK-NEXT:  # %bb.3: # %cmpxchg.releasedload274
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 5, 0, 6
 ; CHECK-NEXT:    cmplw 5, 7
-; CHECK-NEXT:    beq 0, .LBB3_2
+; CHECK-NEXT:    beq+ 0, .LBB3_2
 ; CHECK-NEXT:  .LBB3_4: # %cmpxchg.nostore272
 ; CHECK-NEXT:    addi 7, 3, uc at toc@l
 ; CHECK-NEXT:    lwsync
@@ -4367,20 +4366,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 9, uc at toc@l(3)
 ; CHECK-NEXT:    lbarx 8, 0, 7
 ; CHECK-NEXT:    cmplw 8, 9
-; CHECK-NEXT:    bne 0, .LBB3_8
+; CHECK-NEXT:    bne- 0, .LBB3_8
 ; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore257
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 5, 5, 24
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_6: # %cmpxchg.trystore256
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stbcx. 5, 0, 7
-; CHECK-NEXT:    beq 0, .LBB3_8
+; CHECK-NEXT:    beq+ 0, .LBB3_8
 ; CHECK-NEXT:  # %bb.7: # %cmpxchg.releasedload255
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 8, 0, 7
 ; CHECK-NEXT:    cmplw 8, 9
-; CHECK-NEXT:    beq 0, .LBB3_6
+; CHECK-NEXT:    beq+ 0, .LBB3_6
 ; CHECK-NEXT:  .LBB3_8: # %cmpxchg.nostore253
 ; CHECK-NEXT:    addis 5, 2, ss at toc@ha
 ; CHECK-NEXT:    lwsync
@@ -4390,21 +4388,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 8, 5, ss at toc@l
 ; CHECK-NEXT:    lharx 9, 0, 8
 ; CHECK-NEXT:    cmplw 9, 10
-; CHECK-NEXT:    bne 0, .LBB3_12
+; CHECK-NEXT:    bne- 0, .LBB3_12
 ; CHECK-NEXT:  # %bb.9: # %cmpxchg.fencedstore238
 ; CHECK-NEXT:    extsb 11, 11
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 11, 11, 16
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_10: # %cmpxchg.trystore237
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    sthcx. 11, 0, 8
-; CHECK-NEXT:    beq 0, .LBB3_12
+; CHECK-NEXT:    beq+ 0, .LBB3_12
 ; CHECK-NEXT:  # %bb.11: # %cmpxchg.releasedload236
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 9, 0, 8
 ; CHECK-NEXT:    cmplw 9, 10
-; CHECK-NEXT:    beq 0, .LBB3_10
+; CHECK-NEXT:    beq+ 0, .LBB3_10
 ; CHECK-NEXT:  .LBB3_12: # %cmpxchg.nostore234
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 9, ss at toc@l(5)
@@ -4414,21 +4411,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 9, 5, us at toc@l
 ; CHECK-NEXT:    lharx 10, 0, 9
 ; CHECK-NEXT:    cmplw 10, 11
-; CHECK-NEXT:    bne 0, .LBB3_16
+; CHECK-NEXT:    bne- 0, .LBB3_16
 ; CHECK-NEXT:  # %bb.13: # %cmpxchg.fencedstore219
 ; CHECK-NEXT:    extsb 12, 12
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 12, 12, 16
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_14: # %cmpxchg.trystore218
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    sthcx. 12, 0, 9
-; CHECK-NEXT:    beq 0, .LBB3_16
+; CHECK-NEXT:    beq+ 0, .LBB3_16
 ; CHECK-NEXT:  # %bb.15: # %cmpxchg.releasedload217
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 10, 0, 9
 ; CHECK-NEXT:    cmplw 10, 11
-; CHECK-NEXT:    beq 0, .LBB3_14
+; CHECK-NEXT:    beq+ 0, .LBB3_14
 ; CHECK-NEXT:  .LBB3_16: # %cmpxchg.nostore215
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    sth 10, us at toc@l(5)
@@ -4438,20 +4434,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 10, 5, si at toc@l
 ; CHECK-NEXT:    lwarx 11, 0, 10
 ; CHECK-NEXT:    cmplw 11, 12
-; CHECK-NEXT:    bne 0, .LBB3_20
+; CHECK-NEXT:    bne- 0, .LBB3_20
 ; CHECK-NEXT:  # %bb.17: # %cmpxchg.fencedstore200
 ; CHECK-NEXT:    extsb 0, 0
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_18: # %cmpxchg.trystore199
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 0, 0, 10
-; CHECK-NEXT:    beq 0, .LBB3_20
+; CHECK-NEXT:    beq+ 0, .LBB3_20
 ; CHECK-NEXT:  # %bb.19: # %cmpxchg.releasedload198
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 11, 0, 10
 ; CHECK-NEXT:    cmplw 11, 12
-; CHECK-NEXT:    beq 0, .LBB3_18
+; CHECK-NEXT:    beq+ 0, .LBB3_18
 ; CHECK-NEXT:  .LBB3_20: # %cmpxchg.nostore196
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    stw 11, si at toc@l(5)
@@ -4461,20 +4456,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 11, 5, ui at toc@l
 ; CHECK-NEXT:    lwarx 12, 0, 11
 ; CHECK-NEXT:    cmplw 12, 0
-; CHECK-NEXT:    bne 0, .LBB3_24
+; CHECK-NEXT:    bne- 0, .LBB3_24
 ; CHECK-NEXT:  # %bb.21: # %cmpxchg.fencedstore181
 ; CHECK-NEXT:    extsb 30, 30
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_22: # %cmpxchg.trystore180
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 30, 0, 11
-; CHECK-NEXT:    beq 0, .LBB3_24
+; CHECK-NEXT:    beq+ 0, .LBB3_24
 ; CHECK-NEXT:  # %bb.23: # %cmpxchg.releasedload179
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 12, 0, 11
 ; CHECK-NEXT:    cmplw 12, 0
-; CHECK-NEXT:    beq 0, .LBB3_22
+; CHECK-NEXT:    beq+ 0, .LBB3_22
 ; CHECK-NEXT:  .LBB3_24: # %cmpxchg.nostore177
 ; CHECK-NEXT:    addis 30, 2, sll at toc@ha
 ; CHECK-NEXT:    lwsync
@@ -4484,20 +4478,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 12, 30, sll at toc@l
 ; CHECK-NEXT:    ldarx 0, 0, 12
 ; CHECK-NEXT:    cmpld 0, 29
-; CHECK-NEXT:    bne 0, .LBB3_28
+; CHECK-NEXT:    bne- 0, .LBB3_28
 ; CHECK-NEXT:  # %bb.25: # %cmpxchg.fencedstore162
 ; CHECK-NEXT:    extsb 28, 28
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_26: # %cmpxchg.trystore161
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 28, 0, 12
-; CHECK-NEXT:    beq 0, .LBB3_28
+; CHECK-NEXT:    beq+ 0, .LBB3_28
 ; CHECK-NEXT:  # %bb.27: # %cmpxchg.releasedload160
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 0, 0, 12
 ; CHECK-NEXT:    cmpld 0, 29
-; CHECK-NEXT:    beq 0, .LBB3_26
+; CHECK-NEXT:    beq+ 0, .LBB3_26
 ; CHECK-NEXT:  .LBB3_28: # %cmpxchg.nostore158
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    std 0, sll at toc@l(30)
@@ -4507,20 +4500,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    addi 0, 30, ull at toc@l
 ; CHECK-NEXT:    ldarx 29, 0, 0
 ; CHECK-NEXT:    cmpld 29, 28
-; CHECK-NEXT:    bne 0, .LBB3_32
+; CHECK-NEXT:    bne- 0, .LBB3_32
 ; CHECK-NEXT:  # %bb.29: # %cmpxchg.fencedstore143
 ; CHECK-NEXT:    extsb 27, 27
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_30: # %cmpxchg.trystore142
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 27, 0, 0
-; CHECK-NEXT:    beq 0, .LBB3_32
+; CHECK-NEXT:    beq+ 0, .LBB3_32
 ; CHECK-NEXT:  # %bb.31: # %cmpxchg.releasedload141
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 29, 0, 0
 ; CHECK-NEXT:    cmpld 29, 28
-; CHECK-NEXT:    beq 0, .LBB3_30
+; CHECK-NEXT:    beq+ 0, .LBB3_30
 ; CHECK-NEXT:  .LBB3_32: # %cmpxchg.nostore139
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    std 29, ull at toc@l(30)
@@ -4528,19 +4520,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 29, sc at toc@l(4)
 ; CHECK-NEXT:    lbarx 28, 0, 6
 ; CHECK-NEXT:    cmplw 28, 30
-; CHECK-NEXT:    bne 0, .LBB3_36
+; CHECK-NEXT:    bne- 0, .LBB3_36
 ; CHECK-NEXT:  # %bb.33: # %cmpxchg.fencedstore124
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_34: # %cmpxchg.trystore123
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stbcx. 29, 0, 6
-; CHECK-NEXT:    beq 0, .LBB3_37
+; CHECK-NEXT:    beq+ 0, .LBB3_37
 ; CHECK-NEXT:  # %bb.35: # %cmpxchg.releasedload122
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 28, 0, 6
 ; CHECK-NEXT:    cmplw 28, 30
-; CHECK-NEXT:    beq 0, .LBB3_34
+; CHECK-NEXT:    beq+ 0, .LBB3_34
 ; CHECK-NEXT:  .LBB3_36: # %cmpxchg.nostore120
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4557,19 +4548,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
 ; CHECK-NEXT:    lbarx 29, 0, 7
 ; CHECK-NEXT:    cmplw 29, 6
-; CHECK-NEXT:    bne 0, .LBB3_42
+; CHECK-NEXT:    bne- 0, .LBB3_42
 ; CHECK-NEXT:  # %bb.39: # %cmpxchg.fencedstore105
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_40: # %cmpxchg.trystore104
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stbcx. 30, 0, 7
-; CHECK-NEXT:    beq 0, .LBB3_43
+; CHECK-NEXT:    beq+ 0, .LBB3_43
 ; CHECK-NEXT:  # %bb.41: # %cmpxchg.releasedload103
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lbarx 29, 0, 7
 ; CHECK-NEXT:    cmplw 29, 6
-; CHECK-NEXT:    beq 0, .LBB3_40
+; CHECK-NEXT:    beq+ 0, .LBB3_40
 ; CHECK-NEXT:  .LBB3_42: # %cmpxchg.nostore101
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4586,21 +4576,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
 ; CHECK-NEXT:    lharx 30, 0, 8
 ; CHECK-NEXT:    cmplw 30, 6
-; CHECK-NEXT:    bne 0, .LBB3_48
+; CHECK-NEXT:    bne- 0, .LBB3_48
 ; CHECK-NEXT:  # %bb.45: # %cmpxchg.fencedstore86
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 7, 7, 16
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_46: # %cmpxchg.trystore85
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    sthcx. 7, 0, 8
-; CHECK-NEXT:    beq 0, .LBB3_49
+; CHECK-NEXT:    beq+ 0, .LBB3_49
 ; CHECK-NEXT:  # %bb.47: # %cmpxchg.releasedload84
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 30, 0, 8
 ; CHECK-NEXT:    cmplw 30, 6
-; CHECK-NEXT:    beq 0, .LBB3_46
+; CHECK-NEXT:    beq+ 0, .LBB3_46
 ; CHECK-NEXT:  .LBB3_48: # %cmpxchg.nostore82
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4617,21 +4606,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
 ; CHECK-NEXT:    lharx 8, 0, 9
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    bne 0, .LBB3_54
+; CHECK-NEXT:    bne- 0, .LBB3_54
 ; CHECK-NEXT:  # %bb.51: # %cmpxchg.fencedstore67
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    clrlwi 7, 7, 16
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_52: # %cmpxchg.trystore66
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    sthcx. 7, 0, 9
-; CHECK-NEXT:    beq 0, .LBB3_55
+; CHECK-NEXT:    beq+ 0, .LBB3_55
 ; CHECK-NEXT:  # %bb.53: # %cmpxchg.releasedload65
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lharx 8, 0, 9
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    beq 0, .LBB3_52
+; CHECK-NEXT:    beq+ 0, .LBB3_52
 ; CHECK-NEXT:  .LBB3_54: # %cmpxchg.nostore63
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4648,20 +4636,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
 ; CHECK-NEXT:    lwarx 8, 0, 10
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    bne 0, .LBB3_60
+; CHECK-NEXT:    bne- 0, .LBB3_60
 ; CHECK-NEXT:  # %bb.57: # %cmpxchg.fencedstore48
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_58: # %cmpxchg.trystore47
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 10
-; CHECK-NEXT:    beq 0, .LBB3_61
+; CHECK-NEXT:    beq+ 0, .LBB3_61
 ; CHECK-NEXT:  # %bb.59: # %cmpxchg.releasedload46
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 8, 0, 10
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    beq 0, .LBB3_58
+; CHECK-NEXT:    beq+ 0, .LBB3_58
 ; CHECK-NEXT:  .LBB3_60: # %cmpxchg.nostore44
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4678,20 +4665,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
 ; CHECK-NEXT:    lwarx 8, 0, 11
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    bne 0, .LBB3_66
+; CHECK-NEXT:    bne- 0, .LBB3_66
 ; CHECK-NEXT:  # %bb.63: # %cmpxchg.fencedstore29
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_64: # %cmpxchg.trystore28
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 11
-; CHECK-NEXT:    beq 0, .LBB3_67
+; CHECK-NEXT:    beq+ 0, .LBB3_67
 ; CHECK-NEXT:  # %bb.65: # %cmpxchg.releasedload27
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx 8, 0, 11
 ; CHECK-NEXT:    cmplw 8, 6
-; CHECK-NEXT:    beq 0, .LBB3_64
+; CHECK-NEXT:    beq+ 0, .LBB3_64
 ; CHECK-NEXT:  .LBB3_66: # %cmpxchg.nostore25
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4708,20 +4694,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    lbz 6, uc at toc@l(3)
 ; CHECK-NEXT:    ldarx 8, 0, 12
 ; CHECK-NEXT:    cmpld 8, 6
-; CHECK-NEXT:    bne 0, .LBB3_72
+; CHECK-NEXT:    bne- 0, .LBB3_72
 ; CHECK-NEXT:  # %bb.69: # %cmpxchg.fencedstore10
 ; CHECK-NEXT:    extsb 7, 7
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_70: # %cmpxchg.trystore9
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 7, 0, 12
-; CHECK-NEXT:    beq 0, .LBB3_73
+; CHECK-NEXT:    beq+ 0, .LBB3_73
 ; CHECK-NEXT:  # %bb.71: # %cmpxchg.releasedload8
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 8, 0, 12
 ; CHECK-NEXT:    cmpld 8, 6
-; CHECK-NEXT:    beq 0, .LBB3_70
+; CHECK-NEXT:    beq+ 0, .LBB3_70
 ; CHECK-NEXT:  .LBB3_72: # %cmpxchg.nostore6
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4738,20 +4723,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-NEXT:    stw 6, ui at toc@l(5)
 ; CHECK-NEXT:    ldarx 6, 0, 0
 ; CHECK-NEXT:    cmpld 6, 3
-; CHECK-NEXT:    bne 0, .LBB3_78
+; CHECK-NEXT:    bne- 0, .LBB3_78
 ; CHECK-NEXT:  # %bb.75: # %cmpxchg.fencedstore
 ; CHECK-NEXT:    extsb 4, 4
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB3_76: # %cmpxchg.trystore
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 4, 0, 0
-; CHECK-NEXT:    beq 0, .LBB3_79
+; CHECK-NEXT:    beq+ 0, .LBB3_79
 ; CHECK-NEXT:  # %bb.77: # %cmpxchg.releasedload
 ; CHECK-NEXT:    #
 ; CHECK-NEXT:    ldarx 6, 0, 0
 ; CHECK-NEXT:    cmpld 6, 3
-; CHECK-NEXT:    beq 0, .LBB3_76
+; CHECK-NEXT:    beq+ 0, .LBB3_76
 ; CHECK-NEXT:  .LBB3_78: # %cmpxchg.nostore
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    crxor 20, 20, 20
@@ -4807,24 +4791,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 3, 26
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 4
-; AIX32-NEXT:    bne 0, L..BB3_4
+; AIX32-NEXT:    bne- 0, L..BB3_4
 ; AIX32-NEXT:  # %bb.1: # %cmpxchg.fencedstore289
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 5, 5, 26
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_2: # %cmpxchg.trystore288
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 6, 3, 25
 ; AIX32-NEXT:    or 6, 6, 5
 ; AIX32-NEXT:    stwcx. 6, 0, 27
-; AIX32-NEXT:    beq 0, L..BB3_4
+; AIX32-NEXT:    beq+ 0, L..BB3_4
 ; AIX32-NEXT:  # %bb.3: # %cmpxchg.releasedload287
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 3, 0, 27
 ; AIX32-NEXT:    srw 6, 3, 26
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 4
-; AIX32-NEXT:    beq 0, L..BB3_2
+; AIX32-NEXT:    beq+ 0, L..BB3_2
 ; AIX32-NEXT:  L..BB3_4: # %cmpxchg.nostore285
 ; AIX32-NEXT:    not 4, 30
 ; AIX32-NEXT:    srw 5, 3, 26
@@ -4840,25 +4823,24 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 4, 23
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    bne 0, L..BB3_8
+; AIX32-NEXT:    bne- 0, L..BB3_8
 ; AIX32-NEXT:  # %bb.5: # %cmpxchg.fencedstore256
 ; AIX32-NEXT:    clrlwi 5, 5, 24
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 5, 5, 23
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_6: # %cmpxchg.trystore255
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 6, 4, 22
 ; AIX32-NEXT:    or 6, 6, 5
 ; AIX32-NEXT:    stwcx. 6, 0, 24
-; AIX32-NEXT:    beq 0, L..BB3_8
+; AIX32-NEXT:    beq+ 0, L..BB3_8
 ; AIX32-NEXT:  # %bb.7: # %cmpxchg.releasedload254
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 4, 0, 24
 ; AIX32-NEXT:    srw 6, 4, 23
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    beq 0, L..BB3_6
+; AIX32-NEXT:    beq+ 0, L..BB3_6
 ; AIX32-NEXT:  L..BB3_8: # %cmpxchg.nostore252
 ; AIX32-NEXT:    srw 4, 4, 23
 ; AIX32-NEXT:    lwsync
@@ -4878,26 +4860,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 8, 5, 20
 ; AIX32-NEXT:    clrlwi 8, 8, 16
 ; AIX32-NEXT:    cmplw 8, 6
-; AIX32-NEXT:    bne 0, L..BB3_12
+; AIX32-NEXT:    bne- 0, L..BB3_12
 ; AIX32-NEXT:  # %bb.9: # %cmpxchg.fencedstore223
 ; AIX32-NEXT:    extsb 7, 7
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    clrlwi 7, 7, 16
 ; AIX32-NEXT:    slw 7, 7, 20
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_10: # %cmpxchg.trystore222
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 8, 5, 19
 ; AIX32-NEXT:    or 8, 8, 7
 ; AIX32-NEXT:    stwcx. 8, 0, 21
-; AIX32-NEXT:    beq 0, L..BB3_12
+; AIX32-NEXT:    beq+ 0, L..BB3_12
 ; AIX32-NEXT:  # %bb.11: # %cmpxchg.releasedload221
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 5, 0, 21
 ; AIX32-NEXT:    srw 8, 5, 20
 ; AIX32-NEXT:    clrlwi 8, 8, 16
 ; AIX32-NEXT:    cmplw 8, 6
-; AIX32-NEXT:    beq 0, L..BB3_10
+; AIX32-NEXT:    beq+ 0, L..BB3_10
 ; AIX32-NEXT:  L..BB3_12: # %cmpxchg.nostore219
 ; AIX32-NEXT:    srw 5, 5, 20
 ; AIX32-NEXT:    lwsync
@@ -4915,26 +4896,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 7, 3, 17
 ; AIX32-NEXT:    clrlwi 7, 7, 16
 ; AIX32-NEXT:    cmplw 7, 5
-; AIX32-NEXT:    bne 0, L..BB3_16
+; AIX32-NEXT:    bne- 0, L..BB3_16
 ; AIX32-NEXT:  # %bb.13: # %cmpxchg.fencedstore190
 ; AIX32-NEXT:    extsb 6, 6
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    clrlwi 6, 6, 16
 ; AIX32-NEXT:    slw 6, 6, 17
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_14: # %cmpxchg.trystore189
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 7, 3, 16
 ; AIX32-NEXT:    or 7, 7, 6
 ; AIX32-NEXT:    stwcx. 7, 0, 18
-; AIX32-NEXT:    beq 0, L..BB3_16
+; AIX32-NEXT:    beq+ 0, L..BB3_16
 ; AIX32-NEXT:  # %bb.15: # %cmpxchg.releasedload188
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 3, 0, 18
 ; AIX32-NEXT:    srw 7, 3, 17
 ; AIX32-NEXT:    clrlwi 7, 7, 16
 ; AIX32-NEXT:    cmplw 7, 5
-; AIX32-NEXT:    beq 0, L..BB3_14
+; AIX32-NEXT:    beq+ 0, L..BB3_14
 ; AIX32-NEXT:  L..BB3_16: # %cmpxchg.nostore186
 ; AIX32-NEXT:    srw 3, 3, 17
 ; AIX32-NEXT:    lwsync
@@ -4944,20 +4924,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lbz 4, 0(30)
 ; AIX32-NEXT:    lwarx 3, 0, 15
 ; AIX32-NEXT:    cmplw 3, 4
-; AIX32-NEXT:    bne 0, L..BB3_20
+; AIX32-NEXT:    bne- 0, L..BB3_20
 ; AIX32-NEXT:  # %bb.17: # %cmpxchg.fencedstore171
 ; AIX32-NEXT:    extsb 5, 5
 ; AIX32-NEXT:    sync
-; AIX32-NEXT:    .align 5
 ; AIX32-NEXT:  L..BB3_18: # %cmpxchg.trystore170
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    stwcx. 5, 0, 15
-; AIX32-NEXT:    beq 0, L..BB3_20
+; AIX32-NEXT:    beq+ 0, L..BB3_20
 ; AIX32-NEXT:  # %bb.19: # %cmpxchg.releasedload169
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 3, 0, 15
 ; AIX32-NEXT:    cmplw 3, 4
-; AIX32-NEXT:    beq 0, L..BB3_18
+; AIX32-NEXT:    beq+ 0, L..BB3_18
 ; AIX32-NEXT:  L..BB3_20: # %cmpxchg.nostore167
 ; AIX32-NEXT:    lwsync
 ; AIX32-NEXT:    lwz 28, L..C5(2) # @ui
@@ -4966,20 +4945,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lbz 5, 0(29)
 ; AIX32-NEXT:    lwarx 3, 0, 28
 ; AIX32-NEXT:    cmplw 3, 4
-; AIX32-NEXT:    bne 0, L..BB3_24
+; AIX32-NEXT:    bne- 0, L..BB3_24
 ; AIX32-NEXT:  # %bb.21: # %cmpxchg.fencedstore152
 ; AIX32-NEXT:    extsb 5, 5
 ; AIX32-NEXT:    sync
-; AIX32-NEXT:    .align 5
 ; AIX32-NEXT:  L..BB3_22: # %cmpxchg.trystore151
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    stwcx. 5, 0, 28
-; AIX32-NEXT:    beq 0, L..BB3_24
+; AIX32-NEXT:    beq+ 0, L..BB3_24
 ; AIX32-NEXT:  # %bb.23: # %cmpxchg.releasedload150
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 3, 0, 28
 ; AIX32-NEXT:    cmplw 3, 4
-; AIX32-NEXT:    beq 0, L..BB3_22
+; AIX32-NEXT:    beq+ 0, L..BB3_22
 ; AIX32-NEXT:  L..BB3_24: # %cmpxchg.nostore148
 ; AIX32-NEXT:    lwsync
 ; AIX32-NEXT:    stw 3, 0(28)
@@ -5024,24 +5002,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 4, 26
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    bne 0, L..BB3_28
+; AIX32-NEXT:    bne- 0, L..BB3_28
 ; AIX32-NEXT:  # %bb.25: # %cmpxchg.fencedstore119
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 5, 5, 26
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_26: # %cmpxchg.trystore118
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 4, 4, 25
 ; AIX32-NEXT:    or 4, 4, 5
 ; AIX32-NEXT:    stwcx. 4, 0, 27
-; AIX32-NEXT:    beq 0, L..BB3_29
+; AIX32-NEXT:    beq+ 0, L..BB3_29
 ; AIX32-NEXT:  # %bb.27: # %cmpxchg.releasedload117
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 4, 0, 27
 ; AIX32-NEXT:    srw 6, 4, 26
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    beq 0, L..BB3_26
+; AIX32-NEXT:    beq+ 0, L..BB3_26
 ; AIX32-NEXT:  L..BB3_28: # %cmpxchg.nostore115
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5060,24 +5037,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 4, 23
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    bne 0, L..BB3_34
+; AIX32-NEXT:    bne- 0, L..BB3_34
 ; AIX32-NEXT:  # %bb.31: # %cmpxchg.fencedstore86
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    slw 5, 5, 23
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_32: # %cmpxchg.trystore85
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 4, 4, 22
 ; AIX32-NEXT:    or 4, 4, 5
 ; AIX32-NEXT:    stwcx. 4, 0, 24
-; AIX32-NEXT:    beq 0, L..BB3_35
+; AIX32-NEXT:    beq+ 0, L..BB3_35
 ; AIX32-NEXT:  # %bb.33: # %cmpxchg.releasedload84
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 4, 0, 24
 ; AIX32-NEXT:    srw 6, 4, 23
 ; AIX32-NEXT:    clrlwi 6, 6, 24
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    beq 0, L..BB3_32
+; AIX32-NEXT:    beq+ 0, L..BB3_32
 ; AIX32-NEXT:  L..BB3_34: # %cmpxchg.nostore82
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5096,26 +5072,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 4, 20
 ; AIX32-NEXT:    clrlwi 6, 6, 16
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    bne 0, L..BB3_40
+; AIX32-NEXT:    bne- 0, L..BB3_40
 ; AIX32-NEXT:  # %bb.37: # %cmpxchg.fencedstore53
 ; AIX32-NEXT:    extsb 5, 5
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    clrlwi 5, 5, 16
 ; AIX32-NEXT:    slw 5, 5, 20
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_38: # %cmpxchg.trystore52
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 4, 4, 19
 ; AIX32-NEXT:    or 4, 4, 5
 ; AIX32-NEXT:    stwcx. 4, 0, 21
-; AIX32-NEXT:    beq 0, L..BB3_41
+; AIX32-NEXT:    beq+ 0, L..BB3_41
 ; AIX32-NEXT:  # %bb.39: # %cmpxchg.releasedload51
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 4, 0, 21
 ; AIX32-NEXT:    srw 6, 4, 20
 ; AIX32-NEXT:    clrlwi 6, 6, 16
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    beq 0, L..BB3_38
+; AIX32-NEXT:    beq+ 0, L..BB3_38
 ; AIX32-NEXT:  L..BB3_40: # %cmpxchg.nostore49
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5134,26 +5109,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    srw 6, 4, 17
 ; AIX32-NEXT:    clrlwi 6, 6, 16
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    bne 0, L..BB3_46
+; AIX32-NEXT:    bne- 0, L..BB3_46
 ; AIX32-NEXT:  # %bb.43: # %cmpxchg.fencedstore29
 ; AIX32-NEXT:    extsb 5, 5
 ; AIX32-NEXT:    sync
 ; AIX32-NEXT:    clrlwi 5, 5, 16
 ; AIX32-NEXT:    slw 5, 5, 17
-; AIX32-NEXT:    .align 4
 ; AIX32-NEXT:  L..BB3_44: # %cmpxchg.trystore28
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    and 4, 4, 16
 ; AIX32-NEXT:    or 4, 4, 5
 ; AIX32-NEXT:    stwcx. 4, 0, 18
-; AIX32-NEXT:    beq 0, L..BB3_47
+; AIX32-NEXT:    beq+ 0, L..BB3_47
 ; AIX32-NEXT:  # %bb.45: # %cmpxchg.releasedload27
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 4, 0, 18
 ; AIX32-NEXT:    srw 6, 4, 17
 ; AIX32-NEXT:    clrlwi 6, 6, 16
 ; AIX32-NEXT:    cmplw 6, 3
-; AIX32-NEXT:    beq 0, L..BB3_44
+; AIX32-NEXT:    beq+ 0, L..BB3_44
 ; AIX32-NEXT:  L..BB3_46: # %cmpxchg.nostore25
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5170,20 +5144,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lbz 3, 0(30)
 ; AIX32-NEXT:    lwarx 5, 0, 15
 ; AIX32-NEXT:    cmplw 5, 3
-; AIX32-NEXT:    bne 0, L..BB3_52
+; AIX32-NEXT:    bne- 0, L..BB3_52
 ; AIX32-NEXT:  # %bb.49: # %cmpxchg.fencedstore10
 ; AIX32-NEXT:    extsb 4, 4
 ; AIX32-NEXT:    sync
-; AIX32-NEXT:    .align 5
 ; AIX32-NEXT:  L..BB3_50: # %cmpxchg.trystore9
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    stwcx. 4, 0, 15
-; AIX32-NEXT:    beq 0, L..BB3_53
+; AIX32-NEXT:    beq+ 0, L..BB3_53
 ; AIX32-NEXT:  # %bb.51: # %cmpxchg.releasedload8
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 5, 0, 15
 ; AIX32-NEXT:    cmplw 5, 3
-; AIX32-NEXT:    beq 0, L..BB3_50
+; AIX32-NEXT:    beq+ 0, L..BB3_50
 ; AIX32-NEXT:  L..BB3_52: # %cmpxchg.nostore6
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5200,20 +5173,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; AIX32-NEXT:    lbz 3, 0(30)
 ; AIX32-NEXT:    lwarx 5, 0, 28
 ; AIX32-NEXT:    cmplw 5, 3
-; AIX32-NEXT:    bne 0, L..BB3_58
+; AIX32-NEXT:    bne- 0, L..BB3_58
 ; AIX32-NEXT:  # %bb.55: # %cmpxchg.fencedstore
 ; AIX32-NEXT:    extsb 4, 4
 ; AIX32-NEXT:    sync
-; AIX32-NEXT:    .align 5
 ; AIX32-NEXT:  L..BB3_56: # %cmpxchg.trystore
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    stwcx. 4, 0, 28
-; AIX32-NEXT:    beq 0, L..BB3_59
+; AIX32-NEXT:    beq+ 0, L..BB3_59
 ; AIX32-NEXT:  # %bb.57: # %cmpxchg.releasedload
 ; AIX32-NEXT:    #
 ; AIX32-NEXT:    lwarx 5, 0, 28
 ; AIX32-NEXT:    cmplw 5, 3
-; AIX32-NEXT:    beq 0, L..BB3_56
+; AIX32-NEXT:    beq+ 0, L..BB3_56
 ; AIX32-NEXT:  L..BB3_58: # %cmpxchg.nostore
 ; AIX32-NEXT:    crxor 20, 20, 20
 ; AIX32-NEXT:    lwsync
@@ -5838,21 +5810,20 @@ define dso_local i64 @cmpswplp(ptr noundef %ptr, ptr nocapture noundef readnone
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ldarx 4, 0, 3
 ; CHECK-NEXT:    cmpld 4, 5
-; CHECK-NEXT:    bne 0, .LBB6_2
+; CHECK-NEXT:    bne- 0, .LBB6_3
 ; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; CHECK-NEXT:    addi 4, 5, 1
+; CHECK-NEXT:    creqv 20, 20, 20
 ; CHECK-NEXT:    stdcx. 4, 0, 3
-; CHECK-NEXT:    beq 0, .LBB6_4
-; CHECK-NEXT:  .LBB6_2: # %cmpxchg.failure
-; CHECK-NEXT:    crxor 20, 20, 20
-; CHECK-NEXT:  .LBB6_3: # %cmpxchg.end
+; CHECK-NEXT:    bne- 0, .LBB6_3
+; CHECK-NEXT:  .LBB6_2: # %cmpxchg.end
 ; CHECK-NEXT:    li 3, 66
 ; CHECK-NEXT:    li 4, 55
 ; CHECK-NEXT:    isel 3, 4, 3, 20
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB6_4:
-; CHECK-NEXT:    creqv 20, 20, 20
-; CHECK-NEXT:    b .LBB6_3
+; CHECK-NEXT:  .LBB6_3: # %cmpxchg.failure
+; CHECK-NEXT:    crxor 20, 20, 20
+; CHECK-NEXT:    b .LBB6_2
 ;
 ; AIX32-LABEL: cmpswplp:
 ; AIX32:       # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll b/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
index 65a12a6222f24..ae071194b4479 100644
--- a/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
+++ b/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
@@ -19,13 +19,14 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c)  {
 ; CHECK-NEXT:    stw r5, -16(r1)
 ; CHECK-NEXT:    lwarx r6, 0, r3
 ; CHECK-NEXT:    cmplw r6, r7
-; CHECK-NEXT:    bne cr0, L..BB0_2
+; CHECK-NEXT:    bne- cr0, L..BB0_5
 ; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
+; CHECK-NEXT:    creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
 ; CHECK-NEXT:    stwcx. r5, 0, r3
-; CHECK-NEXT:    beq cr0, L..BB0_5
-; CHECK-NEXT:  L..BB0_2: # %cmpxchg.failure
-; CHECK-NEXT:    crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK-NEXT:  # %bb.3: # %cmpxchg.store_expected
+; CHECK-NEXT:    bne- cr0, L..BB0_5
+; CHECK-NEXT:  # %bb.2: # %cmpxchg.end
+; CHECK-NEXT:    bc 12, 4*cr5+lt, L..BB0_4
+; CHECK-NEXT:  L..BB0_3: # %cmpxchg.store_expected
 ; CHECK-NEXT:    stw r6, 0(r4)
 ; CHECK-NEXT:  L..BB0_4: # %cmpxchg.continue
 ; CHECK-NEXT:    li r3, 0
@@ -33,9 +34,9 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c)  {
 ; CHECK-NEXT:    isel r3, r4, r3, 4*cr5+lt
 ; CHECK-NEXT:    stb r3, -17(r1)
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  L..BB0_5:
-; CHECK-NEXT:    creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK-NEXT:    b L..BB0_4
+; CHECK-NEXT:  L..BB0_5: # %cmpxchg.failure
+; CHECK-NEXT:    crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
+; CHECK-NEXT:    b L..BB0_3
 ;
 ; CHECK64-LABEL: foo:
 ; CHECK64:       # %bb.0: # %entry
@@ -46,13 +47,14 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c)  {
 ; CHECK64-NEXT:    stw r5, -24(r1)
 ; CHECK64-NEXT:    lwarx r6, 0, r3
 ; CHECK64-NEXT:    cmplw r6, r7
-; CHECK64-NEXT:    bne cr0, L..BB0_2
+; CHECK64-NEXT:    bne- cr0, L..BB0_5
 ; CHECK64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
+; CHECK64-NEXT:    creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
 ; CHECK64-NEXT:    stwcx. r5, 0, r3
-; CHECK64-NEXT:    beq cr0, L..BB0_5
-; CHECK64-NEXT:  L..BB0_2: # %cmpxchg.failure
-; CHECK64-NEXT:    crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK64-NEXT:  # %bb.3: # %cmpxchg.store_expected
+; CHECK64-NEXT:    bne- cr0, L..BB0_5
+; CHECK64-NEXT:  # %bb.2: # %cmpxchg.end
+; CHECK64-NEXT:    bc 12, 4*cr5+lt, L..BB0_4
+; CHECK64-NEXT:  L..BB0_3: # %cmpxchg.store_expected
 ; CHECK64-NEXT:    stw r6, 0(r4)
 ; CHECK64-NEXT:  L..BB0_4: # %cmpxchg.continue
 ; CHECK64-NEXT:    li r3, 0
@@ -63,9 +65,9 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c)  {
 ; CHECK64-NEXT:    li r3, 0
 ; CHECK64-NEXT:    isel r3, r4, r3, 4*cr5+lt
 ; CHECK64-NEXT:    blr
-; CHECK64-NEXT:  L..BB0_5:
-; CHECK64-NEXT:    creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK64-NEXT:    b L..BB0_4
+; CHECK64-NEXT:  L..BB0_5: # %cmpxchg.failure
+; CHECK64-NEXT:    crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
+; CHECK64-NEXT:    b L..BB0_3
 entry:
   %cp.addr = alloca ptr, align 4
   %old.addr = alloca ptr, align 4
diff --git a/llvm/test/CodeGen/PowerPC/atomic-float.ll b/llvm/test/CodeGen/PowerPC/atomic-float.ll
index 600d28936c162..8232a44c7da26 100644
--- a/llvm/test/CodeGen/PowerPC/atomic-float.ll
+++ b/llvm/test/CodeGen/PowerPC/atomic-float.ll
@@ -9,37 +9,36 @@ define float @test_add(ptr %ptr, float %incr) {
 ; CHECK-64:       # %bb.0: # %entry
 ; CHECK-64-NEXT:    sync
 ; CHECK-64-NEXT:    lfs 0, 0(3)
-; CHECK-64-NEXT:    b .LBB0_3
-; CHECK-64-NEXT:  .LBB0_1:                                # %cmpxchg.nostore
-; CHECK-64-NEXT:                                          #   in Loop: Header=BB0_3 Depth=1
-; CHECK-64-NEXT:    crxor 20, 20, 20
-; CHECK-64-NEXT:  .LBB0_2:                                # %cmpxchg.end
-; CHECK-64-NEXT:                                          #   in Loop: Header=BB0_3 Depth=1
-; CHECK-64-NEXT:    stw 4, -12(1)
-; CHECK-64-NEXT:    lfs 0, -12(1)
-; CHECK-64-NEXT:    bc 12, 20, .LBB0_7
-; CHECK-64-NEXT:  .LBB0_3:                                # %atomicrmw.start
-; CHECK-64-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-64-NEXT:                                          #     Child Loop BB0_4 Depth 2
+; CHECK-64-NEXT:  .LBB0_1: # %atomicrmw.start
+; CHECK-64-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-64-NEXT:    # Child Loop BB0_2 Depth 2
 ; CHECK-64-NEXT:    fadds 2, 0, 1
 ; CHECK-64-NEXT:    stfs 2, -4(1)
 ; CHECK-64-NEXT:    stfs 0, -8(1)
 ; CHECK-64-NEXT:    lwz 5, -4(1)
 ; CHECK-64-NEXT:    lwz 6, -8(1)
-; CHECK-64-NEXT:  .LBB0_4:                                # %cmpxchg.start
-; CHECK-64-NEXT:                                          #   Parent Loop BB0_3 Depth=1
-; CHECK-64-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-64-NEXT:  .LBB0_2: # %cmpxchg.start
+; CHECK-64-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-64-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-64-NEXT:    lwarx 4, 0, 3
-; CHECK-64-NEXT:    cmplw   4, 6
-; CHECK-64-NEXT:    bne     0, .LBB0_1
-; CHECK-64-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-64-NEXT:                                          #   in Loop: Header=BB0_4 Depth=2
+; CHECK-64-NEXT:    cmplw 4, 6
+; CHECK-64-NEXT:    bne- 0, .LBB0_5
+; CHECK-64-NEXT:  # %bb.3: # %cmpxchg.fencedstore
+; CHECK-64-NEXT:    #
 ; CHECK-64-NEXT:    stwcx. 5, 0, 3
-; CHECK-64-NEXT:    bne     0, .LBB0_4
-; CHECK-64-NEXT:  # %bb.6:                                #   in Loop: Header=BB0_3 Depth=1
 ; CHECK-64-NEXT:    creqv 20, 20, 20
-; CHECK-64-NEXT:    b .LBB0_2
-; CHECK-64-NEXT:  .LBB0_7:                                # %atomicrmw.end
+; CHECK-64-NEXT:    bne- 0, .LBB0_2
+; CHECK-64-NEXT:  .LBB0_4: # %cmpxchg.end
+; CHECK-64-NEXT:    #
+; CHECK-64-NEXT:    stw 4, -12(1)
+; CHECK-64-NEXT:    lfs 0, -12(1)
+; CHECK-64-NEXT:    bc 4, 20, .LBB0_1
+; CHECK-64-NEXT:    b .LBB0_6
+; CHECK-64-NEXT:  .LBB0_5: # %cmpxchg.nostore
+; CHECK-64-NEXT:    #
+; CHECK-64-NEXT:    crxor 20, 20, 20
+; CHECK-64-NEXT:    b .LBB0_4
+; CHECK-64-NEXT:  .LBB0_6: # %atomicrmw.end
 ; CHECK-64-NEXT:    fmr 1, 0
 ; CHECK-64-NEXT:    lwsync
 ; CHECK-64-NEXT:    blr
@@ -50,37 +49,36 @@ define float @test_add(ptr %ptr, float %incr) {
 ; CHECK-32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-32-NEXT:    sync
 ; CHECK-32-NEXT:    lfs 0, 0(3)
-; CHECK-32-NEXT:    b .LBB0_3
-; CHECK-32-NEXT:  .LBB0_1:                                # %cmpxchg.nostore
-; CHECK-32-NEXT:                                          #   in Loop: Header=BB0_3 Depth=1
-; CHECK-32-NEXT:    crxor 20, 20, 20
-; CHECK-32-NEXT:  .LBB0_2:                                # %cmpxchg.end
-; CHECK-32-NEXT:                                          #   in Loop: Header=BB0_3 Depth=1
-; CHECK-32-NEXT:    stw 4, 20(1)
-; CHECK-32-NEXT:    lfs 0, 20(1)
-; CHECK-32-NEXT:    bc 12, 20, .LBB0_7
-; CHECK-32-NEXT:  .LBB0_3:                                # %atomicrmw.start
-; CHECK-32-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-32-NEXT:                                          #     Child Loop BB0_4 Depth 2
+; CHECK-32-NEXT:  .LBB0_1: # %atomicrmw.start
+; CHECK-32-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-32-NEXT:    # Child Loop BB0_2 Depth 2
 ; CHECK-32-NEXT:    fadds 2, 0, 1
 ; CHECK-32-NEXT:    stfs 2, 28(1)
 ; CHECK-32-NEXT:    stfs 0, 24(1)
 ; CHECK-32-NEXT:    lwz 5, 28(1)
 ; CHECK-32-NEXT:    lwz 6, 24(1)
-; CHECK-32-NEXT:  .LBB0_4:                                # %cmpxchg.start
-; CHECK-32-NEXT:                                          #   Parent Loop BB0_3 Depth=1
-; CHECK-32-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-32-NEXT:  .LBB0_2: # %cmpxchg.start
+; CHECK-32-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-32-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-32-NEXT:    lwarx 4, 0, 3
-; CHECK-32-NEXT:    cmplw   4, 6
-; CHECK-32-NEXT:    bne     0, .LBB0_1
-; CHECK-32-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-32-NEXT:                                          #   in Loop: Header=BB0_4 Depth=2
+; CHECK-32-NEXT:    cmplw 4, 6
+; CHECK-32-NEXT:    bne- 0, .LBB0_5
+; CHECK-32-NEXT:  # %bb.3: # %cmpxchg.fencedstore
+; CHECK-32-NEXT:    #
 ; CHECK-32-NEXT:    stwcx. 5, 0, 3
-; CHECK-32-NEXT:    bne     0, .LBB0_4
-; CHECK-32-NEXT:  # %bb.6:                                #   in Loop: Header=BB0_3 Depth=1
 ; CHECK-32-NEXT:    creqv 20, 20, 20
-; CHECK-32-NEXT:    b .LBB0_2
-; CHECK-32-NEXT:  .LBB0_7:                                # %atomicrmw.end
+; CHECK-32-NEXT:    bne- 0, .LBB0_2
+; CHECK-32-NEXT:  .LBB0_4: # %cmpxchg.end
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stw 4, 20(1)
+; CHECK-32-NEXT:    lfs 0, 20(1)
+; CHECK-32-NEXT:    bc 4, 20, .LBB0_1
+; CHECK-32-NEXT:    b .LBB0_6
+; CHECK-32-NEXT:  .LBB0_5: # %cmpxchg.nostore
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    crxor 20, 20, 20
+; CHECK-32-NEXT:    b .LBB0_4
+; CHECK-32-NEXT:  .LBB0_6: # %atomicrmw.end
 ; CHECK-32-NEXT:    fmr 1, 0
 ; CHECK-32-NEXT:    lwsync
 ; CHECK-32-NEXT:    addi 1, 1, 32
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
index 27a26aaca8b26..ff176c80ab342 100644
--- a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
@@ -6,45 +6,49 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    not     3, 3
+; CHECK-NEXT:    not 3, 3
 ; CHECK-NEXT:    li 6, 255
 ; CHECK-NEXT:    lwz 8, 0(5)
 ; CHECK-NEXT:    rlwinm 3, 3, 3, 27, 28
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  7, 4, 24
-; CHECK-NEXT:    b .LBB0_2
-; CHECK-NEXT:  .LBB0_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  .LBB0_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB0_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 7, 4, 24
+; CHECK-NEXT:  .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
 ; CHECK-NEXT:    srw 9, 8, 3
-; CHECK-NEXT:    clrlwi  10, 9, 24
-; CHECK-NEXT:    cmplw   10, 7
-; CHECK-NEXT:    blt     0, .LBB0_4
-; CHECK-NEXT:  # %bb.3:                                #   in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    sub     9, 9, 4
-; CHECK-NEXT:  .LBB0_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    clrlwi  9, 9, 24
+; CHECK-NEXT:    clrlwi 10, 9, 24
+; CHECK-NEXT:    cmplw 10, 7
+; CHECK-NEXT:    blt 0, .LBB0_3
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    sub 9, 9, 4
+; CHECK-NEXT:  .LBB0_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 9, 9, 24
 ; CHECK-NEXT:    slw 9, 9, 3
 ; CHECK-NEXT:    and 10, 8, 6
 ; CHECK-NEXT:    or 10, 10, 9
-; CHECK-NEXT:  .LBB0_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB0_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB0_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 9, 0, 5
-; CHECK-NEXT:    cmplw   9, 8
-; CHECK-NEXT:    bne     0, .LBB0_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_5 Depth=2
+; CHECK-NEXT:    cmplw 9, 8
+; CHECK-NEXT:    bne- 0, .LBB0_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 10, 0, 5
-; CHECK-NEXT:    bne     0, .LBB0_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB0_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    bc 4, 20, .LBB0_1
+; CHECK-NEXT:    b .LBB0_8
+; CHECK-NEXT:  .LBB0_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 9, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -57,47 +61,51 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    clrlwi  3, 3, 30
+; CHECK-NEXT:    clrlwi 3, 3, 30
 ; CHECK-NEXT:    lis 6, 0
 ; CHECK-NEXT:    xori 3, 3, 2
 ; CHECK-NEXT:    lwz 8, 0(5)
 ; CHECK-NEXT:    ori 6, 6, 65535
 ; CHECK-NEXT:    slwi 3, 3, 3
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  7, 4, 16
-; CHECK-NEXT:    b .LBB1_2
-; CHECK-NEXT:  .LBB1_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  .LBB1_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB1_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 7, 4, 16
+; CHECK-NEXT:  .LBB1_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB1_4 Depth 2
 ; CHECK-NEXT:    srw 9, 8, 3
-; CHECK-NEXT:    clrlwi  10, 9, 16
-; CHECK-NEXT:    cmplw   10, 7
-; CHECK-NEXT:    blt     0, .LBB1_4
-; CHECK-NEXT:  # %bb.3:                                #   in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    sub     9, 9, 4
-; CHECK-NEXT:  .LBB1_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    clrlwi  9, 9, 16
+; CHECK-NEXT:    clrlwi 10, 9, 16
+; CHECK-NEXT:    cmplw 10, 7
+; CHECK-NEXT:    blt 0, .LBB1_3
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    sub 9, 9, 4
+; CHECK-NEXT:  .LBB1_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 9, 9, 16
 ; CHECK-NEXT:    slw 9, 9, 3
 ; CHECK-NEXT:    and 10, 8, 6
 ; CHECK-NEXT:    or 10, 10, 9
-; CHECK-NEXT:  .LBB1_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB1_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB1_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB1_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 9, 0, 5
-; CHECK-NEXT:    cmplw   9, 8
-; CHECK-NEXT:    bne     0, .LBB1_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_5 Depth=2
+; CHECK-NEXT:    cmplw 9, 8
+; CHECK-NEXT:    bne- 0, .LBB1_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 10, 0, 5
-; CHECK-NEXT:    bne     0, .LBB1_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB1_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    bc 4, 20, .LBB1_1
+; CHECK-NEXT:    b .LBB1_8
+; CHECK-NEXT:  .LBB1_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    b .LBB1_1
+; CHECK-NEXT:  .LBB1_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 9, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -110,34 +118,38 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    lwz 6, 0(3)
-; CHECK-NEXT:    b .LBB2_2
-; CHECK-NEXT:  .LBB2_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB2_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB2_5 Depth 2
-; CHECK-NEXT:    cmplw   6, 4
-; CHECK-NEXT:    bge 0, .LBB2_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    mr      7, 6
-; CHECK-NEXT:    b .LBB2_5
-; CHECK-NEXT:  .LBB2_4:                                #   in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    sub     7, 6, 4
-; CHECK-NEXT:  .LBB2_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB2_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB2_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB2_4 Depth 2
+; CHECK-NEXT:    cmplw 6, 4
+; CHECK-NEXT:    bge 0, .LBB2_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 6
+; CHECK-NEXT:    b .LBB2_4
+; CHECK-NEXT:  .LBB2_3:
+; CHECK-NEXT:    sub 7, 6, 4
+; CHECK-NEXT:  .LBB2_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB2_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 5, 0, 3
-; CHECK-NEXT:    cmplw   5, 6
-; CHECK-NEXT:    bne     0, .LBB2_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_5 Depth=2
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    bne- 0, .LBB2_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB2_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB2_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB2_1
+; CHECK-NEXT:    b .LBB2_8
+; CHECK-NEXT:  .LBB2_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB2_1
+; CHECK-NEXT:  .LBB2_8: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -150,34 +162,38 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    ld 6, 0(3)
-; CHECK-NEXT:    b .LBB3_2
-; CHECK-NEXT:  .LBB3_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB3_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB3_5 Depth 2
-; CHECK-NEXT:    cmpld   6, 4
-; CHECK-NEXT:    bge 0, .LBB3_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT:    mr      7, 6
-; CHECK-NEXT:    b .LBB3_5
-; CHECK-NEXT:  .LBB3_4:                                #   in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT:    sub     7, 6, 4
-; CHECK-NEXT:  .LBB3_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB3_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB3_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB3_4 Depth 2
+; CHECK-NEXT:    cmpld 6, 4
+; CHECK-NEXT:    bge 0, .LBB3_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 6
+; CHECK-NEXT:    b .LBB3_4
+; CHECK-NEXT:  .LBB3_3:
+; CHECK-NEXT:    sub 7, 6, 4
+; CHECK-NEXT:  .LBB3_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB3_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldarx 5, 0, 3
-; CHECK-NEXT:    cmpld   5, 6
-; CHECK-NEXT:    bne     0, .LBB3_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_5 Depth=2
+; CHECK-NEXT:    cmpld 5, 6
+; CHECK-NEXT:    bne- 0, .LBB3_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB3_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB3_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB3_1
+; CHECK-NEXT:    b .LBB3_8
+; CHECK-NEXT:  .LBB3_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB3_1
+; CHECK-NEXT:  .LBB3_8: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -190,47 +206,51 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    not     3, 3
+; CHECK-NEXT:    not 3, 3
 ; CHECK-NEXT:    li 6, 255
 ; CHECK-NEXT:    lwz 7, 0(5)
 ; CHECK-NEXT:    rlwinm 3, 3, 3, 27, 28
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  4, 4, 24
-; CHECK-NEXT:    b .LBB4_2
-; CHECK-NEXT:  .LBB4_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  .LBB4_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB4_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 4, 4, 24
+; CHECK-NEXT:  .LBB4_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB4_4 Depth 2
 ; CHECK-NEXT:    srw 8, 7, 3
-; CHECK-NEXT:    clrlwi  9, 8, 24
-; CHECK-NEXT:    sub     8, 9, 4
-; CHECK-NEXT:    cmplw   8, 9
+; CHECK-NEXT:    clrlwi 9, 8, 24
+; CHECK-NEXT:    sub 8, 9, 4
+; CHECK-NEXT:    cmplw 8, 9
 ; CHECK-NEXT:    li 9, 0
-; CHECK-NEXT:    bgt     0, .LBB4_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT:    mr      9, 8
-; CHECK-NEXT:  .LBB4_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    bgt 0, .LBB4_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 9, 8
+; CHECK-NEXT:  .LBB4_3: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    slw 8, 9, 3
 ; CHECK-NEXT:    and 9, 7, 6
 ; CHECK-NEXT:    or 9, 9, 8
-; CHECK-NEXT:  .LBB4_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB4_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB4_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB4_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 8, 0, 5
-; CHECK-NEXT:    cmplw   8, 7
-; CHECK-NEXT:    bne     0, .LBB4_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_5 Depth=2
+; CHECK-NEXT:    cmplw 8, 7
+; CHECK-NEXT:    bne- 0, .LBB4_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 9, 0, 5
-; CHECK-NEXT:    bne     0, .LBB4_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB4_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    bc 4, 20, .LBB4_1
+; CHECK-NEXT:    b .LBB4_8
+; CHECK-NEXT:  .LBB4_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    b .LBB4_1
+; CHECK-NEXT:  .LBB4_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 8, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -243,49 +263,53 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    clrlwi  3, 3, 30
+; CHECK-NEXT:    clrlwi 3, 3, 30
 ; CHECK-NEXT:    lis 6, 0
 ; CHECK-NEXT:    xori 3, 3, 2
 ; CHECK-NEXT:    lwz 7, 0(5)
 ; CHECK-NEXT:    ori 6, 6, 65535
 ; CHECK-NEXT:    slwi 3, 3, 3
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  4, 4, 16
-; CHECK-NEXT:    b .LBB5_2
-; CHECK-NEXT:  .LBB5_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  .LBB5_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB5_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 4, 4, 16
+; CHECK-NEXT:  .LBB5_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB5_4 Depth 2
 ; CHECK-NEXT:    srw 8, 7, 3
-; CHECK-NEXT:    clrlwi  9, 8, 16
-; CHECK-NEXT:    sub     8, 9, 4
-; CHECK-NEXT:    cmplw   8, 9
+; CHECK-NEXT:    clrlwi 9, 8, 16
+; CHECK-NEXT:    sub 8, 9, 4
+; CHECK-NEXT:    cmplw 8, 9
 ; CHECK-NEXT:    li 9, 0
-; CHECK-NEXT:    bgt     0, .LBB5_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    mr      9, 8
-; CHECK-NEXT:  .LBB5_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    bgt 0, .LBB5_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 9, 8
+; CHECK-NEXT:  .LBB5_3: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    slw 8, 9, 3
 ; CHECK-NEXT:    and 9, 7, 6
 ; CHECK-NEXT:    or 9, 9, 8
-; CHECK-NEXT:  .LBB5_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB5_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB5_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB5_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 8, 0, 5
-; CHECK-NEXT:    cmplw   8, 7
-; CHECK-NEXT:    bne     0, .LBB5_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_5 Depth=2
+; CHECK-NEXT:    cmplw 8, 7
+; CHECK-NEXT:    bne- 0, .LBB5_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 9, 0, 5
-; CHECK-NEXT:    bne     0, .LBB5_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB5_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    bc 4, 20, .LBB5_1
+; CHECK-NEXT:    b .LBB5_8
+; CHECK-NEXT:  .LBB5_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    b .LBB5_1
+; CHECK-NEXT:  .LBB5_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 8, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -298,33 +322,37 @@ define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    lwz 6, 0(3)
-; CHECK-NEXT:    b .LBB6_2
-; CHECK-NEXT:  .LBB6_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB6_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB6_4 Depth 2
-; CHECK-NEXT:    sub     5, 6, 4
-; CHECK-NEXT:    cmplw   5, 6
+; CHECK-NEXT:  .LBB6_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB6_3 Depth 2
+; CHECK-NEXT:    sub 5, 6, 4
+; CHECK-NEXT:    cmplw 5, 6
 ; CHECK-NEXT:    li 7, 0
-; CHECK-NEXT:    bgt     0, .LBB6_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT:    mr      7, 5
-; CHECK-NEXT:  .LBB6_4:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB6_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:    bgt 0, .LBB6_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:  .LBB6_3: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB6_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 5, 0, 3
-; CHECK-NEXT:    cmplw   5, 6
-; CHECK-NEXT:    bne     0, .LBB6_1
-; CHECK-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_4 Depth=2
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    bne- 0, .LBB6_6
+; CHECK-NEXT:  # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB6_4
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.7:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB6_3
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB6_1
+; CHECK-NEXT:    b .LBB6_7
+; CHECK-NEXT:  .LBB6_6: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB6_1
+; CHECK-NEXT:  .LBB6_7: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -337,33 +365,37 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    ld 6, 0(3)
-; CHECK-NEXT:    b .LBB7_2
-; CHECK-NEXT:  .LBB7_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB7_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB7_4 Depth 2
-; CHECK-NEXT:    subc    5, 6, 4
+; CHECK-NEXT:  .LBB7_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB7_3 Depth 2
+; CHECK-NEXT:    subc 5, 6, 4
 ; CHECK-NEXT:    li 7, 0
 ; CHECK-NEXT:    addze. 8, 7
-; CHECK-NEXT:    beq     0, .LBB7_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT:    mr      7, 5
-; CHECK-NEXT:  .LBB7_4:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB7_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:    beq 0, .LBB7_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 5
+; CHECK-NEXT:  .LBB7_3: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB7_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldarx 5, 0, 3
-; CHECK-NEXT:    cmpld   5, 6
-; CHECK-NEXT:    bne     0, .LBB7_1
-; CHECK-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_4 Depth=2
+; CHECK-NEXT:    cmpld 5, 6
+; CHECK-NEXT:    bne- 0, .LBB7_6
+; CHECK-NEXT:  # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB7_4
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.7:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB7_3
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB7_1
+; CHECK-NEXT:    b .LBB7_7
+; CHECK-NEXT:  .LBB7_6: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB7_1
+; CHECK-NEXT:  .LBB7_7: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
index 6ced47bd6bcba..4dc6d0ad3d5c7 100644
--- a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
@@ -6,47 +6,51 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    not     3, 3
+; CHECK-NEXT:    not 3, 3
 ; CHECK-NEXT:    li 6, 255
 ; CHECK-NEXT:    lwz 7, 0(5)
 ; CHECK-NEXT:    rlwinm 3, 3, 3, 27, 28
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  4, 4, 24
-; CHECK-NEXT:    b .LBB0_2
-; CHECK-NEXT:  .LBB0_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  .LBB0_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB0_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 4, 4, 24
+; CHECK-NEXT:  .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
 ; CHECK-NEXT:    srw 8, 7, 3
-; CHECK-NEXT:    clrlwi  9, 8, 24
-; CHECK-NEXT:    cmplw   9, 4
+; CHECK-NEXT:    clrlwi 9, 8, 24
+; CHECK-NEXT:    cmplw 9, 4
 ; CHECK-NEXT:    li 9, 0
-; CHECK-NEXT:    bge 0, .LBB0_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    bge 0, .LBB0_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 9, 8, 1
-; CHECK-NEXT:  .LBB0_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    clrlwi  8, 9, 24
+; CHECK-NEXT:  .LBB0_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 8, 9, 24
 ; CHECK-NEXT:    slw 8, 8, 3
 ; CHECK-NEXT:    and 9, 7, 6
 ; CHECK-NEXT:    or 9, 9, 8
-; CHECK-NEXT:  .LBB0_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB0_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB0_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 8, 0, 5
-; CHECK-NEXT:    cmplw   8, 7
-; CHECK-NEXT:    bne     0, .LBB0_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB0_5 Depth=2
+; CHECK-NEXT:    cmplw 8, 7
+; CHECK-NEXT:    bne- 0, .LBB0_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 9, 0, 5
-; CHECK-NEXT:    bne     0, .LBB0_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB0_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    bc 4, 20, .LBB0_1
+; CHECK-NEXT:    b .LBB0_8
+; CHECK-NEXT:  .LBB0_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    b .LBB0_1
+; CHECK-NEXT:  .LBB0_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 8, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -59,49 +63,53 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    clrlwi  3, 3, 30
+; CHECK-NEXT:    clrlwi 3, 3, 30
 ; CHECK-NEXT:    lis 6, 0
 ; CHECK-NEXT:    xori 3, 3, 2
 ; CHECK-NEXT:    lwz 7, 0(5)
 ; CHECK-NEXT:    ori 6, 6, 65535
 ; CHECK-NEXT:    slwi 3, 3, 3
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  4, 4, 16
-; CHECK-NEXT:    b .LBB1_2
-; CHECK-NEXT:  .LBB1_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  .LBB1_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB1_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 4, 4, 16
+; CHECK-NEXT:  .LBB1_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB1_4 Depth 2
 ; CHECK-NEXT:    srw 8, 7, 3
-; CHECK-NEXT:    clrlwi  9, 8, 16
-; CHECK-NEXT:    cmplw   9, 4
+; CHECK-NEXT:    clrlwi 9, 8, 16
+; CHECK-NEXT:    cmplw 9, 4
 ; CHECK-NEXT:    li 9, 0
-; CHECK-NEXT:    bge 0, .LBB1_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    bge 0, .LBB1_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 9, 8, 1
-; CHECK-NEXT:  .LBB1_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT:    clrlwi  8, 9, 16
+; CHECK-NEXT:  .LBB1_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 8, 9, 16
 ; CHECK-NEXT:    slw 8, 8, 3
 ; CHECK-NEXT:    and 9, 7, 6
 ; CHECK-NEXT:    or 9, 9, 8
-; CHECK-NEXT:  .LBB1_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB1_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB1_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB1_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 8, 0, 5
-; CHECK-NEXT:    cmplw   8, 7
-; CHECK-NEXT:    bne     0, .LBB1_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB1_5 Depth=2
+; CHECK-NEXT:    cmplw 8, 7
+; CHECK-NEXT:    bne- 0, .LBB1_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 9, 0, 5
-; CHECK-NEXT:    bne     0, .LBB1_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      7, 8
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB1_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    bc 4, 20, .LBB1_1
+; CHECK-NEXT:    b .LBB1_8
+; CHECK-NEXT:  .LBB1_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 7, 8
+; CHECK-NEXT:    b .LBB1_1
+; CHECK-NEXT:  .LBB1_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 8, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -114,32 +122,36 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    lwz 6, 0(3)
-; CHECK-NEXT:    b .LBB2_2
-; CHECK-NEXT:  .LBB2_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB2_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB2_4 Depth 2
-; CHECK-NEXT:    cmplw   6, 4
+; CHECK-NEXT:  .LBB2_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB2_3 Depth 2
+; CHECK-NEXT:    cmplw 6, 4
 ; CHECK-NEXT:    li 7, 0
-; CHECK-NEXT:    bge 0, .LBB2_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    bge 0, .LBB2_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 7, 6, 1
-; CHECK-NEXT:  .LBB2_4:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB2_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB2_3: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB2_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 5, 0, 3
-; CHECK-NEXT:    cmplw   5, 6
-; CHECK-NEXT:    bne     0, .LBB2_1
-; CHECK-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB2_4 Depth=2
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    bne- 0, .LBB2_6
+; CHECK-NEXT:  # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB2_4
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.7:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB2_3
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB2_1
+; CHECK-NEXT:    b .LBB2_7
+; CHECK-NEXT:  .LBB2_6: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB2_1
+; CHECK-NEXT:  .LBB2_7: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -152,32 +164,36 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    ld 6, 0(3)
-; CHECK-NEXT:    b .LBB3_2
-; CHECK-NEXT:  .LBB3_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB3_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB3_4 Depth 2
-; CHECK-NEXT:    cmpld   6, 4
+; CHECK-NEXT:  .LBB3_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB3_3 Depth 2
+; CHECK-NEXT:    cmpld 6, 4
 ; CHECK-NEXT:    li 7, 0
-; CHECK-NEXT:    bge 0, .LBB3_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    bge 0, .LBB3_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 7, 6, 1
-; CHECK-NEXT:  .LBB3_4:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB3_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB3_3: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB3_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldarx 5, 0, 3
-; CHECK-NEXT:    cmpld   5, 6
-; CHECK-NEXT:    bne     0, .LBB3_1
-; CHECK-NEXT:  # %bb.5:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB3_4 Depth=2
+; CHECK-NEXT:    cmpld 5, 6
+; CHECK-NEXT:    bne- 0, .LBB3_6
+; CHECK-NEXT:  # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB3_4
-; CHECK-NEXT:  # %bb.6:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.7:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB3_3
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB3_1
+; CHECK-NEXT:    b .LBB3_7
+; CHECK-NEXT:  .LBB3_6: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB3_1
+; CHECK-NEXT:  .LBB3_7: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -190,48 +206,52 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    not     3, 3
+; CHECK-NEXT:    not 3, 3
 ; CHECK-NEXT:    li 6, 255
 ; CHECK-NEXT:    lwz 8, 0(5)
 ; CHECK-NEXT:    rlwinm 3, 3, 3, 27, 28
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  7, 4, 24
-; CHECK-NEXT:    b .LBB4_2
-; CHECK-NEXT:  .LBB4_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  .LBB4_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB4_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 7, 4, 24
+; CHECK-NEXT:  .LBB4_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB4_4 Depth 2
 ; CHECK-NEXT:    srw 9, 8, 3
 ; CHECK-NEXT:    andi. 10, 9, 255
 ; CHECK-NEXT:    cmplw 1, 10, 7
 ; CHECK-NEXT:    cror 20, 2, 5
-; CHECK-NEXT:    mr      10, 4
-; CHECK-NEXT:    bc 12, 20, .LBB4_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    mr 10, 4
+; CHECK-NEXT:    bc 12, 20, .LBB4_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 10, 9, -1
-; CHECK-NEXT:  .LBB4_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT:    clrlwi  9, 10, 24
+; CHECK-NEXT:  .LBB4_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 9, 10, 24
 ; CHECK-NEXT:    slw 9, 9, 3
 ; CHECK-NEXT:    and 10, 8, 6
 ; CHECK-NEXT:    or 10, 10, 9
-; CHECK-NEXT:  .LBB4_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB4_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB4_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB4_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 9, 0, 5
-; CHECK-NEXT:    cmplw   9, 8
-; CHECK-NEXT:    bne     0, .LBB4_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB4_5 Depth=2
+; CHECK-NEXT:    cmplw 9, 8
+; CHECK-NEXT:    bne- 0, .LBB4_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 10, 0, 5
-; CHECK-NEXT:    bne     0, .LBB4_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB4_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    bc 4, 20, .LBB4_1
+; CHECK-NEXT:    b .LBB4_8
+; CHECK-NEXT:  .LBB4_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    b .LBB4_1
+; CHECK-NEXT:  .LBB4_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 9, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -244,50 +264,54 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    rldicr 5, 3, 0, 61
-; CHECK-NEXT:    clrlwi  3, 3, 30
+; CHECK-NEXT:    clrlwi 3, 3, 30
 ; CHECK-NEXT:    lis 6, 0
 ; CHECK-NEXT:    xori 3, 3, 2
 ; CHECK-NEXT:    lwz 8, 0(5)
 ; CHECK-NEXT:    ori 6, 6, 65535
 ; CHECK-NEXT:    slwi 3, 3, 3
 ; CHECK-NEXT:    slw 6, 6, 3
-; CHECK-NEXT:    not     6, 6
-; CHECK-NEXT:    clrlwi  7, 4, 16
-; CHECK-NEXT:    b .LBB5_2
-; CHECK-NEXT:  .LBB5_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  .LBB5_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB5_5 Depth 2
+; CHECK-NEXT:    not 6, 6
+; CHECK-NEXT:    clrlwi 7, 4, 16
+; CHECK-NEXT:  .LBB5_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB5_4 Depth 2
 ; CHECK-NEXT:    srw 9, 8, 3
 ; CHECK-NEXT:    andi. 10, 9, 65535
 ; CHECK-NEXT:    cmplw 1, 10, 7
 ; CHECK-NEXT:    cror 20, 2, 5
-; CHECK-NEXT:    mr      10, 4
-; CHECK-NEXT:    bc 12, 20, .LBB5_4
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    mr 10, 4
+; CHECK-NEXT:    bc 12, 20, .LBB5_3
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 10, 9, -1
-; CHECK-NEXT:  .LBB5_4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    clrlwi  9, 10, 16
+; CHECK-NEXT:  .LBB5_3: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    clrlwi 9, 10, 16
 ; CHECK-NEXT:    slw 9, 9, 3
 ; CHECK-NEXT:    and 10, 8, 6
 ; CHECK-NEXT:    or 10, 10, 9
-; CHECK-NEXT:  .LBB5_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB5_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB5_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB5_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 9, 0, 5
-; CHECK-NEXT:    cmplw   9, 8
-; CHECK-NEXT:    bne     0, .LBB5_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB5_5 Depth=2
+; CHECK-NEXT:    cmplw 9, 8
+; CHECK-NEXT:    bne- 0, .LBB5_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 10, 0, 5
-; CHECK-NEXT:    bne     0, .LBB5_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      8, 9
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB5_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    bc 4, 20, .LBB5_1
+; CHECK-NEXT:    b .LBB5_8
+; CHECK-NEXT:  .LBB5_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 8, 9
+; CHECK-NEXT:    b .LBB5_1
+; CHECK-NEXT:  .LBB5_8: # %atomicrmw.end
 ; CHECK-NEXT:    srw 3, 9, 3
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -300,37 +324,41 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    lwz 6, 0(3)
-; CHECK-NEXT:    b .LBB6_2
-; CHECK-NEXT:  .LBB6_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB6_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB6_5 Depth 2
-; CHECK-NEXT:    cmpwi   6, 0
-; CHECK-NEXT:    mr      7, 4
-; CHECK-NEXT:    bc 12, 2, .LBB6_5
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT:    cmplw   6, 4
-; CHECK-NEXT:    mr      7, 4
-; CHECK-NEXT:    bc 12, 1, .LBB6_5
-; CHECK-NEXT:  # %bb.4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT:  .LBB6_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB6_4 Depth 2
+; CHECK-NEXT:    cmpwi 6, 0
+; CHECK-NEXT:    mr 7, 4
+; CHECK-NEXT:    bc 12, 2, .LBB6_4
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    cmplw 6, 4
+; CHECK-NEXT:    mr 7, 4
+; CHECK-NEXT:    bc 12, 1, .LBB6_4
+; CHECK-NEXT:  # %bb.3: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 7, 6, -1
-; CHECK-NEXT:  .LBB6_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB6_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB6_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB6_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lwarx 5, 0, 3
-; CHECK-NEXT:    cmplw   5, 6
-; CHECK-NEXT:    bne     0, .LBB6_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB6_5 Depth=2
+; CHECK-NEXT:    cmplw 5, 6
+; CHECK-NEXT:    bne- 0, .LBB6_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB6_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB6_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB6_1
+; CHECK-NEXT:    b .LBB6_8
+; CHECK-NEXT:  .LBB6_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB6_1
+; CHECK-NEXT:  .LBB6_8: # %atomicrmw.end
 ; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
@@ -343,38 +371,42 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    sync
 ; CHECK-NEXT:    ld 6, 0(3)
-; CHECK-NEXT:    b .LBB7_2
-; CHECK-NEXT:  .LBB7_1:                                # %cmpxchg.nostore
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  .LBB7_2:                                # %atomicrmw.start
-; CHECK-NEXT:                                          # =>This Loop Header: Depth=1
-; CHECK-NEXT:                                          #     Child Loop BB7_5 Depth 2
-; CHECK-NEXT:    cmpdi   6, 0
-; CHECK-NEXT:    mr      7, 4
-; CHECK-NEXT:    bc 12, 2, .LBB7_5
-; CHECK-NEXT:  # %bb.3:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT:    cmpld   6, 4
-; CHECK-NEXT:    mr      7, 4
-; CHECK-NEXT:    bc 12, 1, .LBB7_5
-; CHECK-NEXT:  # %bb.4:                                # %atomicrmw.start
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_2 Depth=1
+; CHECK-NEXT:  .LBB7_1: # %atomicrmw.start
+; CHECK-NEXT:    # =>This Loop Header: Depth=1
+; CHECK-NEXT:    # Child Loop BB7_4 Depth 2
+; CHECK-NEXT:    cmpdi 6, 0
+; CHECK-NEXT:    mr 7, 4
+; CHECK-NEXT:    bc 12, 2, .LBB7_4
+; CHECK-NEXT:  # %bb.2: # %atomicrmw.start
+; CHECK-NEXT:    #
+; CHECK-NEXT:    cmpld 6, 4
+; CHECK-NEXT:    mr 7, 4
+; CHECK-NEXT:    bc 12, 1, .LBB7_4
+; CHECK-NEXT:  # %bb.3: # %atomicrmw.start
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    addi 7, 6, -1
-; CHECK-NEXT:  .LBB7_5:                                # %cmpxchg.start
-; CHECK-NEXT:                                          #   Parent Loop BB7_2 Depth=1
-; CHECK-NEXT:                                          # =>  This Inner Loop Header: Depth=2
+; CHECK-NEXT:  .LBB7_4: # %cmpxchg.start
+; CHECK-NEXT:    # Parent Loop BB7_1 Depth=1
+; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    ldarx 5, 0, 3
-; CHECK-NEXT:    cmpld   5, 6
-; CHECK-NEXT:    bne     0, .LBB7_1
-; CHECK-NEXT:  # %bb.6:                                # %cmpxchg.fencedstore
-; CHECK-NEXT:                                          #   in Loop: Header=BB7_5 Depth=2
+; CHECK-NEXT:    cmpld 5, 6
+; CHECK-NEXT:    bne- 0, .LBB7_7
+; CHECK-NEXT:  # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stdcx. 7, 0, 3
-; CHECK-NEXT:    bne     0, .LBB7_5
-; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mr      6, 5
-; CHECK-NEXT:  # %bb.8:                                # %atomicrmw.end
-; CHECK-NEXT:    mr      3, 5
+; CHECK-NEXT:    creqv 20, 20, 20
+; CHECK-NEXT:    bne- 0, .LBB7_4
+; CHECK-NEXT:  # %bb.6: # %cmpxchg.end
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    bc 4, 20, .LBB7_1
+; CHECK-NEXT:    b .LBB7_8
+; CHECK-NEXT:  .LBB7_7: # %cmpxchg.nostore
+; CHECK-NEXT:    #
+; CHECK-NEXT:    mr 6, 5
+; CHECK-NEXT:    b .LBB7_1
+; CHECK-NEXT:  .LBB7_8: # %atomicrmw.end
+; CHECK-NEXT:    mr 3, 5
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
   %result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst
diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
index 0474a479a1fef..90990bbb4124d 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
@@ -402,16 +402,15 @@ define void @test40(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB40_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB40_1
+; PPC64LE-NEXT:    bne- 0, .LBB40_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
@@ -423,16 +422,15 @@ define void @test41(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB41_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB41_1
+; PPC64LE-NEXT:    bne- 0, .LBB41_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -445,16 +443,15 @@ define void @test42(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB42_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB42_3
+; PPC64LE-NEXT:    bne- 0, .LBB42_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB42_1
+; PPC64LE-NEXT:    bne- 0, .LBB42_1
 ; PPC64LE-NEXT:  .LBB42_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -468,7 +465,7 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
@@ -476,12 +473,12 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:  .LBB43_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB43_2
+; PPC64LE-NEXT:    beq+ 0, .LBB43_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic
   ret void
@@ -493,7 +490,7 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB44_4
+; PPC64LE-NEXT:    bne- 0, .LBB44_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
@@ -501,12 +498,12 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:  .LBB44_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB44_2
+; PPC64LE-NEXT:    beq+ 0, .LBB44_2
 ; PPC64LE-NEXT:  .LBB44_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -520,23 +517,21 @@ define void @test45(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB45_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB45_5
+; PPC64LE-NEXT:    beq+ 0, .LBB45_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB45_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB45_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB45_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB45_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic
@@ -549,20 +544,19 @@ define void @test46(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB46_4
+; PPC64LE-NEXT:    bne- 0, .LBB46_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB46_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB46_4
+; PPC64LE-NEXT:    beq+ 0, .LBB46_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB46_2
+; PPC64LE-NEXT:    beq+ 0, .LBB46_2
 ; PPC64LE-NEXT:  .LBB46_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -576,23 +570,21 @@ define void @test47(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB47_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB47_5
+; PPC64LE-NEXT:    beq+ 0, .LBB47_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB47_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB47_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB47_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB47_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst monotonic
@@ -605,20 +597,19 @@ define void @test48(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB48_4
+; PPC64LE-NEXT:    bne- 0, .LBB48_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB48_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB48_4
+; PPC64LE-NEXT:    beq+ 0, .LBB48_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB48_2
+; PPC64LE-NEXT:    beq+ 0, .LBB48_2
 ; PPC64LE-NEXT:  .LBB48_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -632,20 +623,19 @@ define void @test49(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB49_4
+; PPC64LE-NEXT:    bne- 0, .LBB49_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB49_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB49_4
+; PPC64LE-NEXT:    beq+ 0, .LBB49_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB49_2
+; PPC64LE-NEXT:    beq+ 0, .LBB49_2
 ; PPC64LE-NEXT:  .LBB49_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -658,16 +648,15 @@ define void @test50(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB50_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB50_1
+; PPC64LE-NEXT:    bne- 0, .LBB50_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
@@ -679,16 +668,15 @@ define void @test51(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB51_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB51_1
+; PPC64LE-NEXT:    bne- 0, .LBB51_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -701,16 +689,15 @@ define void @test52(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB52_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB52_3
+; PPC64LE-NEXT:    bne- 0, .LBB52_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB52_1
+; PPC64LE-NEXT:    bne- 0, .LBB52_1
 ; PPC64LE-NEXT:  .LBB52_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -724,7 +711,7 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
@@ -732,12 +719,12 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:  .LBB53_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB53_2
+; PPC64LE-NEXT:    beq+ 0, .LBB53_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic
   ret void
@@ -749,7 +736,7 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB54_4
+; PPC64LE-NEXT:    bne- 0, .LBB54_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
@@ -757,12 +744,12 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:  .LBB54_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB54_2
+; PPC64LE-NEXT:    beq+ 0, .LBB54_2
 ; PPC64LE-NEXT:  .LBB54_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -776,23 +763,21 @@ define void @test55(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB55_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB55_5
+; PPC64LE-NEXT:    beq+ 0, .LBB55_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB55_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB55_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB55_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB55_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic
@@ -805,20 +790,19 @@ define void @test56(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB56_4
+; PPC64LE-NEXT:    bne- 0, .LBB56_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB56_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB56_4
+; PPC64LE-NEXT:    beq+ 0, .LBB56_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB56_2
+; PPC64LE-NEXT:    beq+ 0, .LBB56_2
 ; PPC64LE-NEXT:  .LBB56_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -832,23 +816,21 @@ define void @test57(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB57_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB57_5
+; PPC64LE-NEXT:    beq+ 0, .LBB57_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB57_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB57_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB57_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB57_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst monotonic
@@ -861,20 +843,19 @@ define void @test58(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB58_4
+; PPC64LE-NEXT:    bne- 0, .LBB58_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB58_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB58_4
+; PPC64LE-NEXT:    beq+ 0, .LBB58_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB58_2
+; PPC64LE-NEXT:    beq+ 0, .LBB58_2
 ; PPC64LE-NEXT:  .LBB58_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -888,20 +869,19 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB59_4
+; PPC64LE-NEXT:    bne- 0, .LBB59_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB59_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB59_4
+; PPC64LE-NEXT:    beq+ 0, .LBB59_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB59_2
+; PPC64LE-NEXT:    beq+ 0, .LBB59_2
 ; PPC64LE-NEXT:  .LBB59_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -912,16 +892,15 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test60(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test60:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB60_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB60_1
+; PPC64LE-NEXT:    bne- 0, .LBB60_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
@@ -931,16 +910,15 @@ define void @test60(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test61(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test61:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB61_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB61_1
+; PPC64LE-NEXT:    bne- 0, .LBB61_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -951,16 +929,15 @@ define void @test61(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test62(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test62:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB62_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB62_3
+; PPC64LE-NEXT:    bne- 0, .LBB62_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB62_1
+; PPC64LE-NEXT:    bne- 0, .LBB62_1
 ; PPC64LE-NEXT:  .LBB62_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -973,19 +950,19 @@ define void @test63(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB63_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB63_2
+; PPC64LE-NEXT:    beq+ 0, .LBB63_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic
   ret void
@@ -996,19 +973,19 @@ define void @test64(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB64_4
+; PPC64LE-NEXT:    bne- 0, .LBB64_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB64_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB64_2
+; PPC64LE-NEXT:    beq+ 0, .LBB64_2
 ; PPC64LE-NEXT:  .LBB64_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1021,22 +998,20 @@ define void @test65(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB65_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB65_5
+; PPC64LE-NEXT:    beq+ 0, .LBB65_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB65_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB65_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB65_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB65_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic
@@ -1048,19 +1023,18 @@ define void @test66(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB66_4
+; PPC64LE-NEXT:    bne- 0, .LBB66_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB66_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB66_4
+; PPC64LE-NEXT:    beq+ 0, .LBB66_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB66_2
+; PPC64LE-NEXT:    beq+ 0, .LBB66_2
 ; PPC64LE-NEXT:  .LBB66_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1073,22 +1047,20 @@ define void @test67(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB67_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB67_5
+; PPC64LE-NEXT:    beq+ 0, .LBB67_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB67_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB67_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB67_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB67_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst monotonic
@@ -1100,19 +1072,18 @@ define void @test68(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB68_4
+; PPC64LE-NEXT:    bne- 0, .LBB68_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB68_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB68_4
+; PPC64LE-NEXT:    beq+ 0, .LBB68_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB68_2
+; PPC64LE-NEXT:    beq+ 0, .LBB68_2
 ; PPC64LE-NEXT:  .LBB68_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1125,19 +1096,18 @@ define void @test69(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB69_4
+; PPC64LE-NEXT:    bne- 0, .LBB69_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB69_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB69_4
+; PPC64LE-NEXT:    beq+ 0, .LBB69_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB69_2
+; PPC64LE-NEXT:    beq+ 0, .LBB69_2
 ; PPC64LE-NEXT:  .LBB69_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1148,16 +1118,15 @@ define void @test69(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test70(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test70:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB70_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB70_1
+; PPC64LE-NEXT:    bne- 0, .LBB70_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
@@ -1167,16 +1136,15 @@ define void @test70(ptr %ptr, i64 %cmp, i64 %val) {
 define void @test71(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test71:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB71_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB71_1
+; PPC64LE-NEXT:    bne- 0, .LBB71_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1187,16 +1155,15 @@ define void @test71(ptr %ptr, i64 %cmp, i64 %val) {
 define void @test72(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test72:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB72_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB72_3
+; PPC64LE-NEXT:    bne- 0, .LBB72_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB72_1
+; PPC64LE-NEXT:    bne- 0, .LBB72_1
 ; PPC64LE-NEXT:  .LBB72_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1209,19 +1176,19 @@ define void @test73(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB73_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB73_2
+; PPC64LE-NEXT:    beq+ 0, .LBB73_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic
   ret void
@@ -1232,19 +1199,19 @@ define void @test74(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB74_4
+; PPC64LE-NEXT:    bne- 0, .LBB74_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB74_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB74_2
+; PPC64LE-NEXT:    beq+ 0, .LBB74_2
 ; PPC64LE-NEXT:  .LBB74_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1257,22 +1224,20 @@ define void @test75(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB75_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB75_5
+; PPC64LE-NEXT:    beq+ 0, .LBB75_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB75_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB75_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB75_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB75_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic
@@ -1284,19 +1249,18 @@ define void @test76(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB76_4
+; PPC64LE-NEXT:    bne- 0, .LBB76_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB76_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB76_4
+; PPC64LE-NEXT:    beq+ 0, .LBB76_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB76_2
+; PPC64LE-NEXT:    beq+ 0, .LBB76_2
 ; PPC64LE-NEXT:  .LBB76_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1309,22 +1273,20 @@ define void @test77(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB77_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB77_5
+; PPC64LE-NEXT:    beq+ 0, .LBB77_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB77_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB77_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB77_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB77_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst monotonic
@@ -1336,19 +1298,18 @@ define void @test78(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB78_4
+; PPC64LE-NEXT:    bne- 0, .LBB78_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB78_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB78_4
+; PPC64LE-NEXT:    beq+ 0, .LBB78_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB78_2
+; PPC64LE-NEXT:    beq+ 0, .LBB78_2
 ; PPC64LE-NEXT:  .LBB78_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1361,19 +1322,18 @@ define void @test79(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB79_4
+; PPC64LE-NEXT:    bne- 0, .LBB79_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB79_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB79_4
+; PPC64LE-NEXT:    beq+ 0, .LBB79_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB79_2
+; PPC64LE-NEXT:    beq+ 0, .LBB79_2
 ; PPC64LE-NEXT:  .LBB79_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1386,16 +1346,15 @@ define void @test80(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB80_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB80_1
+; PPC64LE-NEXT:    bne- 0, .LBB80_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") monotonic monotonic
@@ -1407,16 +1366,15 @@ define void @test81(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB81_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB81_1
+; PPC64LE-NEXT:    bne- 0, .LBB81_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1429,16 +1387,15 @@ define void @test82(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB82_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB82_3
+; PPC64LE-NEXT:    bne- 0, .LBB82_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB82_1
+; PPC64LE-NEXT:    bne- 0, .LBB82_1
 ; PPC64LE-NEXT:  .LBB82_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1452,7 +1409,7 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
@@ -1460,12 +1417,12 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:  .LBB83_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB83_2
+; PPC64LE-NEXT:    beq+ 0, .LBB83_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") release monotonic
   ret void
@@ -1477,7 +1434,7 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB84_4
+; PPC64LE-NEXT:    bne- 0, .LBB84_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
@@ -1485,12 +1442,12 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:  .LBB84_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB84_2
+; PPC64LE-NEXT:    beq+ 0, .LBB84_2
 ; PPC64LE-NEXT:  .LBB84_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1504,23 +1461,21 @@ define void @test85(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB85_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB85_5
+; PPC64LE-NEXT:    beq+ 0, .LBB85_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB85_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB85_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB85_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB85_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") acq_rel monotonic
@@ -1533,20 +1488,19 @@ define void @test86(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB86_4
+; PPC64LE-NEXT:    bne- 0, .LBB86_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB86_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB86_4
+; PPC64LE-NEXT:    beq+ 0, .LBB86_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB86_2
+; PPC64LE-NEXT:    beq+ 0, .LBB86_2
 ; PPC64LE-NEXT:  .LBB86_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1560,23 +1514,21 @@ define void @test87(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB87_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB87_5
+; PPC64LE-NEXT:    beq+ 0, .LBB87_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB87_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB87_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB87_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB87_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") seq_cst monotonic
@@ -1589,20 +1541,19 @@ define void @test88(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB88_4
+; PPC64LE-NEXT:    bne- 0, .LBB88_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB88_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB88_4
+; PPC64LE-NEXT:    beq+ 0, .LBB88_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB88_2
+; PPC64LE-NEXT:    beq+ 0, .LBB88_2
 ; PPC64LE-NEXT:  .LBB88_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1616,20 +1567,19 @@ define void @test89(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB89_4
+; PPC64LE-NEXT:    bne- 0, .LBB89_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB89_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB89_4
+; PPC64LE-NEXT:    beq+ 0, .LBB89_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB89_2
+; PPC64LE-NEXT:    beq+ 0, .LBB89_2
 ; PPC64LE-NEXT:  .LBB89_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1642,16 +1592,15 @@ define void @test90(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB90_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB90_1
+; PPC64LE-NEXT:    bne- 0, .LBB90_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") monotonic monotonic
@@ -1663,16 +1612,15 @@ define void @test91(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB91_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB91_1
+; PPC64LE-NEXT:    bne- 0, .LBB91_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1685,16 +1633,15 @@ define void @test92(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB92_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB92_3
+; PPC64LE-NEXT:    bne- 0, .LBB92_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB92_1
+; PPC64LE-NEXT:    bne- 0, .LBB92_1
 ; PPC64LE-NEXT:  .LBB92_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1708,7 +1655,7 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
@@ -1716,12 +1663,12 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:  .LBB93_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB93_2
+; PPC64LE-NEXT:    beq+ 0, .LBB93_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") release monotonic
   ret void
@@ -1733,7 +1680,7 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB94_4
+; PPC64LE-NEXT:    bne- 0, .LBB94_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
@@ -1741,12 +1688,12 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:  .LBB94_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB94_2
+; PPC64LE-NEXT:    beq+ 0, .LBB94_2
 ; PPC64LE-NEXT:  .LBB94_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1760,23 +1707,21 @@ define void @test95(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB95_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB95_5
+; PPC64LE-NEXT:    beq+ 0, .LBB95_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB95_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB95_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB95_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB95_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") acq_rel monotonic
@@ -1789,20 +1734,19 @@ define void @test96(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB96_4
+; PPC64LE-NEXT:    bne- 0, .LBB96_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB96_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB96_4
+; PPC64LE-NEXT:    beq+ 0, .LBB96_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB96_2
+; PPC64LE-NEXT:    beq+ 0, .LBB96_2
 ; PPC64LE-NEXT:  .LBB96_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1816,23 +1760,21 @@ define void @test97(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB97_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB97_5
+; PPC64LE-NEXT:    beq+ 0, .LBB97_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB97_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB97_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB97_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB97_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") seq_cst monotonic
@@ -1845,20 +1787,19 @@ define void @test98(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB98_4
+; PPC64LE-NEXT:    bne- 0, .LBB98_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB98_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB98_4
+; PPC64LE-NEXT:    beq+ 0, .LBB98_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB98_2
+; PPC64LE-NEXT:    beq+ 0, .LBB98_2
 ; PPC64LE-NEXT:  .LBB98_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1872,20 +1813,19 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) {
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    clrlwi 4, 4, 16
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB99_4
+; PPC64LE-NEXT:    bne- 0, .LBB99_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
 ; PPC64LE-NEXT:    clrlwi 5, 5, 16
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB99_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    sthcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB99_4
+; PPC64LE-NEXT:    beq+ 0, .LBB99_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lharx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB99_2
+; PPC64LE-NEXT:    beq+ 0, .LBB99_2
 ; PPC64LE-NEXT:  .LBB99_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1896,16 +1836,15 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) {
 define void @test100(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test100:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB100_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB100_1
+; PPC64LE-NEXT:    bne- 0, .LBB100_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") monotonic monotonic
@@ -1915,16 +1854,15 @@ define void @test100(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test101(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test101:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB101_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB101_1
+; PPC64LE-NEXT:    bne- 0, .LBB101_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1935,16 +1873,15 @@ define void @test101(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test102(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE-LABEL: test102:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB102_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB102_3
+; PPC64LE-NEXT:    bne- 0, .LBB102_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB102_1
+; PPC64LE-NEXT:    bne- 0, .LBB102_1
 ; PPC64LE-NEXT:  .LBB102_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -1957,19 +1894,19 @@ define void @test103(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB103_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB103_2
+; PPC64LE-NEXT:    beq+ 0, .LBB103_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") release monotonic
   ret void
@@ -1980,19 +1917,19 @@ define void @test104(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB104_4
+; PPC64LE-NEXT:    bne- 0, .LBB104_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB104_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB104_2
+; PPC64LE-NEXT:    beq+ 0, .LBB104_2
 ; PPC64LE-NEXT:  .LBB104_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2005,22 +1942,20 @@ define void @test105(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB105_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB105_5
+; PPC64LE-NEXT:    beq+ 0, .LBB105_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB105_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB105_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB105_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB105_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") acq_rel monotonic
@@ -2032,19 +1967,18 @@ define void @test106(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB106_4
+; PPC64LE-NEXT:    bne- 0, .LBB106_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB106_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB106_4
+; PPC64LE-NEXT:    beq+ 0, .LBB106_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB106_2
+; PPC64LE-NEXT:    beq+ 0, .LBB106_2
 ; PPC64LE-NEXT:  .LBB106_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2057,22 +1991,20 @@ define void @test107(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB107_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB107_5
+; PPC64LE-NEXT:    beq+ 0, .LBB107_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB107_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB107_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB107_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB107_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") seq_cst monotonic
@@ -2084,19 +2016,18 @@ define void @test108(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB108_4
+; PPC64LE-NEXT:    bne- 0, .LBB108_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB108_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB108_4
+; PPC64LE-NEXT:    beq+ 0, .LBB108_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB108_2
+; PPC64LE-NEXT:    beq+ 0, .LBB108_2
 ; PPC64LE-NEXT:  .LBB108_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2109,19 +2040,18 @@ define void @test109(ptr %ptr, i32 %cmp, i32 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB109_4
+; PPC64LE-NEXT:    bne- 0, .LBB109_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB109_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stwcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB109_4
+; PPC64LE-NEXT:    beq+ 0, .LBB109_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lwarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB109_2
+; PPC64LE-NEXT:    beq+ 0, .LBB109_2
 ; PPC64LE-NEXT:  .LBB109_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2132,16 +2062,15 @@ define void @test109(ptr %ptr, i32 %cmp, i32 %val) {
 define void @test110(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test110:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB110_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB110_1
+; PPC64LE-NEXT:    bne- 0, .LBB110_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") monotonic monotonic
@@ -2151,16 +2080,15 @@ define void @test110(ptr %ptr, i64 %cmp, i64 %val) {
 define void @test111(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test111:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB111_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB111_1
+; PPC64LE-NEXT:    bne- 0, .LBB111_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2171,16 +2099,15 @@ define void @test111(ptr %ptr, i64 %cmp, i64 %val) {
 define void @test112(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE-LABEL: test112:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB112_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB112_3
+; PPC64LE-NEXT:    bne- 0, .LBB112_3
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB112_1
+; PPC64LE-NEXT:    bne- 0, .LBB112_1
 ; PPC64LE-NEXT:  .LBB112_3: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2193,19 +2120,19 @@ define void @test113(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB113_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB113_2
+; PPC64LE-NEXT:    beq+ 0, .LBB113_2
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") release monotonic
   ret void
@@ -2216,19 +2143,19 @@ define void @test114(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB114_4
+; PPC64LE-NEXT:    bne- 0, .LBB114_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB114_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beqlr 0
+; PPC64LE-NEXT:    beqlr+ 0
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB114_2
+; PPC64LE-NEXT:    beq+ 0, .LBB114_2
 ; PPC64LE-NEXT:  .LBB114_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2241,22 +2168,20 @@ define void @test115(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB115_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB115_5
+; PPC64LE-NEXT:    beq+ 0, .LBB115_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB115_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB115_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB115_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB115_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") acq_rel monotonic
@@ -2268,19 +2193,18 @@ define void @test116(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB116_4
+; PPC64LE-NEXT:    bne- 0, .LBB116_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    lwsync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB116_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB116_4
+; PPC64LE-NEXT:    beq+ 0, .LBB116_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB116_2
+; PPC64LE-NEXT:    beq+ 0, .LBB116_2
 ; PPC64LE-NEXT:  .LBB116_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2293,22 +2217,20 @@ define void @test117(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB117_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB117_5
+; PPC64LE-NEXT:    beq+ 0, .LBB117_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB117_2
-; PPC64LE-NEXT:  # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT:    beq+ 0, .LBB117_2
 ; PPC64LE-NEXT:    blr
-; PPC64LE-NEXT:  .LBB117_5: # %cmpxchg.success
+; PPC64LE-NEXT:  .LBB117_4: # %cmpxchg.success
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") seq_cst monotonic
@@ -2320,19 +2242,18 @@ define void @test118(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB118_4
+; PPC64LE-NEXT:    bne- 0, .LBB118_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB118_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB118_4
+; PPC64LE-NEXT:    beq+ 0, .LBB118_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB118_2
+; PPC64LE-NEXT:    beq+ 0, .LBB118_2
 ; PPC64LE-NEXT:  .LBB118_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
@@ -2345,19 +2266,18 @@ define void @test119(ptr %ptr, i64 %cmp, i64 %val) {
 ; PPC64LE:       # %bb.0: # %cmpxchg.start
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    bne 0, .LBB119_4
+; PPC64LE-NEXT:    bne- 0, .LBB119_4
 ; PPC64LE-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    sync
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB119_2: # %cmpxchg.trystore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stdcx. 5, 0, 3
-; PPC64LE-NEXT:    beq 0, .LBB119_4
+; PPC64LE-NEXT:    beq+ 0, .LBB119_4
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.releasedload
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    ldarx 6, 0, 3
 ; PPC64LE-NEXT:    cmpld 6, 4
-; PPC64LE-NEXT:    beq 0, .LBB119_2
+; PPC64LE-NEXT:    beq+ 0, .LBB119_2
 ; PPC64LE-NEXT:  .LBB119_4: # %cmpxchg.nostore
 ; PPC64LE-NEXT:    lwsync
 ; PPC64LE-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 40786057ead5f..183c8e1323f2e 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -138,67 +138,67 @@ define void @store_i64_seq_cst(ptr %mem) {
 ; Atomic CmpXchg
 define i8 @cas_strong_i8_sc_sc(ptr %mem) {
 ; PPC32-LABEL: cas_strong_i8_sc_sc:
-; PPC32:       # %bb.0:
+; PPC32:       # %bb.0: # %cmpxchg.start
 ; PPC32-NEXT:    rlwinm r5, r3, 0, 0, 29
 ; PPC32-NEXT:    lwarx r4, 0, r5
-; PPC32-NEXT:    not     r3, r3
+; PPC32-NEXT:    not r3, r3
 ; PPC32-NEXT:    rlwinm r3, r3, 3, 27, 28
 ; PPC32-NEXT:    srw r6, r4, r3
 ; PPC32-NEXT:    andi. r6, r6, 255
-; PPC32-NEXT:    bne     cr0, .LBB8_4
-; PPC32-NEXT:  # %bb.1:                                # %cmpxchg.fencedstore
+; PPC32-NEXT:    bne- cr0, .LBB8_4
+; PPC32-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC32-NEXT:    li r6, 255
 ; PPC32-NEXT:    li r7, 1
 ; PPC32-NEXT:    slw r6, r6, r3
-; PPC32-NEXT:    not     r6, r6
+; PPC32-NEXT:    not r6, r6
 ; PPC32-NEXT:    slw r7, r7, r3
 ; PPC32-NEXT:    sync
-; PPC32-NEXT:  .LBB8_2:                                # %cmpxchg.trystore
-; PPC32-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; PPC32-NEXT:  .LBB8_2: # %cmpxchg.trystore
+; PPC32-NEXT:    #
 ; PPC32-NEXT:    and r8, r4, r6
 ; PPC32-NEXT:    or r8, r8, r7
 ; PPC32-NEXT:    stwcx. r8, 0, r5
-; PPC32-NEXT:    beq     cr0, .LBB8_4
-; PPC32-NEXT:  # %bb.3:                                # %cmpxchg.releasedload
-; PPC32-NEXT:                                          #   in Loop: Header=BB8_2 Depth=1
+; PPC32-NEXT:    beq+ cr0, .LBB8_4
+; PPC32-NEXT:  # %bb.3: # %cmpxchg.releasedload
+; PPC32-NEXT:    #
 ; PPC32-NEXT:    lwarx r4, 0, r5
 ; PPC32-NEXT:    srw r8, r4, r3
 ; PPC32-NEXT:    andi. r8, r8, 255
-; PPC32-NEXT:    beq     cr0, .LBB8_2
-; PPC32-NEXT:  .LBB8_4:                                # %cmpxchg.nostore
+; PPC32-NEXT:    beq+ cr0, .LBB8_2
+; PPC32-NEXT:  .LBB8_4: # %cmpxchg.nostore
 ; PPC32-NEXT:    srw r3, r4, r3
 ; PPC32-NEXT:    lwsync
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: cas_strong_i8_sc_sc:
-; PPC64:       # %bb.0:
+; PPC64:       # %bb.0: # %cmpxchg.start
 ; PPC64-NEXT:    rldicr r5, r3, 0, 61
-; PPC64-NEXT:    not     r3, r3
+; PPC64-NEXT:    not r3, r3
 ; PPC64-NEXT:    lwarx r4, 0, r5
 ; PPC64-NEXT:    rlwinm r3, r3, 3, 27, 28
 ; PPC64-NEXT:    srw r6, r4, r3
 ; PPC64-NEXT:    andi. r6, r6, 255
-; PPC64-NEXT:    bne     cr0, .LBB8_4
-; PPC64-NEXT:  # %bb.1:                                # %cmpxchg.fencedstore
+; PPC64-NEXT:    bne- cr0, .LBB8_4
+; PPC64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64-NEXT:    li r6, 255
 ; PPC64-NEXT:    li r7, 1
 ; PPC64-NEXT:    slw r6, r6, r3
-; PPC64-NEXT:    not     r6, r6
+; PPC64-NEXT:    not r6, r6
 ; PPC64-NEXT:    slw r7, r7, r3
 ; PPC64-NEXT:    sync
-; PPC64-NEXT:  .LBB8_2:                                # %cmpxchg.trystore
-; PPC64-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; PPC64-NEXT:  .LBB8_2: # %cmpxchg.trystore
+; PPC64-NEXT:    #
 ; PPC64-NEXT:    and r8, r4, r6
 ; PPC64-NEXT:    or r8, r8, r7
 ; PPC64-NEXT:    stwcx. r8, 0, r5
-; PPC64-NEXT:    beq     cr0, .LBB8_4
-; PPC64-NEXT:  # %bb.3:                                # %cmpxchg.releasedload
-; PPC64-NEXT:                                          #   in Loop: Header=BB8_2 Depth=1
+; PPC64-NEXT:    beq+ cr0, .LBB8_4
+; PPC64-NEXT:  # %bb.3: # %cmpxchg.releasedload
+; PPC64-NEXT:    #
 ; PPC64-NEXT:    lwarx r4, 0, r5
 ; PPC64-NEXT:    srw r8, r4, r3
 ; PPC64-NEXT:    andi. r8, r8, 255
-; PPC64-NEXT:    beq     cr0, .LBB8_2
-; PPC64-NEXT:  .LBB8_4:                                # %cmpxchg.nostore
+; PPC64-NEXT:    beq+ cr0, .LBB8_2
+; PPC64-NEXT:  .LBB8_4: # %cmpxchg.nostore
 ; PPC64-NEXT:    srw r3, r4, r3
 ; PPC64-NEXT:    lwsync
 ; PPC64-NEXT:    blr
@@ -208,54 +208,50 @@ define i8 @cas_strong_i8_sc_sc(ptr %mem) {
 }
 define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
 ; PPC32-LABEL: cas_weak_i16_acquire_acquire:
-; PPC32:       # %bb.0:
+; PPC32:       # %bb.0: # %cmpxchg.start
 ; PPC32-NEXT:    rlwinm r4, r3, 0, 0, 29
 ; PPC32-NEXT:    lwarx r5, 0, r4
-; PPC32-NEXT:    clrlwi  r3, r3, 30
+; PPC32-NEXT:    clrlwi r3, r3, 30
 ; PPC32-NEXT:    xori r3, r3, 2
 ; PPC32-NEXT:    slwi r6, r3, 3
 ; PPC32-NEXT:    srw r3, r5, r6
 ; PPC32-NEXT:    andi. r7, r3, 65535
-; PPC32-NEXT:    beq     cr0, .LBB9_2
-; PPC32-NEXT:  # %bb.1:                                # %cmpxchg.failure
-; PPC32-NEXT:    lwsync
-; PPC32-NEXT:    blr
-; PPC32-NEXT:  .LBB9_2:                                # %cmpxchg.fencedstore
+; PPC32-NEXT:    bne- cr0, .LBB9_2
+; PPC32-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC32-NEXT:    lis r7, 0
 ; PPC32-NEXT:    ori r7, r7, 65535
 ; PPC32-NEXT:    slw r7, r7, r6
 ; PPC32-NEXT:    li r8, 1
-; PPC32-NEXT:    not     r7, r7
+; PPC32-NEXT:    not r7, r7
 ; PPC32-NEXT:    slw r6, r8, r6
 ; PPC32-NEXT:    and r5, r5, r7
 ; PPC32-NEXT:    or r5, r5, r6
 ; PPC32-NEXT:    stwcx. r5, 0, r4
+; PPC32-NEXT:  .LBB9_2: # %cmpxchg.failure
 ; PPC32-NEXT:    lwsync
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: cas_weak_i16_acquire_acquire:
-; PPC64:       # %bb.0:
-; PPC64-NEXT:   rldicr r4, r3, 0, 61
-; PPC64-NEXT:    clrlwi  r3, r3, 30
+; PPC64:       # %bb.0: # %cmpxchg.start
+; PPC64-NEXT:    rldicr r4, r3, 0, 61
+; PPC64-NEXT:    clrlwi r3, r3, 30
 ; PPC64-NEXT:    lwarx r5, 0, r4
 ; PPC64-NEXT:    xori r3, r3, 2
 ; PPC64-NEXT:    slwi r6, r3, 3
 ; PPC64-NEXT:    srw r3, r5, r6
 ; PPC64-NEXT:    andi. r7, r3, 65535
-; PPC64-NEXT:    beq     cr0, .LBB9_2
-; PPC64-NEXT:  # %bb.1:                                # %cmpxchg.failure
-; PPC64-NEXT:    lwsync
-; PPC64-NEXT:    blr
-; PPC64-NEXT:  .LBB9_2:                                # %cmpxchg.fencedstore
+; PPC64-NEXT:    bne- cr0, .LBB9_2
+; PPC64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64-NEXT:    lis r7, 0
 ; PPC64-NEXT:    ori r7, r7, 65535
 ; PPC64-NEXT:    slw r7, r7, r6
 ; PPC64-NEXT:    li r8, 1
-; PPC64-NEXT:    not     r7, r7
+; PPC64-NEXT:    not r7, r7
 ; PPC64-NEXT:    slw r6, r8, r6
 ; PPC64-NEXT:    and r5, r5, r7
 ; PPC64-NEXT:    or r5, r5, r6
 ; PPC64-NEXT:    stwcx. r5, 0, r4
+; PPC64-NEXT:  .LBB9_2: # %cmpxchg.failure
 ; PPC64-NEXT:    lwsync
 ; PPC64-NEXT:    blr
   %val = cmpxchg weak ptr %mem, i16 0, i16 1 acquire acquire
@@ -264,24 +260,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
 }
 define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
 ; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    mr      r4, r3
+; CHECK:       # %bb.0: # %cmpxchg.start
+; CHECK-NEXT:    mr r4, r3
 ; CHECK-NEXT:    lwarx r3, 0, r3
-; CHECK-NEXT:    cmplwi  r3, 0
-; CHECK-NEXT:    bne     cr0, .LBB10_4
-; CHECK-NEXT:  # %bb.1:                                # %cmpxchg.fencedstore
+; CHECK-NEXT:    cmplwi r3, 0
+; CHECK-NEXT:    bne- cr0, .LBB10_4
+; CHECK-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; CHECK-NEXT:    li r5, 1
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:  .LBB10_2:                               # %cmpxchg.trystore
-; CHECK-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:  .LBB10_2: # %cmpxchg.trystore
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    stwcx. r5, 0, r4
-; CHECK-NEXT:    beq     cr0, .LBB10_4
-; CHECK-NEXT:  # %bb.3:                                # %cmpxchg.releasedload
-; CHECK-NEXT:                                          #   in Loop: Header=BB10_2 Depth=1
+; CHECK-NEXT:    beq+ cr0, .LBB10_4
+; CHECK-NEXT:  # %bb.3: # %cmpxchg.releasedload
+; CHECK-NEXT:    #
 ; CHECK-NEXT:    lwarx r3, 0, r4
-; CHECK-NEXT:    cmplwi  r3, 0
-; CHECK-NEXT:    beq     cr0, .LBB10_2
-; CHECK-NEXT:  .LBB10_4:                               # %cmpxchg.nostore
+; CHECK-NEXT:    cmplwi r3, 0
+; CHECK-NEXT:    beq+ cr0, .LBB10_2
+; CHECK-NEXT:  .LBB10_4: # %cmpxchg.nostore
 ; CHECK-NEXT:    lwsync
 ; CHECK-NEXT:    blr
   %val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire
@@ -313,12 +309,12 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: cas_weak_i64_release_monotonic:
-; PPC64:       # %bb.0:
-; PPC64-NEXT:    mr      r4, r3
+; PPC64:       # %bb.0: # %cmpxchg.start
+; PPC64-NEXT:    mr r4, r3
 ; PPC64-NEXT:    ldarx r3, 0, r3
-; PPC64-NEXT:    cmpldi  r3, 0
-; PPC64-NEXT:    bnelr   cr0
-; PPC64-NEXT:  # %bb.1:                                # %cmpxchg.fencedstore
+; PPC64-NEXT:    cmpldi r3, 0
+; PPC64-NEXT:    bnelr- cr0
+; PPC64-NEXT:  # %bb.1: # %cmpxchg.fencedstore
 ; PPC64-NEXT:    li r5, 1
 ; PPC64-NEXT:    lwsync
 ; PPC64-NEXT:    stdcx. r5, 0, r4
diff --git a/llvm/test/CodeGen/PowerPC/loop-comment.ll b/llvm/test/CodeGen/PowerPC/loop-comment.ll
index 530e67b4804fb..b4ceb36768904 100644
--- a/llvm/test/CodeGen/PowerPC/loop-comment.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-comment.ll
@@ -6,16 +6,15 @@ define void @test(ptr %ptr, i8 %cmp, i8 %val) {
 ; PPC64LE:       # %bb.0:
 ; PPC64LE-NEXT:    clrlwi 5, 5, 24
 ; PPC64LE-NEXT:    clrlwi 4, 4, 24
-; PPC64LE-NEXT:    .p2align 5
 ; PPC64LE-NEXT:  .LBB0_1: # %cmpxchg.start
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    lbarx 6, 0, 3
 ; PPC64LE-NEXT:    cmplw 6, 4
-; PPC64LE-NEXT:    bnelr 0
+; PPC64LE-NEXT:    bnelr- 0
 ; PPC64LE-NEXT:  # %bb.2: # %cmpxchg.fencedstore
 ; PPC64LE-NEXT:    #
 ; PPC64LE-NEXT:    stbcx. 5, 0, 3
-; PPC64LE-NEXT:    bne 0, .LBB0_1
+; PPC64LE-NEXT:    bne- 0, .LBB0_1
 ; PPC64LE-NEXT:  # %bb.3: # %cmpxchg.end
 ; PPC64LE-NEXT:    blr
   %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic


