[llvm] [PowerPC] support branch hint for AtomicExpandImpl::expandAtomicCmpXchg (PR #152366)
zhijian lin via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 8 11:39:37 PDT 2025
https://github.com/diggerlin updated https://github.com/llvm/llvm-project/pull/152366
>From 5afd4560e10021571218fc10ef3131527d75e85a Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Wed, 16 Jul 2025 18:57:19 +0000
Subject: [PATCH 1/5] add branch hint
---
llvm/include/llvm/CodeGen/TargetLowering.h | 5 +++++
llvm/lib/CodeGen/AtomicExpandPass.cpp | 9 ++++++---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 +++++
llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 +
4 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index cbdc1b6031680..e7fb34b036e8b 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2332,6 +2332,11 @@ class LLVM_ABI TargetLoweringBase {
virtual Instruction *emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const;
+
+ virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+ return nullptr;
+ }
+
/// @}
// Emits code that executes when the comparison result in the ll/sc
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 3f3d5dc90711f..a5319f3a2440a 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1454,7 +1454,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
+ Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
+ TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
Builder.SetInsertPoint(ReleasingStoreBB);
if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
@@ -1473,7 +1474,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
Builder.CreateCondBr(StoreSuccess, SuccessBB,
- CI->isWeak() ? FailureBB : RetryBB);
+ CI->isWeak() ? FailureBB : RetryBB,
+ TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
Builder.SetInsertPoint(ReleasedLoadBB);
Value *SecondLoad;
@@ -1486,7 +1488,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB,
+ TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
// Update PHI node in TryStoreBB.
LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
} else
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 459525ed4ee9a..853923ac4125a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -68,6 +68,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -12816,6 +12817,10 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
return Builder.CreateXor(Call, Builder.getInt32(1));
}
+MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+ return MDBuilder(Ctx).createLikelyBranchWeights();
+}
+
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 124c7116dc3b5..9f73c5587805a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -938,6 +938,7 @@ namespace llvm {
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
+ virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
bool shouldInlineQuadwordAtomics() const;
TargetLowering::AtomicExpansionKind
>From b6976bf482cda0e2b74102847f68efd02d28c26c Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 24 Jul 2025 13:56:14 +0000
Subject: [PATCH 2/5] modify test case
---
llvm/include/llvm/CodeGen/TargetLowering.h | 3 +-
llvm/lib/CodeGen/AtomicExpandPass.cpp | 16 +-
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +-
llvm/lib/Target/PowerPC/PPCISelLowering.h | 3 +-
.../CodeGen/PowerPC/PR35812-neg-cmpxchg.ll | 72 +--
llvm/test/CodeGen/PowerPC/all-atomics.ll | 211 +++----
.../PowerPC/atomic-compare-exchange-weak.ll | 34 +-
llvm/test/CodeGen/PowerPC/atomic-float.ll | 90 ++-
.../PowerPC/atomicrmw-cond-sub-clamp.ll | 466 ++++++++-------
.../PowerPC/atomicrmw-uinc-udec-wrap.ll | 462 ++++++++-------
.../CodeGen/PowerPC/atomics-regression.ll | 544 ++++++++----------
llvm/test/CodeGen/PowerPC/atomics.ll | 114 ++--
llvm/test/CodeGen/PowerPC/loop-comment.ll | 5 +-
13 files changed, 991 insertions(+), 1032 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index e7fb34b036e8b..027bcc5bc53ae 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2333,7 +2333,8 @@ class LLVM_ABI TargetLoweringBase {
Instruction *Inst,
AtomicOrdering Ord) const;
- virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+ virtual MDNode *
+ getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
return nullptr;
}
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a5319f3a2440a..abaa8b6e841f6 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1454,8 +1454,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
- TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+ Builder.CreateCondBr(
+ ShouldStore, ReleasingStoreBB, NoStoreBB,
+ TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
Builder.SetInsertPoint(ReleasingStoreBB);
if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
@@ -1473,9 +1474,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
- Builder.CreateCondBr(StoreSuccess, SuccessBB,
- CI->isWeak() ? FailureBB : RetryBB,
- TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+ Builder.CreateCondBr(
+ StoreSuccess, SuccessBB, CI->isWeak() ? FailureBB : RetryBB,
+ TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
Builder.SetInsertPoint(ReleasedLoadBB);
Value *SecondLoad;
@@ -1488,8 +1489,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB,
- TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+ Builder.CreateCondBr(
+ ShouldStore, TryStoreBB, NoStoreBB,
+ TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
// Update PHI node in TryStoreBB.
LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
} else
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 853923ac4125a..b218532e56b6a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12817,7 +12817,8 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
return Builder.CreateXor(Call, Builder.getInt32(1));
}
-MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(
+ LLVMContext &Ctx) const {
return MDBuilder(Ctx).createLikelyBranchWeights();
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 9f73c5587805a..4892a3c603a6c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -938,7 +938,8 @@ namespace llvm {
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
- virtual MDNode *getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
+ virtual MDNode *
+ getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
bool shouldInlineQuadwordAtomics() const;
TargetLowering::AtomicExpansionKind
diff --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
index b7852c3c3e6e0..2d8e0e869a860 100644
--- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
+++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -19,51 +19,53 @@ define signext i32 @main() nounwind {
; CHECK-NEXT: addi 3, 1, 46
; CHECK-NEXT: lharx 4, 0, 3
; CHECK-NEXT: cmplwi 4, 33059
-; CHECK-NEXT: bne 0, .LBB0_4
+; CHECK-NEXT: bne- 0, .LBB0_4
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: sync
; CHECK-NEXT: li 4, 234
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB0_2: # %cmpxchg.trystore
; CHECK-NEXT: #
; CHECK-NEXT: sthcx. 4, 0, 3
-; CHECK-NEXT: beq 0, .LBB0_7
+; CHECK-NEXT: beq+ 0, .LBB0_5
; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
; CHECK-NEXT: #
; CHECK-NEXT: lharx 5, 0, 3
; CHECK-NEXT: cmplwi 5, 33059
-; CHECK-NEXT: beq 0, .LBB0_2
+; CHECK-NEXT: beq+ 0, .LBB0_2
; CHECK-NEXT: .LBB0_4: # %cmpxchg.nostore
; CHECK-NEXT: lwsync
-; CHECK-NEXT: b .LBB0_8
-; CHECK-NEXT: .LBB0_5: # %L.B0000
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: b .LBB0_6
+; CHECK-NEXT: .LBB0_5: # %cmpxchg.success
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: .LBB0_6: # %cmpxchg.end
+; CHECK-NEXT: bc 4, 20, .LBB0_9
+; CHECK-NEXT: # %bb.7: # %L.B0000
; CHECK-NEXT: lhz 3, 46(1)
; CHECK-NEXT: cmplwi 3, 234
-; CHECK-NEXT: bne 0, .LBB0_9
-; CHECK-NEXT: # %bb.6: # %L.B0001
+; CHECK-NEXT: bne 0, .LBB0_10
+; CHECK-NEXT: # %bb.8: # %L.B0001
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-NEXT: bl puts
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: b .LBB0_11
-; CHECK-NEXT: .LBB0_7: # %cmpxchg.success
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: b .LBB0_5
-; CHECK-NEXT: .LBB0_8: # %L.B0003
+; CHECK-NEXT: b .LBB0_12
+; CHECK-NEXT: .LBB0_9: # %L.B0003
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-NEXT: addi 3, 3, 16
-; CHECK-NEXT: b .LBB0_10
-; CHECK-NEXT: .LBB0_9: # %L.B0005
+; CHECK-NEXT: b .LBB0_11
+; CHECK-NEXT: .LBB0_10: # %L.B0005
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-NEXT: addi 3, 3, 64
-; CHECK-NEXT: .LBB0_10: # %L.B0003
+; CHECK-NEXT: .LBB0_11: # %L.B0003
; CHECK-NEXT: bl puts
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: .LBB0_11: # %L.B0003
+; CHECK-NEXT: .LBB0_12: # %L.B0003
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: mtlr 0
@@ -83,7 +85,7 @@ define signext i32 @main() nounwind {
; CHECK-P7-NEXT: srw 6, 5, 4
; CHECK-P7-NEXT: clrlwi 6, 6, 16
; CHECK-P7-NEXT: cmplwi 6, 33059
-; CHECK-P7-NEXT: bne 0, .LBB0_4
+; CHECK-P7-NEXT: bne- 0, .LBB0_4
; CHECK-P7-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-P7-NEXT: lis 6, 0
; CHECK-P7-NEXT: li 7, 234
@@ -92,51 +94,53 @@ define signext i32 @main() nounwind {
; CHECK-P7-NEXT: slw 7, 7, 4
; CHECK-P7-NEXT: slw 6, 6, 4
; CHECK-P7-NEXT: not 6, 6
-; CHECK-P7-NEXT: .p2align 4
; CHECK-P7-NEXT: .LBB0_2: # %cmpxchg.trystore
; CHECK-P7-NEXT: #
; CHECK-P7-NEXT: and 5, 5, 6
; CHECK-P7-NEXT: or 5, 5, 7
; CHECK-P7-NEXT: stwcx. 5, 0, 3
-; CHECK-P7-NEXT: beq 0, .LBB0_7
+; CHECK-P7-NEXT: beq+ 0, .LBB0_5
; CHECK-P7-NEXT: # %bb.3: # %cmpxchg.releasedload
; CHECK-P7-NEXT: #
; CHECK-P7-NEXT: lwarx 5, 0, 3
; CHECK-P7-NEXT: srw 8, 5, 4
; CHECK-P7-NEXT: clrlwi 8, 8, 16
; CHECK-P7-NEXT: cmplwi 8, 33059
-; CHECK-P7-NEXT: beq 0, .LBB0_2
+; CHECK-P7-NEXT: beq+ 0, .LBB0_2
; CHECK-P7-NEXT: .LBB0_4: # %cmpxchg.nostore
+; CHECK-P7-NEXT: crxor 20, 20, 20
; CHECK-P7-NEXT: lwsync
-; CHECK-P7-NEXT: b .LBB0_8
-; CHECK-P7-NEXT: .LBB0_5: # %L.B0000
+; CHECK-P7-NEXT: b .LBB0_6
+; CHECK-P7-NEXT: .LBB0_5: # %cmpxchg.success
+; CHECK-P7-NEXT: lwsync
+; CHECK-P7-NEXT: creqv 20, 20, 20
+; CHECK-P7-NEXT: .LBB0_6: # %cmpxchg.end
+; CHECK-P7-NEXT: bc 4, 20, .LBB0_9
+; CHECK-P7-NEXT: # %bb.7: # %L.B0000
; CHECK-P7-NEXT: lhz 3, 46(1)
; CHECK-P7-NEXT: cmplwi 3, 234
-; CHECK-P7-NEXT: bne 0, .LBB0_9
-; CHECK-P7-NEXT: # %bb.6: # %L.B0001
+; CHECK-P7-NEXT: bne 0, .LBB0_10
+; CHECK-P7-NEXT: # %bb.8: # %L.B0001
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-P7-NEXT: bl puts
; CHECK-P7-NEXT: nop
; CHECK-P7-NEXT: li 3, 0
-; CHECK-P7-NEXT: b .LBB0_11
-; CHECK-P7-NEXT: .LBB0_7: # %cmpxchg.success
-; CHECK-P7-NEXT: lwsync
-; CHECK-P7-NEXT: b .LBB0_5
-; CHECK-P7-NEXT: .LBB0_8: # %L.B0003
+; CHECK-P7-NEXT: b .LBB0_12
+; CHECK-P7-NEXT: .LBB0_9: # %L.B0003
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-P7-NEXT: addi 3, 3, 16
-; CHECK-P7-NEXT: b .LBB0_10
-; CHECK-P7-NEXT: .LBB0_9: # %L.B0005
+; CHECK-P7-NEXT: b .LBB0_11
+; CHECK-P7-NEXT: .LBB0_10: # %L.B0005
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-P7-NEXT: addi 3, 3, 64
-; CHECK-P7-NEXT: .LBB0_10: # %L.B0003
+; CHECK-P7-NEXT: .LBB0_11: # %L.B0003
; CHECK-P7-NEXT: bl puts
; CHECK-P7-NEXT: nop
; CHECK-P7-NEXT: li 3, 1
-; CHECK-P7-NEXT: .LBB0_11: # %L.B0003
+; CHECK-P7-NEXT: .LBB0_12: # %L.B0003
; CHECK-P7-NEXT: addi 1, 1, 48
; CHECK-P7-NEXT: ld 0, 16(1)
; CHECK-P7-NEXT: mtlr 0
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 07afea75aec67..7e892fc4ae6eb 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -4347,19 +4347,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 8, sc at toc@l(4)
; CHECK-NEXT: lbarx 5, 0, 6
; CHECK-NEXT: cmplw 5, 7
-; CHECK-NEXT: bne 0, .LBB3_4
+; CHECK-NEXT: bne- 0, .LBB3_4
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore276
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_2: # %cmpxchg.trystore275
; CHECK-NEXT: #
; CHECK-NEXT: stbcx. 8, 0, 6
-; CHECK-NEXT: beq 0, .LBB3_4
+; CHECK-NEXT: beq+ 0, .LBB3_4
; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload274
; CHECK-NEXT: #
; CHECK-NEXT: lbarx 5, 0, 6
; CHECK-NEXT: cmplw 5, 7
-; CHECK-NEXT: beq 0, .LBB3_2
+; CHECK-NEXT: beq+ 0, .LBB3_2
; CHECK-NEXT: .LBB3_4: # %cmpxchg.nostore272
; CHECK-NEXT: addi 7, 3, uc at toc@l
; CHECK-NEXT: lwsync
@@ -4367,20 +4366,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 9, uc at toc@l(3)
; CHECK-NEXT: lbarx 8, 0, 7
; CHECK-NEXT: cmplw 8, 9
-; CHECK-NEXT: bne 0, .LBB3_8
+; CHECK-NEXT: bne- 0, .LBB3_8
; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore257
; CHECK-NEXT: sync
; CHECK-NEXT: clrlwi 5, 5, 24
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_6: # %cmpxchg.trystore256
; CHECK-NEXT: #
; CHECK-NEXT: stbcx. 5, 0, 7
-; CHECK-NEXT: beq 0, .LBB3_8
+; CHECK-NEXT: beq+ 0, .LBB3_8
; CHECK-NEXT: # %bb.7: # %cmpxchg.releasedload255
; CHECK-NEXT: #
; CHECK-NEXT: lbarx 8, 0, 7
; CHECK-NEXT: cmplw 8, 9
-; CHECK-NEXT: beq 0, .LBB3_6
+; CHECK-NEXT: beq+ 0, .LBB3_6
; CHECK-NEXT: .LBB3_8: # %cmpxchg.nostore253
; CHECK-NEXT: addis 5, 2, ss at toc@ha
; CHECK-NEXT: lwsync
@@ -4390,21 +4388,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 8, 5, ss at toc@l
; CHECK-NEXT: lharx 9, 0, 8
; CHECK-NEXT: cmplw 9, 10
-; CHECK-NEXT: bne 0, .LBB3_12
+; CHECK-NEXT: bne- 0, .LBB3_12
; CHECK-NEXT: # %bb.9: # %cmpxchg.fencedstore238
; CHECK-NEXT: extsb 11, 11
; CHECK-NEXT: sync
; CHECK-NEXT: clrlwi 11, 11, 16
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_10: # %cmpxchg.trystore237
; CHECK-NEXT: #
; CHECK-NEXT: sthcx. 11, 0, 8
-; CHECK-NEXT: beq 0, .LBB3_12
+; CHECK-NEXT: beq+ 0, .LBB3_12
; CHECK-NEXT: # %bb.11: # %cmpxchg.releasedload236
; CHECK-NEXT: #
; CHECK-NEXT: lharx 9, 0, 8
; CHECK-NEXT: cmplw 9, 10
-; CHECK-NEXT: beq 0, .LBB3_10
+; CHECK-NEXT: beq+ 0, .LBB3_10
; CHECK-NEXT: .LBB3_12: # %cmpxchg.nostore234
; CHECK-NEXT: lwsync
; CHECK-NEXT: sth 9, ss at toc@l(5)
@@ -4414,21 +4411,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 9, 5, us at toc@l
; CHECK-NEXT: lharx 10, 0, 9
; CHECK-NEXT: cmplw 10, 11
-; CHECK-NEXT: bne 0, .LBB3_16
+; CHECK-NEXT: bne- 0, .LBB3_16
; CHECK-NEXT: # %bb.13: # %cmpxchg.fencedstore219
; CHECK-NEXT: extsb 12, 12
; CHECK-NEXT: sync
; CHECK-NEXT: clrlwi 12, 12, 16
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_14: # %cmpxchg.trystore218
; CHECK-NEXT: #
; CHECK-NEXT: sthcx. 12, 0, 9
-; CHECK-NEXT: beq 0, .LBB3_16
+; CHECK-NEXT: beq+ 0, .LBB3_16
; CHECK-NEXT: # %bb.15: # %cmpxchg.releasedload217
; CHECK-NEXT: #
; CHECK-NEXT: lharx 10, 0, 9
; CHECK-NEXT: cmplw 10, 11
-; CHECK-NEXT: beq 0, .LBB3_14
+; CHECK-NEXT: beq+ 0, .LBB3_14
; CHECK-NEXT: .LBB3_16: # %cmpxchg.nostore215
; CHECK-NEXT: lwsync
; CHECK-NEXT: sth 10, us at toc@l(5)
@@ -4438,20 +4434,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 10, 5, si at toc@l
; CHECK-NEXT: lwarx 11, 0, 10
; CHECK-NEXT: cmplw 11, 12
-; CHECK-NEXT: bne 0, .LBB3_20
+; CHECK-NEXT: bne- 0, .LBB3_20
; CHECK-NEXT: # %bb.17: # %cmpxchg.fencedstore200
; CHECK-NEXT: extsb 0, 0
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_18: # %cmpxchg.trystore199
; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 0, 0, 10
-; CHECK-NEXT: beq 0, .LBB3_20
+; CHECK-NEXT: beq+ 0, .LBB3_20
; CHECK-NEXT: # %bb.19: # %cmpxchg.releasedload198
; CHECK-NEXT: #
; CHECK-NEXT: lwarx 11, 0, 10
; CHECK-NEXT: cmplw 11, 12
-; CHECK-NEXT: beq 0, .LBB3_18
+; CHECK-NEXT: beq+ 0, .LBB3_18
; CHECK-NEXT: .LBB3_20: # %cmpxchg.nostore196
; CHECK-NEXT: lwsync
; CHECK-NEXT: stw 11, si at toc@l(5)
@@ -4461,20 +4456,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 11, 5, ui at toc@l
; CHECK-NEXT: lwarx 12, 0, 11
; CHECK-NEXT: cmplw 12, 0
-; CHECK-NEXT: bne 0, .LBB3_24
+; CHECK-NEXT: bne- 0, .LBB3_24
; CHECK-NEXT: # %bb.21: # %cmpxchg.fencedstore181
; CHECK-NEXT: extsb 30, 30
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_22: # %cmpxchg.trystore180
; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 30, 0, 11
-; CHECK-NEXT: beq 0, .LBB3_24
+; CHECK-NEXT: beq+ 0, .LBB3_24
; CHECK-NEXT: # %bb.23: # %cmpxchg.releasedload179
; CHECK-NEXT: #
; CHECK-NEXT: lwarx 12, 0, 11
; CHECK-NEXT: cmplw 12, 0
-; CHECK-NEXT: beq 0, .LBB3_22
+; CHECK-NEXT: beq+ 0, .LBB3_22
; CHECK-NEXT: .LBB3_24: # %cmpxchg.nostore177
; CHECK-NEXT: addis 30, 2, sll at toc@ha
; CHECK-NEXT: lwsync
@@ -4484,20 +4478,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 12, 30, sll at toc@l
; CHECK-NEXT: ldarx 0, 0, 12
; CHECK-NEXT: cmpld 0, 29
-; CHECK-NEXT: bne 0, .LBB3_28
+; CHECK-NEXT: bne- 0, .LBB3_28
; CHECK-NEXT: # %bb.25: # %cmpxchg.fencedstore162
; CHECK-NEXT: extsb 28, 28
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_26: # %cmpxchg.trystore161
; CHECK-NEXT: #
; CHECK-NEXT: stdcx. 28, 0, 12
-; CHECK-NEXT: beq 0, .LBB3_28
+; CHECK-NEXT: beq+ 0, .LBB3_28
; CHECK-NEXT: # %bb.27: # %cmpxchg.releasedload160
; CHECK-NEXT: #
; CHECK-NEXT: ldarx 0, 0, 12
; CHECK-NEXT: cmpld 0, 29
-; CHECK-NEXT: beq 0, .LBB3_26
+; CHECK-NEXT: beq+ 0, .LBB3_26
; CHECK-NEXT: .LBB3_28: # %cmpxchg.nostore158
; CHECK-NEXT: lwsync
; CHECK-NEXT: std 0, sll at toc@l(30)
@@ -4507,20 +4500,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: addi 0, 30, ull at toc@l
; CHECK-NEXT: ldarx 29, 0, 0
; CHECK-NEXT: cmpld 29, 28
-; CHECK-NEXT: bne 0, .LBB3_32
+; CHECK-NEXT: bne- 0, .LBB3_32
; CHECK-NEXT: # %bb.29: # %cmpxchg.fencedstore143
; CHECK-NEXT: extsb 27, 27
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_30: # %cmpxchg.trystore142
; CHECK-NEXT: #
; CHECK-NEXT: stdcx. 27, 0, 0
-; CHECK-NEXT: beq 0, .LBB3_32
+; CHECK-NEXT: beq+ 0, .LBB3_32
; CHECK-NEXT: # %bb.31: # %cmpxchg.releasedload141
; CHECK-NEXT: #
; CHECK-NEXT: ldarx 29, 0, 0
; CHECK-NEXT: cmpld 29, 28
-; CHECK-NEXT: beq 0, .LBB3_30
+; CHECK-NEXT: beq+ 0, .LBB3_30
; CHECK-NEXT: .LBB3_32: # %cmpxchg.nostore139
; CHECK-NEXT: lwsync
; CHECK-NEXT: std 29, ull at toc@l(30)
@@ -4528,19 +4520,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 29, sc at toc@l(4)
; CHECK-NEXT: lbarx 28, 0, 6
; CHECK-NEXT: cmplw 28, 30
-; CHECK-NEXT: bne 0, .LBB3_36
+; CHECK-NEXT: bne- 0, .LBB3_36
; CHECK-NEXT: # %bb.33: # %cmpxchg.fencedstore124
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_34: # %cmpxchg.trystore123
; CHECK-NEXT: #
; CHECK-NEXT: stbcx. 29, 0, 6
-; CHECK-NEXT: beq 0, .LBB3_37
+; CHECK-NEXT: beq+ 0, .LBB3_37
; CHECK-NEXT: # %bb.35: # %cmpxchg.releasedload122
; CHECK-NEXT: #
; CHECK-NEXT: lbarx 28, 0, 6
; CHECK-NEXT: cmplw 28, 30
-; CHECK-NEXT: beq 0, .LBB3_34
+; CHECK-NEXT: beq+ 0, .LBB3_34
; CHECK-NEXT: .LBB3_36: # %cmpxchg.nostore120
; CHECK-NEXT: lwsync
; CHECK-NEXT: crxor 20, 20, 20
@@ -4557,19 +4548,18 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 6, uc at toc@l(3)
; CHECK-NEXT: lbarx 29, 0, 7
; CHECK-NEXT: cmplw 29, 6
-; CHECK-NEXT: bne 0, .LBB3_42
+; CHECK-NEXT: bne- 0, .LBB3_42
; CHECK-NEXT: # %bb.39: # %cmpxchg.fencedstore105
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_40: # %cmpxchg.trystore104
; CHECK-NEXT: #
; CHECK-NEXT: stbcx. 30, 0, 7
-; CHECK-NEXT: beq 0, .LBB3_43
+; CHECK-NEXT: beq+ 0, .LBB3_43
; CHECK-NEXT: # %bb.41: # %cmpxchg.releasedload103
; CHECK-NEXT: #
; CHECK-NEXT: lbarx 29, 0, 7
; CHECK-NEXT: cmplw 29, 6
-; CHECK-NEXT: beq 0, .LBB3_40
+; CHECK-NEXT: beq+ 0, .LBB3_40
; CHECK-NEXT: .LBB3_42: # %cmpxchg.nostore101
; CHECK-NEXT: lwsync
; CHECK-NEXT: crxor 20, 20, 20
@@ -4586,21 +4576,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 6, uc at toc@l(3)
; CHECK-NEXT: lharx 30, 0, 8
; CHECK-NEXT: cmplw 30, 6
-; CHECK-NEXT: bne 0, .LBB3_48
+; CHECK-NEXT: bne- 0, .LBB3_48
; CHECK-NEXT: # %bb.45: # %cmpxchg.fencedstore86
; CHECK-NEXT: extsb 7, 7
; CHECK-NEXT: sync
; CHECK-NEXT: clrlwi 7, 7, 16
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_46: # %cmpxchg.trystore85
; CHECK-NEXT: #
; CHECK-NEXT: sthcx. 7, 0, 8
-; CHECK-NEXT: beq 0, .LBB3_49
+; CHECK-NEXT: beq+ 0, .LBB3_49
; CHECK-NEXT: # %bb.47: # %cmpxchg.releasedload84
; CHECK-NEXT: #
; CHECK-NEXT: lharx 30, 0, 8
; CHECK-NEXT: cmplw 30, 6
-; CHECK-NEXT: beq 0, .LBB3_46
+; CHECK-NEXT: beq+ 0, .LBB3_46
; CHECK-NEXT: .LBB3_48: # %cmpxchg.nostore82
; CHECK-NEXT: lwsync
; CHECK-NEXT: crxor 20, 20, 20
@@ -4617,21 +4606,20 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 6, uc at toc@l(3)
; CHECK-NEXT: lharx 8, 0, 9
; CHECK-NEXT: cmplw 8, 6
-; CHECK-NEXT: bne 0, .LBB3_54
+; CHECK-NEXT: bne- 0, .LBB3_54
; CHECK-NEXT: # %bb.51: # %cmpxchg.fencedstore67
; CHECK-NEXT: extsb 7, 7
; CHECK-NEXT: sync
; CHECK-NEXT: clrlwi 7, 7, 16
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_52: # %cmpxchg.trystore66
; CHECK-NEXT: #
; CHECK-NEXT: sthcx. 7, 0, 9
-; CHECK-NEXT: beq 0, .LBB3_55
+; CHECK-NEXT: beq+ 0, .LBB3_55
; CHECK-NEXT: # %bb.53: # %cmpxchg.releasedload65
; CHECK-NEXT: #
; CHECK-NEXT: lharx 8, 0, 9
; CHECK-NEXT: cmplw 8, 6
-; CHECK-NEXT: beq 0, .LBB3_52
+; CHECK-NEXT: beq+ 0, .LBB3_52
; CHECK-NEXT: .LBB3_54: # %cmpxchg.nostore63
; CHECK-NEXT: lwsync
; CHECK-NEXT: crxor 20, 20, 20
@@ -4648,20 +4636,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 6, uc at toc@l(3)
; CHECK-NEXT: lwarx 8, 0, 10
; CHECK-NEXT: cmplw 8, 6
-; CHECK-NEXT: bne 0, .LBB3_60
+; CHECK-NEXT: bne- 0, .LBB3_60
; CHECK-NEXT: # %bb.57: # %cmpxchg.fencedstore48
; CHECK-NEXT: extsb 7, 7
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_58: # %cmpxchg.trystore47
; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 7, 0, 10
-; CHECK-NEXT: beq 0, .LBB3_61
+; CHECK-NEXT: beq+ 0, .LBB3_61
; CHECK-NEXT: # %bb.59: # %cmpxchg.releasedload46
; CHECK-NEXT: #
; CHECK-NEXT: lwarx 8, 0, 10
; CHECK-NEXT: cmplw 8, 6
-; CHECK-NEXT: beq 0, .LBB3_58
+; CHECK-NEXT: beq+ 0, .LBB3_58
; CHECK-NEXT: .LBB3_60: # %cmpxchg.nostore44
; CHECK-NEXT: lwsync
; CHECK-NEXT: crxor 20, 20, 20
@@ -4678,20 +4665,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 6, uc at toc@l(3)
; CHECK-NEXT: lwarx 8, 0, 11
; CHECK-NEXT: cmplw 8, 6
-; CHECK-NEXT: bne 0, .LBB3_66
+; CHECK-NEXT: bne- 0, .LBB3_66
; CHECK-NEXT: # %bb.63: # %cmpxchg.fencedstore29
; CHECK-NEXT: extsb 7, 7
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_64: # %cmpxchg.trystore28
; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 7, 0, 11
-; CHECK-NEXT: beq 0, .LBB3_67
+; CHECK-NEXT: beq+ 0, .LBB3_67
; CHECK-NEXT: # %bb.65: # %cmpxchg.releasedload27
; CHECK-NEXT: #
; CHECK-NEXT: lwarx 8, 0, 11
; CHECK-NEXT: cmplw 8, 6
-; CHECK-NEXT: beq 0, .LBB3_64
+; CHECK-NEXT: beq+ 0, .LBB3_64
; CHECK-NEXT: .LBB3_66: # %cmpxchg.nostore25
; CHECK-NEXT: lwsync
; CHECK-NEXT: crxor 20, 20, 20
@@ -4708,20 +4694,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: lbz 6, uc at toc@l(3)
; CHECK-NEXT: ldarx 8, 0, 12
; CHECK-NEXT: cmpld 8, 6
-; CHECK-NEXT: bne 0, .LBB3_72
+; CHECK-NEXT: bne- 0, .LBB3_72
; CHECK-NEXT: # %bb.69: # %cmpxchg.fencedstore10
; CHECK-NEXT: extsb 7, 7
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_70: # %cmpxchg.trystore9
; CHECK-NEXT: #
; CHECK-NEXT: stdcx. 7, 0, 12
-; CHECK-NEXT: beq 0, .LBB3_73
+; CHECK-NEXT: beq+ 0, .LBB3_73
; CHECK-NEXT: # %bb.71: # %cmpxchg.releasedload8
; CHECK-NEXT: #
; CHECK-NEXT: ldarx 8, 0, 12
; CHECK-NEXT: cmpld 8, 6
-; CHECK-NEXT: beq 0, .LBB3_70
+; CHECK-NEXT: beq+ 0, .LBB3_70
; CHECK-NEXT: .LBB3_72: # %cmpxchg.nostore6
; CHECK-NEXT: lwsync
; CHECK-NEXT: crxor 20, 20, 20
@@ -4738,20 +4723,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-NEXT: stw 6, ui at toc@l(5)
; CHECK-NEXT: ldarx 6, 0, 0
; CHECK-NEXT: cmpld 6, 3
-; CHECK-NEXT: bne 0, .LBB3_78
+; CHECK-NEXT: bne- 0, .LBB3_78
; CHECK-NEXT: # %bb.75: # %cmpxchg.fencedstore
; CHECK-NEXT: extsb 4, 4
; CHECK-NEXT: sync
-; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_76: # %cmpxchg.trystore
; CHECK-NEXT: #
; CHECK-NEXT: stdcx. 4, 0, 0
-; CHECK-NEXT: beq 0, .LBB3_79
+; CHECK-NEXT: beq+ 0, .LBB3_79
; CHECK-NEXT: # %bb.77: # %cmpxchg.releasedload
; CHECK-NEXT: #
; CHECK-NEXT: ldarx 6, 0, 0
; CHECK-NEXT: cmpld 6, 3
-; CHECK-NEXT: beq 0, .LBB3_76
+; CHECK-NEXT: beq+ 0, .LBB3_76
; CHECK-NEXT: .LBB3_78: # %cmpxchg.nostore
; CHECK-NEXT: lwsync
; CHECK-NEXT: crxor 20, 20, 20
@@ -4807,24 +4791,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: srw 6, 3, 26
; AIX32-NEXT: clrlwi 6, 6, 24
; AIX32-NEXT: cmplw 6, 4
-; AIX32-NEXT: bne 0, L..BB3_4
+; AIX32-NEXT: bne- 0, L..BB3_4
; AIX32-NEXT: # %bb.1: # %cmpxchg.fencedstore289
; AIX32-NEXT: sync
; AIX32-NEXT: slw 5, 5, 26
-; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB3_2: # %cmpxchg.trystore288
; AIX32-NEXT: #
; AIX32-NEXT: and 6, 3, 25
; AIX32-NEXT: or 6, 6, 5
; AIX32-NEXT: stwcx. 6, 0, 27
-; AIX32-NEXT: beq 0, L..BB3_4
+; AIX32-NEXT: beq+ 0, L..BB3_4
; AIX32-NEXT: # %bb.3: # %cmpxchg.releasedload287
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 3, 0, 27
; AIX32-NEXT: srw 6, 3, 26
; AIX32-NEXT: clrlwi 6, 6, 24
; AIX32-NEXT: cmplw 6, 4
-; AIX32-NEXT: beq 0, L..BB3_2
+; AIX32-NEXT: beq+ 0, L..BB3_2
; AIX32-NEXT: L..BB3_4: # %cmpxchg.nostore285
; AIX32-NEXT: not 4, 30
; AIX32-NEXT: srw 5, 3, 26
@@ -4840,25 +4823,24 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: srw 6, 4, 23
; AIX32-NEXT: clrlwi 6, 6, 24
; AIX32-NEXT: cmplw 6, 3
-; AIX32-NEXT: bne 0, L..BB3_8
+; AIX32-NEXT: bne- 0, L..BB3_8
; AIX32-NEXT: # %bb.5: # %cmpxchg.fencedstore256
; AIX32-NEXT: clrlwi 5, 5, 24
; AIX32-NEXT: sync
; AIX32-NEXT: slw 5, 5, 23
-; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB3_6: # %cmpxchg.trystore255
; AIX32-NEXT: #
; AIX32-NEXT: and 6, 4, 22
; AIX32-NEXT: or 6, 6, 5
; AIX32-NEXT: stwcx. 6, 0, 24
-; AIX32-NEXT: beq 0, L..BB3_8
+; AIX32-NEXT: beq+ 0, L..BB3_8
; AIX32-NEXT: # %bb.7: # %cmpxchg.releasedload254
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 24
; AIX32-NEXT: srw 6, 4, 23
; AIX32-NEXT: clrlwi 6, 6, 24
; AIX32-NEXT: cmplw 6, 3
-; AIX32-NEXT: beq 0, L..BB3_6
+; AIX32-NEXT: beq+ 0, L..BB3_6
; AIX32-NEXT: L..BB3_8: # %cmpxchg.nostore252
; AIX32-NEXT: srw 4, 4, 23
; AIX32-NEXT: lwsync
@@ -4878,26 +4860,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: srw 8, 5, 20
; AIX32-NEXT: clrlwi 8, 8, 16
; AIX32-NEXT: cmplw 8, 6
-; AIX32-NEXT: bne 0, L..BB3_12
+; AIX32-NEXT: bne- 0, L..BB3_12
; AIX32-NEXT: # %bb.9: # %cmpxchg.fencedstore223
; AIX32-NEXT: extsb 7, 7
; AIX32-NEXT: sync
; AIX32-NEXT: clrlwi 7, 7, 16
; AIX32-NEXT: slw 7, 7, 20
-; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB3_10: # %cmpxchg.trystore222
; AIX32-NEXT: #
; AIX32-NEXT: and 8, 5, 19
; AIX32-NEXT: or 8, 8, 7
; AIX32-NEXT: stwcx. 8, 0, 21
-; AIX32-NEXT: beq 0, L..BB3_12
+; AIX32-NEXT: beq+ 0, L..BB3_12
; AIX32-NEXT: # %bb.11: # %cmpxchg.releasedload221
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 5, 0, 21
; AIX32-NEXT: srw 8, 5, 20
; AIX32-NEXT: clrlwi 8, 8, 16
; AIX32-NEXT: cmplw 8, 6
-; AIX32-NEXT: beq 0, L..BB3_10
+; AIX32-NEXT: beq+ 0, L..BB3_10
; AIX32-NEXT: L..BB3_12: # %cmpxchg.nostore219
; AIX32-NEXT: srw 5, 5, 20
; AIX32-NEXT: lwsync
@@ -4915,26 +4896,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: srw 7, 3, 17
; AIX32-NEXT: clrlwi 7, 7, 16
; AIX32-NEXT: cmplw 7, 5
-; AIX32-NEXT: bne 0, L..BB3_16
+; AIX32-NEXT: bne- 0, L..BB3_16
; AIX32-NEXT: # %bb.13: # %cmpxchg.fencedstore190
; AIX32-NEXT: extsb 6, 6
; AIX32-NEXT: sync
; AIX32-NEXT: clrlwi 6, 6, 16
; AIX32-NEXT: slw 6, 6, 17
-; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB3_14: # %cmpxchg.trystore189
; AIX32-NEXT: #
; AIX32-NEXT: and 7, 3, 16
; AIX32-NEXT: or 7, 7, 6
; AIX32-NEXT: stwcx. 7, 0, 18
-; AIX32-NEXT: beq 0, L..BB3_16
+; AIX32-NEXT: beq+ 0, L..BB3_16
; AIX32-NEXT: # %bb.15: # %cmpxchg.releasedload188
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 3, 0, 18
; AIX32-NEXT: srw 7, 3, 17
; AIX32-NEXT: clrlwi 7, 7, 16
; AIX32-NEXT: cmplw 7, 5
-; AIX32-NEXT: beq 0, L..BB3_14
+; AIX32-NEXT: beq+ 0, L..BB3_14
; AIX32-NEXT: L..BB3_16: # %cmpxchg.nostore186
; AIX32-NEXT: srw 3, 3, 17
; AIX32-NEXT: lwsync
@@ -4944,20 +4924,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: lbz 4, 0(30)
; AIX32-NEXT: lwarx 3, 0, 15
; AIX32-NEXT: cmplw 3, 4
-; AIX32-NEXT: bne 0, L..BB3_20
+; AIX32-NEXT: bne- 0, L..BB3_20
; AIX32-NEXT: # %bb.17: # %cmpxchg.fencedstore171
; AIX32-NEXT: extsb 5, 5
; AIX32-NEXT: sync
-; AIX32-NEXT: .align 5
; AIX32-NEXT: L..BB3_18: # %cmpxchg.trystore170
; AIX32-NEXT: #
; AIX32-NEXT: stwcx. 5, 0, 15
-; AIX32-NEXT: beq 0, L..BB3_20
+; AIX32-NEXT: beq+ 0, L..BB3_20
; AIX32-NEXT: # %bb.19: # %cmpxchg.releasedload169
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 3, 0, 15
; AIX32-NEXT: cmplw 3, 4
-; AIX32-NEXT: beq 0, L..BB3_18
+; AIX32-NEXT: beq+ 0, L..BB3_18
; AIX32-NEXT: L..BB3_20: # %cmpxchg.nostore167
; AIX32-NEXT: lwsync
; AIX32-NEXT: lwz 28, L..C5(2) # @ui
@@ -4966,20 +4945,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: lbz 5, 0(29)
; AIX32-NEXT: lwarx 3, 0, 28
; AIX32-NEXT: cmplw 3, 4
-; AIX32-NEXT: bne 0, L..BB3_24
+; AIX32-NEXT: bne- 0, L..BB3_24
; AIX32-NEXT: # %bb.21: # %cmpxchg.fencedstore152
; AIX32-NEXT: extsb 5, 5
; AIX32-NEXT: sync
-; AIX32-NEXT: .align 5
; AIX32-NEXT: L..BB3_22: # %cmpxchg.trystore151
; AIX32-NEXT: #
; AIX32-NEXT: stwcx. 5, 0, 28
-; AIX32-NEXT: beq 0, L..BB3_24
+; AIX32-NEXT: beq+ 0, L..BB3_24
; AIX32-NEXT: # %bb.23: # %cmpxchg.releasedload150
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 3, 0, 28
; AIX32-NEXT: cmplw 3, 4
-; AIX32-NEXT: beq 0, L..BB3_22
+; AIX32-NEXT: beq+ 0, L..BB3_22
; AIX32-NEXT: L..BB3_24: # %cmpxchg.nostore148
; AIX32-NEXT: lwsync
; AIX32-NEXT: stw 3, 0(28)
@@ -5024,24 +5002,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: srw 6, 4, 26
; AIX32-NEXT: clrlwi 6, 6, 24
; AIX32-NEXT: cmplw 6, 3
-; AIX32-NEXT: bne 0, L..BB3_28
+; AIX32-NEXT: bne- 0, L..BB3_28
; AIX32-NEXT: # %bb.25: # %cmpxchg.fencedstore119
; AIX32-NEXT: sync
; AIX32-NEXT: slw 5, 5, 26
-; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB3_26: # %cmpxchg.trystore118
; AIX32-NEXT: #
; AIX32-NEXT: and 4, 4, 25
; AIX32-NEXT: or 4, 4, 5
; AIX32-NEXT: stwcx. 4, 0, 27
-; AIX32-NEXT: beq 0, L..BB3_29
+; AIX32-NEXT: beq+ 0, L..BB3_29
; AIX32-NEXT: # %bb.27: # %cmpxchg.releasedload117
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 27
; AIX32-NEXT: srw 6, 4, 26
; AIX32-NEXT: clrlwi 6, 6, 24
; AIX32-NEXT: cmplw 6, 3
-; AIX32-NEXT: beq 0, L..BB3_26
+; AIX32-NEXT: beq+ 0, L..BB3_26
; AIX32-NEXT: L..BB3_28: # %cmpxchg.nostore115
; AIX32-NEXT: crxor 20, 20, 20
; AIX32-NEXT: lwsync
@@ -5060,24 +5037,23 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: srw 6, 4, 23
; AIX32-NEXT: clrlwi 6, 6, 24
; AIX32-NEXT: cmplw 6, 3
-; AIX32-NEXT: bne 0, L..BB3_34
+; AIX32-NEXT: bne- 0, L..BB3_34
; AIX32-NEXT: # %bb.31: # %cmpxchg.fencedstore86
; AIX32-NEXT: sync
; AIX32-NEXT: slw 5, 5, 23
-; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB3_32: # %cmpxchg.trystore85
; AIX32-NEXT: #
; AIX32-NEXT: and 4, 4, 22
; AIX32-NEXT: or 4, 4, 5
; AIX32-NEXT: stwcx. 4, 0, 24
-; AIX32-NEXT: beq 0, L..BB3_35
+; AIX32-NEXT: beq+ 0, L..BB3_35
; AIX32-NEXT: # %bb.33: # %cmpxchg.releasedload84
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 24
; AIX32-NEXT: srw 6, 4, 23
; AIX32-NEXT: clrlwi 6, 6, 24
; AIX32-NEXT: cmplw 6, 3
-; AIX32-NEXT: beq 0, L..BB3_32
+; AIX32-NEXT: beq+ 0, L..BB3_32
; AIX32-NEXT: L..BB3_34: # %cmpxchg.nostore82
; AIX32-NEXT: crxor 20, 20, 20
; AIX32-NEXT: lwsync
@@ -5096,26 +5072,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: srw 6, 4, 20
; AIX32-NEXT: clrlwi 6, 6, 16
; AIX32-NEXT: cmplw 6, 3
-; AIX32-NEXT: bne 0, L..BB3_40
+; AIX32-NEXT: bne- 0, L..BB3_40
; AIX32-NEXT: # %bb.37: # %cmpxchg.fencedstore53
; AIX32-NEXT: extsb 5, 5
; AIX32-NEXT: sync
; AIX32-NEXT: clrlwi 5, 5, 16
; AIX32-NEXT: slw 5, 5, 20
-; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB3_38: # %cmpxchg.trystore52
; AIX32-NEXT: #
; AIX32-NEXT: and 4, 4, 19
; AIX32-NEXT: or 4, 4, 5
; AIX32-NEXT: stwcx. 4, 0, 21
-; AIX32-NEXT: beq 0, L..BB3_41
+; AIX32-NEXT: beq+ 0, L..BB3_41
; AIX32-NEXT: # %bb.39: # %cmpxchg.releasedload51
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 21
; AIX32-NEXT: srw 6, 4, 20
; AIX32-NEXT: clrlwi 6, 6, 16
; AIX32-NEXT: cmplw 6, 3
-; AIX32-NEXT: beq 0, L..BB3_38
+; AIX32-NEXT: beq+ 0, L..BB3_38
; AIX32-NEXT: L..BB3_40: # %cmpxchg.nostore49
; AIX32-NEXT: crxor 20, 20, 20
; AIX32-NEXT: lwsync
@@ -5134,26 +5109,25 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: srw 6, 4, 17
; AIX32-NEXT: clrlwi 6, 6, 16
; AIX32-NEXT: cmplw 6, 3
-; AIX32-NEXT: bne 0, L..BB3_46
+; AIX32-NEXT: bne- 0, L..BB3_46
; AIX32-NEXT: # %bb.43: # %cmpxchg.fencedstore29
; AIX32-NEXT: extsb 5, 5
; AIX32-NEXT: sync
; AIX32-NEXT: clrlwi 5, 5, 16
; AIX32-NEXT: slw 5, 5, 17
-; AIX32-NEXT: .align 4
; AIX32-NEXT: L..BB3_44: # %cmpxchg.trystore28
; AIX32-NEXT: #
; AIX32-NEXT: and 4, 4, 16
; AIX32-NEXT: or 4, 4, 5
; AIX32-NEXT: stwcx. 4, 0, 18
-; AIX32-NEXT: beq 0, L..BB3_47
+; AIX32-NEXT: beq+ 0, L..BB3_47
; AIX32-NEXT: # %bb.45: # %cmpxchg.releasedload27
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 4, 0, 18
; AIX32-NEXT: srw 6, 4, 17
; AIX32-NEXT: clrlwi 6, 6, 16
; AIX32-NEXT: cmplw 6, 3
-; AIX32-NEXT: beq 0, L..BB3_44
+; AIX32-NEXT: beq+ 0, L..BB3_44
; AIX32-NEXT: L..BB3_46: # %cmpxchg.nostore25
; AIX32-NEXT: crxor 20, 20, 20
; AIX32-NEXT: lwsync
@@ -5170,20 +5144,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: lbz 3, 0(30)
; AIX32-NEXT: lwarx 5, 0, 15
; AIX32-NEXT: cmplw 5, 3
-; AIX32-NEXT: bne 0, L..BB3_52
+; AIX32-NEXT: bne- 0, L..BB3_52
; AIX32-NEXT: # %bb.49: # %cmpxchg.fencedstore10
; AIX32-NEXT: extsb 4, 4
; AIX32-NEXT: sync
-; AIX32-NEXT: .align 5
; AIX32-NEXT: L..BB3_50: # %cmpxchg.trystore9
; AIX32-NEXT: #
; AIX32-NEXT: stwcx. 4, 0, 15
-; AIX32-NEXT: beq 0, L..BB3_53
+; AIX32-NEXT: beq+ 0, L..BB3_53
; AIX32-NEXT: # %bb.51: # %cmpxchg.releasedload8
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 5, 0, 15
; AIX32-NEXT: cmplw 5, 3
-; AIX32-NEXT: beq 0, L..BB3_50
+; AIX32-NEXT: beq+ 0, L..BB3_50
; AIX32-NEXT: L..BB3_52: # %cmpxchg.nostore6
; AIX32-NEXT: crxor 20, 20, 20
; AIX32-NEXT: lwsync
@@ -5200,20 +5173,19 @@ define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; AIX32-NEXT: lbz 3, 0(30)
; AIX32-NEXT: lwarx 5, 0, 28
; AIX32-NEXT: cmplw 5, 3
-; AIX32-NEXT: bne 0, L..BB3_58
+; AIX32-NEXT: bne- 0, L..BB3_58
; AIX32-NEXT: # %bb.55: # %cmpxchg.fencedstore
; AIX32-NEXT: extsb 4, 4
; AIX32-NEXT: sync
-; AIX32-NEXT: .align 5
; AIX32-NEXT: L..BB3_56: # %cmpxchg.trystore
; AIX32-NEXT: #
; AIX32-NEXT: stwcx. 4, 0, 28
-; AIX32-NEXT: beq 0, L..BB3_59
+; AIX32-NEXT: beq+ 0, L..BB3_59
; AIX32-NEXT: # %bb.57: # %cmpxchg.releasedload
; AIX32-NEXT: #
; AIX32-NEXT: lwarx 5, 0, 28
; AIX32-NEXT: cmplw 5, 3
-; AIX32-NEXT: beq 0, L..BB3_56
+; AIX32-NEXT: beq+ 0, L..BB3_56
; AIX32-NEXT: L..BB3_58: # %cmpxchg.nostore
; AIX32-NEXT: crxor 20, 20, 20
; AIX32-NEXT: lwsync
@@ -5838,21 +5810,20 @@ define dso_local i64 @cmpswplp(ptr noundef %ptr, ptr nocapture noundef readnone
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ldarx 4, 0, 3
; CHECK-NEXT: cmpld 4, 5
-; CHECK-NEXT: bne 0, .LBB6_2
+; CHECK-NEXT: bne- 0, .LBB6_3
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: addi 4, 5, 1
+; CHECK-NEXT: creqv 20, 20, 20
; CHECK-NEXT: stdcx. 4, 0, 3
-; CHECK-NEXT: beq 0, .LBB6_4
-; CHECK-NEXT: .LBB6_2: # %cmpxchg.failure
-; CHECK-NEXT: crxor 20, 20, 20
-; CHECK-NEXT: .LBB6_3: # %cmpxchg.end
+; CHECK-NEXT: bne- 0, .LBB6_3
+; CHECK-NEXT: .LBB6_2: # %cmpxchg.end
; CHECK-NEXT: li 3, 66
; CHECK-NEXT: li 4, 55
; CHECK-NEXT: isel 3, 4, 3, 20
; CHECK-NEXT: blr
-; CHECK-NEXT: .LBB6_4:
-; CHECK-NEXT: creqv 20, 20, 20
-; CHECK-NEXT: b .LBB6_3
+; CHECK-NEXT: .LBB6_3: # %cmpxchg.failure
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: b .LBB6_2
;
; AIX32-LABEL: cmpswplp:
; AIX32: # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll b/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
index 65a12a6222f24..ae071194b4479 100644
--- a/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
+++ b/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
@@ -19,13 +19,14 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) {
; CHECK-NEXT: stw r5, -16(r1)
; CHECK-NEXT: lwarx r6, 0, r3
; CHECK-NEXT: cmplw r6, r7
-; CHECK-NEXT: bne cr0, L..BB0_2
+; CHECK-NEXT: bne- cr0, L..BB0_5
; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; CHECK-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
; CHECK-NEXT: stwcx. r5, 0, r3
-; CHECK-NEXT: beq cr0, L..BB0_5
-; CHECK-NEXT: L..BB0_2: # %cmpxchg.failure
-; CHECK-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK-NEXT: # %bb.3: # %cmpxchg.store_expected
+; CHECK-NEXT: bne- cr0, L..BB0_5
+; CHECK-NEXT: # %bb.2: # %cmpxchg.end
+; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_4
+; CHECK-NEXT: L..BB0_3: # %cmpxchg.store_expected
; CHECK-NEXT: stw r6, 0(r4)
; CHECK-NEXT: L..BB0_4: # %cmpxchg.continue
; CHECK-NEXT: li r3, 0
@@ -33,9 +34,9 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) {
; CHECK-NEXT: isel r3, r4, r3, 4*cr5+lt
; CHECK-NEXT: stb r3, -17(r1)
; CHECK-NEXT: blr
-; CHECK-NEXT: L..BB0_5:
-; CHECK-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK-NEXT: b L..BB0_4
+; CHECK-NEXT: L..BB0_5: # %cmpxchg.failure
+; CHECK-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
+; CHECK-NEXT: b L..BB0_3
;
; CHECK64-LABEL: foo:
; CHECK64: # %bb.0: # %entry
@@ -46,13 +47,14 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) {
; CHECK64-NEXT: stw r5, -24(r1)
; CHECK64-NEXT: lwarx r6, 0, r3
; CHECK64-NEXT: cmplw r6, r7
-; CHECK64-NEXT: bne cr0, L..BB0_2
+; CHECK64-NEXT: bne- cr0, L..BB0_5
; CHECK64-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; CHECK64-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
; CHECK64-NEXT: stwcx. r5, 0, r3
-; CHECK64-NEXT: beq cr0, L..BB0_5
-; CHECK64-NEXT: L..BB0_2: # %cmpxchg.failure
-; CHECK64-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK64-NEXT: # %bb.3: # %cmpxchg.store_expected
+; CHECK64-NEXT: bne- cr0, L..BB0_5
+; CHECK64-NEXT: # %bb.2: # %cmpxchg.end
+; CHECK64-NEXT: bc 12, 4*cr5+lt, L..BB0_4
+; CHECK64-NEXT: L..BB0_3: # %cmpxchg.store_expected
; CHECK64-NEXT: stw r6, 0(r4)
; CHECK64-NEXT: L..BB0_4: # %cmpxchg.continue
; CHECK64-NEXT: li r3, 0
@@ -63,9 +65,9 @@ define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) {
; CHECK64-NEXT: li r3, 0
; CHECK64-NEXT: isel r3, r4, r3, 4*cr5+lt
; CHECK64-NEXT: blr
-; CHECK64-NEXT: L..BB0_5:
-; CHECK64-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
-; CHECK64-NEXT: b L..BB0_4
+; CHECK64-NEXT: L..BB0_5: # %cmpxchg.failure
+; CHECK64-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
+; CHECK64-NEXT: b L..BB0_3
entry:
%cp.addr = alloca ptr, align 4
%old.addr = alloca ptr, align 4
diff --git a/llvm/test/CodeGen/PowerPC/atomic-float.ll b/llvm/test/CodeGen/PowerPC/atomic-float.ll
index 600d28936c162..8232a44c7da26 100644
--- a/llvm/test/CodeGen/PowerPC/atomic-float.ll
+++ b/llvm/test/CodeGen/PowerPC/atomic-float.ll
@@ -9,37 +9,36 @@ define float @test_add(ptr %ptr, float %incr) {
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: sync
; CHECK-64-NEXT: lfs 0, 0(3)
-; CHECK-64-NEXT: b .LBB0_3
-; CHECK-64-NEXT: .LBB0_1: # %cmpxchg.nostore
-; CHECK-64-NEXT: # in Loop: Header=BB0_3 Depth=1
-; CHECK-64-NEXT: crxor 20, 20, 20
-; CHECK-64-NEXT: .LBB0_2: # %cmpxchg.end
-; CHECK-64-NEXT: # in Loop: Header=BB0_3 Depth=1
-; CHECK-64-NEXT: stw 4, -12(1)
-; CHECK-64-NEXT: lfs 0, -12(1)
-; CHECK-64-NEXT: bc 12, 20, .LBB0_7
-; CHECK-64-NEXT: .LBB0_3: # %atomicrmw.start
-; CHECK-64-NEXT: # =>This Loop Header: Depth=1
-; CHECK-64-NEXT: # Child Loop BB0_4 Depth 2
+; CHECK-64-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-64-NEXT: # =>This Loop Header: Depth=1
+; CHECK-64-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-64-NEXT: fadds 2, 0, 1
; CHECK-64-NEXT: stfs 2, -4(1)
; CHECK-64-NEXT: stfs 0, -8(1)
; CHECK-64-NEXT: lwz 5, -4(1)
; CHECK-64-NEXT: lwz 6, -8(1)
-; CHECK-64-NEXT: .LBB0_4: # %cmpxchg.start
-; CHECK-64-NEXT: # Parent Loop BB0_3 Depth=1
-; CHECK-64-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-64-NEXT: .LBB0_2: # %cmpxchg.start
+; CHECK-64-NEXT: # Parent Loop BB0_1 Depth=1
+; CHECK-64-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-64-NEXT: lwarx 4, 0, 3
-; CHECK-64-NEXT: cmplw 4, 6
-; CHECK-64-NEXT: bne 0, .LBB0_1
-; CHECK-64-NEXT: # %bb.5: # %cmpxchg.fencedstore
-; CHECK-64-NEXT: # in Loop: Header=BB0_4 Depth=2
+; CHECK-64-NEXT: cmplw 4, 6
+; CHECK-64-NEXT: bne- 0, .LBB0_5
+; CHECK-64-NEXT: # %bb.3: # %cmpxchg.fencedstore
+; CHECK-64-NEXT: #
; CHECK-64-NEXT: stwcx. 5, 0, 3
-; CHECK-64-NEXT: bne 0, .LBB0_4
-; CHECK-64-NEXT: # %bb.6: # in Loop: Header=BB0_3 Depth=1
; CHECK-64-NEXT: creqv 20, 20, 20
-; CHECK-64-NEXT: b .LBB0_2
-; CHECK-64-NEXT: .LBB0_7: # %atomicrmw.end
+; CHECK-64-NEXT: bne- 0, .LBB0_2
+; CHECK-64-NEXT: .LBB0_4: # %cmpxchg.end
+; CHECK-64-NEXT: #
+; CHECK-64-NEXT: stw 4, -12(1)
+; CHECK-64-NEXT: lfs 0, -12(1)
+; CHECK-64-NEXT: bc 4, 20, .LBB0_1
+; CHECK-64-NEXT: b .LBB0_6
+; CHECK-64-NEXT: .LBB0_5: # %cmpxchg.nostore
+; CHECK-64-NEXT: #
+; CHECK-64-NEXT: crxor 20, 20, 20
+; CHECK-64-NEXT: b .LBB0_4
+; CHECK-64-NEXT: .LBB0_6: # %atomicrmw.end
; CHECK-64-NEXT: fmr 1, 0
; CHECK-64-NEXT: lwsync
; CHECK-64-NEXT: blr
@@ -50,37 +49,36 @@ define float @test_add(ptr %ptr, float %incr) {
; CHECK-32-NEXT: .cfi_def_cfa_offset 32
; CHECK-32-NEXT: sync
; CHECK-32-NEXT: lfs 0, 0(3)
-; CHECK-32-NEXT: b .LBB0_3
-; CHECK-32-NEXT: .LBB0_1: # %cmpxchg.nostore
-; CHECK-32-NEXT: # in Loop: Header=BB0_3 Depth=1
-; CHECK-32-NEXT: crxor 20, 20, 20
-; CHECK-32-NEXT: .LBB0_2: # %cmpxchg.end
-; CHECK-32-NEXT: # in Loop: Header=BB0_3 Depth=1
-; CHECK-32-NEXT: stw 4, 20(1)
-; CHECK-32-NEXT: lfs 0, 20(1)
-; CHECK-32-NEXT: bc 12, 20, .LBB0_7
-; CHECK-32-NEXT: .LBB0_3: # %atomicrmw.start
-; CHECK-32-NEXT: # =>This Loop Header: Depth=1
-; CHECK-32-NEXT: # Child Loop BB0_4 Depth 2
+; CHECK-32-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-32-NEXT: # =>This Loop Header: Depth=1
+; CHECK-32-NEXT: # Child Loop BB0_2 Depth 2
; CHECK-32-NEXT: fadds 2, 0, 1
; CHECK-32-NEXT: stfs 2, 28(1)
; CHECK-32-NEXT: stfs 0, 24(1)
; CHECK-32-NEXT: lwz 5, 28(1)
; CHECK-32-NEXT: lwz 6, 24(1)
-; CHECK-32-NEXT: .LBB0_4: # %cmpxchg.start
-; CHECK-32-NEXT: # Parent Loop BB0_3 Depth=1
-; CHECK-32-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-32-NEXT: .LBB0_2: # %cmpxchg.start
+; CHECK-32-NEXT: # Parent Loop BB0_1 Depth=1
+; CHECK-32-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-32-NEXT: lwarx 4, 0, 3
-; CHECK-32-NEXT: cmplw 4, 6
-; CHECK-32-NEXT: bne 0, .LBB0_1
-; CHECK-32-NEXT: # %bb.5: # %cmpxchg.fencedstore
-; CHECK-32-NEXT: # in Loop: Header=BB0_4 Depth=2
+; CHECK-32-NEXT: cmplw 4, 6
+; CHECK-32-NEXT: bne- 0, .LBB0_5
+; CHECK-32-NEXT: # %bb.3: # %cmpxchg.fencedstore
+; CHECK-32-NEXT: #
; CHECK-32-NEXT: stwcx. 5, 0, 3
-; CHECK-32-NEXT: bne 0, .LBB0_4
-; CHECK-32-NEXT: # %bb.6: # in Loop: Header=BB0_3 Depth=1
; CHECK-32-NEXT: creqv 20, 20, 20
-; CHECK-32-NEXT: b .LBB0_2
-; CHECK-32-NEXT: .LBB0_7: # %atomicrmw.end
+; CHECK-32-NEXT: bne- 0, .LBB0_2
+; CHECK-32-NEXT: .LBB0_4: # %cmpxchg.end
+; CHECK-32-NEXT: #
+; CHECK-32-NEXT: stw 4, 20(1)
+; CHECK-32-NEXT: lfs 0, 20(1)
+; CHECK-32-NEXT: bc 4, 20, .LBB0_1
+; CHECK-32-NEXT: b .LBB0_6
+; CHECK-32-NEXT: .LBB0_5: # %cmpxchg.nostore
+; CHECK-32-NEXT: #
+; CHECK-32-NEXT: crxor 20, 20, 20
+; CHECK-32-NEXT: b .LBB0_4
+; CHECK-32-NEXT: .LBB0_6: # %atomicrmw.end
; CHECK-32-NEXT: fmr 1, 0
; CHECK-32-NEXT: lwsync
; CHECK-32-NEXT: addi 1, 1, 32
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
index 27a26aaca8b26..ff176c80ab342 100644
--- a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
@@ -6,45 +6,49 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: rldicr 5, 3, 0, 61
-; CHECK-NEXT: not 3, 3
+; CHECK-NEXT: not 3, 3
; CHECK-NEXT: li 6, 255
; CHECK-NEXT: lwz 8, 0(5)
; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
; CHECK-NEXT: slw 6, 6, 3
-; CHECK-NEXT: not 6, 6
-; CHECK-NEXT: clrlwi 7, 4, 24
-; CHECK-NEXT: b .LBB0_2
-; CHECK-NEXT: .LBB0_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: mr 8, 9
-; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB0_5 Depth 2
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 7, 4, 24
+; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB0_4 Depth 2
; CHECK-NEXT: srw 9, 8, 3
-; CHECK-NEXT: clrlwi 10, 9, 24
-; CHECK-NEXT: cmplw 10, 7
-; CHECK-NEXT: blt 0, .LBB0_4
-; CHECK-NEXT: # %bb.3: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: sub 9, 9, 4
-; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: clrlwi 9, 9, 24
+; CHECK-NEXT: clrlwi 10, 9, 24
+; CHECK-NEXT: cmplw 10, 7
+; CHECK-NEXT: blt 0, .LBB0_3
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: sub 9, 9, 4
+; CHECK-NEXT: .LBB0_3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: clrlwi 9, 9, 24
; CHECK-NEXT: slw 9, 9, 3
; CHECK-NEXT: and 10, 8, 6
; CHECK-NEXT: or 10, 10, 9
-; CHECK-NEXT: .LBB0_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB0_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 9, 0, 5
-; CHECK-NEXT: cmplw 9, 8
-; CHECK-NEXT: bne 0, .LBB0_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=2
+; CHECK-NEXT: cmplw 9, 8
+; CHECK-NEXT: bne- 0, .LBB0_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 10, 0, 5
-; CHECK-NEXT: bne 0, .LBB0_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 8, 9
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB0_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: bc 4, 20, .LBB0_1
+; CHECK-NEXT: b .LBB0_8
+; CHECK-NEXT: .LBB0_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: b .LBB0_1
+; CHECK-NEXT: .LBB0_8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -57,47 +61,51 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: rldicr 5, 3, 0, 61
-; CHECK-NEXT: clrlwi 3, 3, 30
+; CHECK-NEXT: clrlwi 3, 3, 30
; CHECK-NEXT: lis 6, 0
; CHECK-NEXT: xori 3, 3, 2
; CHECK-NEXT: lwz 8, 0(5)
; CHECK-NEXT: ori 6, 6, 65535
; CHECK-NEXT: slwi 3, 3, 3
; CHECK-NEXT: slw 6, 6, 3
-; CHECK-NEXT: not 6, 6
-; CHECK-NEXT: clrlwi 7, 4, 16
-; CHECK-NEXT: b .LBB1_2
-; CHECK-NEXT: .LBB1_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: mr 8, 9
-; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB1_5 Depth 2
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 7, 4, 16
+; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB1_4 Depth 2
; CHECK-NEXT: srw 9, 8, 3
-; CHECK-NEXT: clrlwi 10, 9, 16
-; CHECK-NEXT: cmplw 10, 7
-; CHECK-NEXT: blt 0, .LBB1_4
-; CHECK-NEXT: # %bb.3: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: sub 9, 9, 4
-; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: clrlwi 9, 9, 16
+; CHECK-NEXT: clrlwi 10, 9, 16
+; CHECK-NEXT: cmplw 10, 7
+; CHECK-NEXT: blt 0, .LBB1_3
+; CHECK-NEXT: # %bb.2:
+; CHECK-NEXT: sub 9, 9, 4
+; CHECK-NEXT: .LBB1_3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: clrlwi 9, 9, 16
; CHECK-NEXT: slw 9, 9, 3
; CHECK-NEXT: and 10, 8, 6
; CHECK-NEXT: or 10, 10, 9
-; CHECK-NEXT: .LBB1_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB1_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB1_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 9, 0, 5
-; CHECK-NEXT: cmplw 9, 8
-; CHECK-NEXT: bne 0, .LBB1_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB1_5 Depth=2
+; CHECK-NEXT: cmplw 9, 8
+; CHECK-NEXT: bne- 0, .LBB1_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 10, 0, 5
-; CHECK-NEXT: bne 0, .LBB1_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 8, 9
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB1_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: bc 4, 20, .LBB1_1
+; CHECK-NEXT: b .LBB1_8
+; CHECK-NEXT: .LBB1_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: b .LBB1_1
+; CHECK-NEXT: .LBB1_8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -110,34 +118,38 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
-; CHECK-NEXT: b .LBB2_2
-; CHECK-NEXT: .LBB2_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB2_5 Depth 2
-; CHECK-NEXT: cmplw 6, 4
-; CHECK-NEXT: bge 0, .LBB2_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT: mr 7, 6
-; CHECK-NEXT: b .LBB2_5
-; CHECK-NEXT: .LBB2_4: # in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT: sub 7, 6, 4
-; CHECK-NEXT: .LBB2_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB2_4 Depth 2
+; CHECK-NEXT: cmplw 6, 4
+; CHECK-NEXT: bge 0, .LBB2_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 6
+; CHECK-NEXT: b .LBB2_4
+; CHECK-NEXT: .LBB2_3:
+; CHECK-NEXT: sub 7, 6, 4
+; CHECK-NEXT: .LBB2_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB2_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
-; CHECK-NEXT: cmplw 5, 6
-; CHECK-NEXT: bne 0, .LBB2_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB2_5 Depth=2
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: bne- 0, .LBB2_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB2_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB2_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: bc 4, 20, .LBB2_1
+; CHECK-NEXT: b .LBB2_8
+; CHECK-NEXT: .LBB2_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: b .LBB2_1
+; CHECK-NEXT: .LBB2_8: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -150,34 +162,38 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
-; CHECK-NEXT: b .LBB3_2
-; CHECK-NEXT: .LBB3_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB3_5 Depth 2
-; CHECK-NEXT: cmpld 6, 4
-; CHECK-NEXT: bge 0, .LBB3_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT: mr 7, 6
-; CHECK-NEXT: b .LBB3_5
-; CHECK-NEXT: .LBB3_4: # in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT: sub 7, 6, 4
-; CHECK-NEXT: .LBB3_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB3_4 Depth 2
+; CHECK-NEXT: cmpld 6, 4
+; CHECK-NEXT: bge 0, .LBB3_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 6
+; CHECK-NEXT: b .LBB3_4
+; CHECK-NEXT: .LBB3_3:
+; CHECK-NEXT: sub 7, 6, 4
+; CHECK-NEXT: .LBB3_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB3_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
-; CHECK-NEXT: cmpld 5, 6
-; CHECK-NEXT: bne 0, .LBB3_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB3_5 Depth=2
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: bne- 0, .LBB3_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stdcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB3_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB3_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: bc 4, 20, .LBB3_1
+; CHECK-NEXT: b .LBB3_8
+; CHECK-NEXT: .LBB3_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: b .LBB3_1
+; CHECK-NEXT: .LBB3_8: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -190,47 +206,51 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: rldicr 5, 3, 0, 61
-; CHECK-NEXT: not 3, 3
+; CHECK-NEXT: not 3, 3
; CHECK-NEXT: li 6, 255
; CHECK-NEXT: lwz 7, 0(5)
; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
; CHECK-NEXT: slw 6, 6, 3
-; CHECK-NEXT: not 6, 6
-; CHECK-NEXT: clrlwi 4, 4, 24
-; CHECK-NEXT: b .LBB4_2
-; CHECK-NEXT: .LBB4_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT: mr 7, 8
-; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB4_5 Depth 2
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 4, 4, 24
+; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB4_4 Depth 2
; CHECK-NEXT: srw 8, 7, 3
-; CHECK-NEXT: clrlwi 9, 8, 24
-; CHECK-NEXT: sub 8, 9, 4
-; CHECK-NEXT: cmplw 8, 9
+; CHECK-NEXT: clrlwi 9, 8, 24
+; CHECK-NEXT: sub 8, 9, 4
+; CHECK-NEXT: cmplw 8, 9
; CHECK-NEXT: li 9, 0
-; CHECK-NEXT: bgt 0, .LBB4_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT: mr 9, 8
-; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT: bgt 0, .LBB4_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 9, 8
+; CHECK-NEXT: .LBB4_3: # %atomicrmw.start
+; CHECK-NEXT: #
; CHECK-NEXT: slw 8, 9, 3
; CHECK-NEXT: and 9, 7, 6
; CHECK-NEXT: or 9, 9, 8
-; CHECK-NEXT: .LBB4_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB4_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB4_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB4_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 8, 0, 5
-; CHECK-NEXT: cmplw 8, 7
-; CHECK-NEXT: bne 0, .LBB4_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB4_5 Depth=2
+; CHECK-NEXT: cmplw 8, 7
+; CHECK-NEXT: bne- 0, .LBB4_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 9, 0, 5
-; CHECK-NEXT: bne 0, .LBB4_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 7, 8
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB4_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: bc 4, 20, .LBB4_1
+; CHECK-NEXT: b .LBB4_8
+; CHECK-NEXT: .LBB4_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: b .LBB4_1
+; CHECK-NEXT: .LBB4_8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -243,49 +263,53 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: rldicr 5, 3, 0, 61
-; CHECK-NEXT: clrlwi 3, 3, 30
+; CHECK-NEXT: clrlwi 3, 3, 30
; CHECK-NEXT: lis 6, 0
; CHECK-NEXT: xori 3, 3, 2
; CHECK-NEXT: lwz 7, 0(5)
; CHECK-NEXT: ori 6, 6, 65535
; CHECK-NEXT: slwi 3, 3, 3
; CHECK-NEXT: slw 6, 6, 3
-; CHECK-NEXT: not 6, 6
-; CHECK-NEXT: clrlwi 4, 4, 16
-; CHECK-NEXT: b .LBB5_2
-; CHECK-NEXT: .LBB5_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT: mr 7, 8
-; CHECK-NEXT: .LBB5_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB5_5 Depth 2
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 4, 4, 16
+; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB5_4 Depth 2
; CHECK-NEXT: srw 8, 7, 3
-; CHECK-NEXT: clrlwi 9, 8, 16
-; CHECK-NEXT: sub 8, 9, 4
-; CHECK-NEXT: cmplw 8, 9
+; CHECK-NEXT: clrlwi 9, 8, 16
+; CHECK-NEXT: sub 8, 9, 4
+; CHECK-NEXT: cmplw 8, 9
; CHECK-NEXT: li 9, 0
-; CHECK-NEXT: bgt 0, .LBB5_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT: mr 9, 8
-; CHECK-NEXT: .LBB5_4: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT: bgt 0, .LBB5_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 9, 8
+; CHECK-NEXT: .LBB5_3: # %atomicrmw.start
+; CHECK-NEXT: #
; CHECK-NEXT: slw 8, 9, 3
; CHECK-NEXT: and 9, 7, 6
; CHECK-NEXT: or 9, 9, 8
-; CHECK-NEXT: .LBB5_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB5_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB5_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB5_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 8, 0, 5
-; CHECK-NEXT: cmplw 8, 7
-; CHECK-NEXT: bne 0, .LBB5_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB5_5 Depth=2
+; CHECK-NEXT: cmplw 8, 7
+; CHECK-NEXT: bne- 0, .LBB5_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 9, 0, 5
-; CHECK-NEXT: bne 0, .LBB5_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 7, 8
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB5_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: bc 4, 20, .LBB5_1
+; CHECK-NEXT: b .LBB5_8
+; CHECK-NEXT: .LBB5_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: b .LBB5_1
+; CHECK-NEXT: .LBB5_8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -298,33 +322,37 @@ define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
-; CHECK-NEXT: b .LBB6_2
-; CHECK-NEXT: .LBB6_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB6_4 Depth 2
-; CHECK-NEXT: sub 5, 6, 4
-; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB6_3 Depth 2
+; CHECK-NEXT: sub 5, 6, 4
+; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: li 7, 0
-; CHECK-NEXT: bgt 0, .LBB6_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT: mr 7, 5
-; CHECK-NEXT: .LBB6_4: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB6_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: bgt 0, .LBB6_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 5
+; CHECK-NEXT: .LBB6_3: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB6_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
-; CHECK-NEXT: cmplw 5, 6
-; CHECK-NEXT: bne 0, .LBB6_1
-; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB6_4 Depth=2
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: bne- 0, .LBB6_6
+; CHECK-NEXT: # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB6_4
-; CHECK-NEXT: # %bb.6:
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: # %bb.7: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB6_3
+; CHECK-NEXT: # %bb.5: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: bc 4, 20, .LBB6_1
+; CHECK-NEXT: b .LBB6_7
+; CHECK-NEXT: .LBB6_6: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: b .LBB6_1
+; CHECK-NEXT: .LBB6_7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -337,33 +365,37 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
-; CHECK-NEXT: b .LBB7_2
-; CHECK-NEXT: .LBB7_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB7_4 Depth 2
-; CHECK-NEXT: subc 5, 6, 4
+; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB7_3 Depth 2
+; CHECK-NEXT: subc 5, 6, 4
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: addze. 8, 7
-; CHECK-NEXT: beq 0, .LBB7_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT: mr 7, 5
-; CHECK-NEXT: .LBB7_4: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB7_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: beq 0, .LBB7_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 5
+; CHECK-NEXT: .LBB7_3: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB7_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
-; CHECK-NEXT: cmpld 5, 6
-; CHECK-NEXT: bne 0, .LBB7_1
-; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB7_4 Depth=2
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: bne- 0, .LBB7_6
+; CHECK-NEXT: # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stdcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB7_4
-; CHECK-NEXT: # %bb.6:
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: # %bb.7: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB7_3
+; CHECK-NEXT: # %bb.5: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: bc 4, 20, .LBB7_1
+; CHECK-NEXT: b .LBB7_7
+; CHECK-NEXT: .LBB7_6: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: b .LBB7_1
+; CHECK-NEXT: .LBB7_7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
index 6ced47bd6bcba..4dc6d0ad3d5c7 100644
--- a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
@@ -6,47 +6,51 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: rldicr 5, 3, 0, 61
-; CHECK-NEXT: not 3, 3
+; CHECK-NEXT: not 3, 3
; CHECK-NEXT: li 6, 255
; CHECK-NEXT: lwz 7, 0(5)
; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
; CHECK-NEXT: slw 6, 6, 3
-; CHECK-NEXT: not 6, 6
-; CHECK-NEXT: clrlwi 4, 4, 24
-; CHECK-NEXT: b .LBB0_2
-; CHECK-NEXT: .LBB0_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: mr 7, 8
-; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB0_5 Depth 2
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 4, 4, 24
+; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB0_4 Depth 2
; CHECK-NEXT: srw 8, 7, 3
-; CHECK-NEXT: clrlwi 9, 8, 24
-; CHECK-NEXT: cmplw 9, 4
+; CHECK-NEXT: clrlwi 9, 8, 24
+; CHECK-NEXT: cmplw 9, 4
; CHECK-NEXT: li 9, 0
-; CHECK-NEXT: bge 0, .LBB0_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: bge 0, .LBB0_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
; CHECK-NEXT: addi 9, 8, 1
-; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: clrlwi 8, 9, 24
+; CHECK-NEXT: .LBB0_3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: clrlwi 8, 9, 24
; CHECK-NEXT: slw 8, 8, 3
; CHECK-NEXT: and 9, 7, 6
; CHECK-NEXT: or 9, 9, 8
-; CHECK-NEXT: .LBB0_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB0_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 8, 0, 5
-; CHECK-NEXT: cmplw 8, 7
-; CHECK-NEXT: bne 0, .LBB0_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=2
+; CHECK-NEXT: cmplw 8, 7
+; CHECK-NEXT: bne- 0, .LBB0_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 9, 0, 5
-; CHECK-NEXT: bne 0, .LBB0_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 7, 8
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB0_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: bc 4, 20, .LBB0_1
+; CHECK-NEXT: b .LBB0_8
+; CHECK-NEXT: .LBB0_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: b .LBB0_1
+; CHECK-NEXT: .LBB0_8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -59,49 +63,53 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: rldicr 5, 3, 0, 61
-; CHECK-NEXT: clrlwi 3, 3, 30
+; CHECK-NEXT: clrlwi 3, 3, 30
; CHECK-NEXT: lis 6, 0
; CHECK-NEXT: xori 3, 3, 2
; CHECK-NEXT: lwz 7, 0(5)
; CHECK-NEXT: ori 6, 6, 65535
; CHECK-NEXT: slwi 3, 3, 3
; CHECK-NEXT: slw 6, 6, 3
-; CHECK-NEXT: not 6, 6
-; CHECK-NEXT: clrlwi 4, 4, 16
-; CHECK-NEXT: b .LBB1_2
-; CHECK-NEXT: .LBB1_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: mr 7, 8
-; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB1_5 Depth 2
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 4, 4, 16
+; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB1_4 Depth 2
; CHECK-NEXT: srw 8, 7, 3
-; CHECK-NEXT: clrlwi 9, 8, 16
-; CHECK-NEXT: cmplw 9, 4
+; CHECK-NEXT: clrlwi 9, 8, 16
+; CHECK-NEXT: cmplw 9, 4
; CHECK-NEXT: li 9, 0
-; CHECK-NEXT: bge 0, .LBB1_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: bge 0, .LBB1_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
; CHECK-NEXT: addi 9, 8, 1
-; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: clrlwi 8, 9, 16
+; CHECK-NEXT: .LBB1_3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: clrlwi 8, 9, 16
; CHECK-NEXT: slw 8, 8, 3
; CHECK-NEXT: and 9, 7, 6
; CHECK-NEXT: or 9, 9, 8
-; CHECK-NEXT: .LBB1_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB1_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB1_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 8, 0, 5
-; CHECK-NEXT: cmplw 8, 7
-; CHECK-NEXT: bne 0, .LBB1_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB1_5 Depth=2
+; CHECK-NEXT: cmplw 8, 7
+; CHECK-NEXT: bne- 0, .LBB1_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 9, 0, 5
-; CHECK-NEXT: bne 0, .LBB1_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 7, 8
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB1_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: bc 4, 20, .LBB1_1
+; CHECK-NEXT: b .LBB1_8
+; CHECK-NEXT: .LBB1_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: b .LBB1_1
+; CHECK-NEXT: .LBB1_8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -114,32 +122,36 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
-; CHECK-NEXT: b .LBB2_2
-; CHECK-NEXT: .LBB2_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB2_4 Depth 2
-; CHECK-NEXT: cmplw 6, 4
+; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB2_3 Depth 2
+; CHECK-NEXT: cmplw 6, 4
; CHECK-NEXT: li 7, 0
-; CHECK-NEXT: bge 0, .LBB2_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT: bge 0, .LBB2_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
; CHECK-NEXT: addi 7, 6, 1
-; CHECK-NEXT: .LBB2_4: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB2_3: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB2_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
-; CHECK-NEXT: cmplw 5, 6
-; CHECK-NEXT: bne 0, .LBB2_1
-; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB2_4 Depth=2
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: bne- 0, .LBB2_6
+; CHECK-NEXT: # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB2_4
-; CHECK-NEXT: # %bb.6:
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: # %bb.7: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB2_3
+; CHECK-NEXT: # %bb.5: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: bc 4, 20, .LBB2_1
+; CHECK-NEXT: b .LBB2_7
+; CHECK-NEXT: .LBB2_6: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: b .LBB2_1
+; CHECK-NEXT: .LBB2_7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -152,32 +164,36 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
-; CHECK-NEXT: b .LBB3_2
-; CHECK-NEXT: .LBB3_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB3_4 Depth 2
-; CHECK-NEXT: cmpld 6, 4
+; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB3_3 Depth 2
+; CHECK-NEXT: cmpld 6, 4
; CHECK-NEXT: li 7, 0
-; CHECK-NEXT: bge 0, .LBB3_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT: bge 0, .LBB3_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
; CHECK-NEXT: addi 7, 6, 1
-; CHECK-NEXT: .LBB3_4: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB3_3: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB3_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
-; CHECK-NEXT: cmpld 5, 6
-; CHECK-NEXT: bne 0, .LBB3_1
-; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB3_4 Depth=2
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: bne- 0, .LBB3_6
+; CHECK-NEXT: # %bb.4: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stdcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB3_4
-; CHECK-NEXT: # %bb.6:
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: # %bb.7: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB3_3
+; CHECK-NEXT: # %bb.5: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: bc 4, 20, .LBB3_1
+; CHECK-NEXT: b .LBB3_7
+; CHECK-NEXT: .LBB3_6: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: b .LBB3_1
+; CHECK-NEXT: .LBB3_7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -190,48 +206,52 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: rldicr 5, 3, 0, 61
-; CHECK-NEXT: not 3, 3
+; CHECK-NEXT: not 3, 3
; CHECK-NEXT: li 6, 255
; CHECK-NEXT: lwz 8, 0(5)
; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
; CHECK-NEXT: slw 6, 6, 3
-; CHECK-NEXT: not 6, 6
-; CHECK-NEXT: clrlwi 7, 4, 24
-; CHECK-NEXT: b .LBB4_2
-; CHECK-NEXT: .LBB4_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT: mr 8, 9
-; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB4_5 Depth 2
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 7, 4, 24
+; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB4_4 Depth 2
; CHECK-NEXT: srw 9, 8, 3
; CHECK-NEXT: andi. 10, 9, 255
; CHECK-NEXT: cmplw 1, 10, 7
; CHECK-NEXT: cror 20, 2, 5
-; CHECK-NEXT: mr 10, 4
-; CHECK-NEXT: bc 12, 20, .LBB4_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT: mr 10, 4
+; CHECK-NEXT: bc 12, 20, .LBB4_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
; CHECK-NEXT: addi 10, 9, -1
-; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT: clrlwi 9, 10, 24
+; CHECK-NEXT: .LBB4_3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: clrlwi 9, 10, 24
; CHECK-NEXT: slw 9, 9, 3
; CHECK-NEXT: and 10, 8, 6
; CHECK-NEXT: or 10, 10, 9
-; CHECK-NEXT: .LBB4_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB4_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB4_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB4_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 9, 0, 5
-; CHECK-NEXT: cmplw 9, 8
-; CHECK-NEXT: bne 0, .LBB4_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB4_5 Depth=2
+; CHECK-NEXT: cmplw 9, 8
+; CHECK-NEXT: bne- 0, .LBB4_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 10, 0, 5
-; CHECK-NEXT: bne 0, .LBB4_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 8, 9
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB4_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: bc 4, 20, .LBB4_1
+; CHECK-NEXT: b .LBB4_8
+; CHECK-NEXT: .LBB4_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: b .LBB4_1
+; CHECK-NEXT: .LBB4_8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -244,50 +264,54 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: rldicr 5, 3, 0, 61
-; CHECK-NEXT: clrlwi 3, 3, 30
+; CHECK-NEXT: clrlwi 3, 3, 30
; CHECK-NEXT: lis 6, 0
; CHECK-NEXT: xori 3, 3, 2
; CHECK-NEXT: lwz 8, 0(5)
; CHECK-NEXT: ori 6, 6, 65535
; CHECK-NEXT: slwi 3, 3, 3
; CHECK-NEXT: slw 6, 6, 3
-; CHECK-NEXT: not 6, 6
-; CHECK-NEXT: clrlwi 7, 4, 16
-; CHECK-NEXT: b .LBB5_2
-; CHECK-NEXT: .LBB5_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT: mr 8, 9
-; CHECK-NEXT: .LBB5_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB5_5 Depth 2
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 7, 4, 16
+; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB5_4 Depth 2
; CHECK-NEXT: srw 9, 8, 3
; CHECK-NEXT: andi. 10, 9, 65535
; CHECK-NEXT: cmplw 1, 10, 7
; CHECK-NEXT: cror 20, 2, 5
-; CHECK-NEXT: mr 10, 4
-; CHECK-NEXT: bc 12, 20, .LBB5_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT: mr 10, 4
+; CHECK-NEXT: bc 12, 20, .LBB5_3
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
; CHECK-NEXT: addi 10, 9, -1
-; CHECK-NEXT: .LBB5_4: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT: clrlwi 9, 10, 16
+; CHECK-NEXT: .LBB5_3: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: clrlwi 9, 10, 16
; CHECK-NEXT: slw 9, 9, 3
; CHECK-NEXT: and 10, 8, 6
; CHECK-NEXT: or 10, 10, 9
-; CHECK-NEXT: .LBB5_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB5_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB5_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB5_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 9, 0, 5
-; CHECK-NEXT: cmplw 9, 8
-; CHECK-NEXT: bne 0, .LBB5_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB5_5 Depth=2
+; CHECK-NEXT: cmplw 9, 8
+; CHECK-NEXT: bne- 0, .LBB5_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 10, 0, 5
-; CHECK-NEXT: bne 0, .LBB5_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 8, 9
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB5_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: bc 4, 20, .LBB5_1
+; CHECK-NEXT: b .LBB5_8
+; CHECK-NEXT: .LBB5_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: b .LBB5_1
+; CHECK-NEXT: .LBB5_8: # %atomicrmw.end
; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -300,37 +324,41 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
-; CHECK-NEXT: b .LBB6_2
-; CHECK-NEXT: .LBB6_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB6_5 Depth 2
-; CHECK-NEXT: cmpwi 6, 0
-; CHECK-NEXT: mr 7, 4
-; CHECK-NEXT: bc 12, 2, .LBB6_5
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
-; CHECK-NEXT: cmplw 6, 4
-; CHECK-NEXT: mr 7, 4
-; CHECK-NEXT: bc 12, 1, .LBB6_5
-; CHECK-NEXT: # %bb.4: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB6_4 Depth 2
+; CHECK-NEXT: cmpwi 6, 0
+; CHECK-NEXT: mr 7, 4
+; CHECK-NEXT: bc 12, 2, .LBB6_4
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: cmplw 6, 4
+; CHECK-NEXT: mr 7, 4
+; CHECK-NEXT: bc 12, 1, .LBB6_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: #
; CHECK-NEXT: addi 7, 6, -1
-; CHECK-NEXT: .LBB6_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB6_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB6_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB6_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
-; CHECK-NEXT: cmplw 5, 6
-; CHECK-NEXT: bne 0, .LBB6_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB6_5 Depth=2
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: bne- 0, .LBB6_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB6_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB6_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: bc 4, 20, .LBB6_1
+; CHECK-NEXT: b .LBB6_8
+; CHECK-NEXT: .LBB6_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: b .LBB6_1
+; CHECK-NEXT: .LBB6_8: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -343,38 +371,42 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
-; CHECK-NEXT: b .LBB7_2
-; CHECK-NEXT: .LBB7_1: # %cmpxchg.nostore
-; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB7_5 Depth 2
-; CHECK-NEXT: cmpdi 6, 0
-; CHECK-NEXT: mr 7, 4
-; CHECK-NEXT: bc 12, 2, .LBB7_5
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
-; CHECK-NEXT: cmpld 6, 4
-; CHECK-NEXT: mr 7, 4
-; CHECK-NEXT: bc 12, 1, .LBB7_5
-; CHECK-NEXT: # %bb.4: # %atomicrmw.start
-; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
+; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB7_4 Depth 2
+; CHECK-NEXT: cmpdi 6, 0
+; CHECK-NEXT: mr 7, 4
+; CHECK-NEXT: bc 12, 2, .LBB7_4
+; CHECK-NEXT: # %bb.2: # %atomicrmw.start
+; CHECK-NEXT: #
+; CHECK-NEXT: cmpld 6, 4
+; CHECK-NEXT: mr 7, 4
+; CHECK-NEXT: bc 12, 1, .LBB7_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: #
; CHECK-NEXT: addi 7, 6, -1
-; CHECK-NEXT: .LBB7_5: # %cmpxchg.start
-; CHECK-NEXT: # Parent Loop BB7_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB7_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB7_1 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
-; CHECK-NEXT: cmpld 5, 6
-; CHECK-NEXT: bne 0, .LBB7_1
-; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
-; CHECK-NEXT: # in Loop: Header=BB7_5 Depth=2
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: bne- 0, .LBB7_7
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: #
; CHECK-NEXT: stdcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB7_5
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: # %bb.8: # %atomicrmw.end
-; CHECK-NEXT: mr 3, 5
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: bne- 0, .LBB7_4
+; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: bc 4, 20, .LBB7_1
+; CHECK-NEXT: b .LBB7_8
+; CHECK-NEXT: .LBB7_7: # %cmpxchg.nostore
+; CHECK-NEXT: #
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: b .LBB7_1
+; CHECK-NEXT: .LBB7_8: # %atomicrmw.end
+; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst
diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
index 0474a479a1fef..90990bbb4124d 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
@@ -402,16 +402,15 @@ define void @test40(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB40_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB40_1
+; PPC64LE-NEXT: bne- 0, .LBB40_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
@@ -423,16 +422,15 @@ define void @test41(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB41_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB41_1
+; PPC64LE-NEXT: bne- 0, .LBB41_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -445,16 +443,15 @@ define void @test42(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB42_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB42_3
+; PPC64LE-NEXT: bne- 0, .LBB42_3
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB42_1
+; PPC64LE-NEXT: bne- 0, .LBB42_1
; PPC64LE-NEXT: .LBB42_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -468,7 +465,7 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 24
@@ -476,12 +473,12 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: .LBB43_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB43_2
+; PPC64LE-NEXT: beq+ 0, .LBB43_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic
ret void
@@ -493,7 +490,7 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB44_4
+; PPC64LE-NEXT: bne- 0, .LBB44_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 24
@@ -501,12 +498,12 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: .LBB44_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB44_2
+; PPC64LE-NEXT: beq+ 0, .LBB44_2
; PPC64LE-NEXT: .LBB44_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -520,23 +517,21 @@ define void @test45(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB45_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB45_5
+; PPC64LE-NEXT: beq+ 0, .LBB45_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB45_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB45_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB45_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB45_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic
@@ -549,20 +544,19 @@ define void @test46(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB46_4
+; PPC64LE-NEXT: bne- 0, .LBB46_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB46_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB46_4
+; PPC64LE-NEXT: beq+ 0, .LBB46_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB46_2
+; PPC64LE-NEXT: beq+ 0, .LBB46_2
; PPC64LE-NEXT: .LBB46_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -576,23 +570,21 @@ define void @test47(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB47_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB47_5
+; PPC64LE-NEXT: beq+ 0, .LBB47_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB47_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB47_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB47_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB47_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst monotonic
@@ -605,20 +597,19 @@ define void @test48(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB48_4
+; PPC64LE-NEXT: bne- 0, .LBB48_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB48_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB48_4
+; PPC64LE-NEXT: beq+ 0, .LBB48_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB48_2
+; PPC64LE-NEXT: beq+ 0, .LBB48_2
; PPC64LE-NEXT: .LBB48_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -632,20 +623,19 @@ define void @test49(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB49_4
+; PPC64LE-NEXT: bne- 0, .LBB49_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB49_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB49_4
+; PPC64LE-NEXT: beq+ 0, .LBB49_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB49_2
+; PPC64LE-NEXT: beq+ 0, .LBB49_2
; PPC64LE-NEXT: .LBB49_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -658,16 +648,15 @@ define void @test50(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB50_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB50_1
+; PPC64LE-NEXT: bne- 0, .LBB50_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
@@ -679,16 +668,15 @@ define void @test51(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB51_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB51_1
+; PPC64LE-NEXT: bne- 0, .LBB51_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -701,16 +689,15 @@ define void @test52(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB52_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB52_3
+; PPC64LE-NEXT: bne- 0, .LBB52_3
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB52_1
+; PPC64LE-NEXT: bne- 0, .LBB52_1
; PPC64LE-NEXT: .LBB52_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -724,7 +711,7 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 16
@@ -732,12 +719,12 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: .LBB53_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB53_2
+; PPC64LE-NEXT: beq+ 0, .LBB53_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic
ret void
@@ -749,7 +736,7 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB54_4
+; PPC64LE-NEXT: bne- 0, .LBB54_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 16
@@ -757,12 +744,12 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: .LBB54_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB54_2
+; PPC64LE-NEXT: beq+ 0, .LBB54_2
; PPC64LE-NEXT: .LBB54_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -776,23 +763,21 @@ define void @test55(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB55_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB55_5
+; PPC64LE-NEXT: beq+ 0, .LBB55_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB55_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB55_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB55_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB55_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic
@@ -805,20 +790,19 @@ define void @test56(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB56_4
+; PPC64LE-NEXT: bne- 0, .LBB56_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB56_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB56_4
+; PPC64LE-NEXT: beq+ 0, .LBB56_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB56_2
+; PPC64LE-NEXT: beq+ 0, .LBB56_2
; PPC64LE-NEXT: .LBB56_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -832,23 +816,21 @@ define void @test57(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB57_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB57_5
+; PPC64LE-NEXT: beq+ 0, .LBB57_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB57_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB57_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB57_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB57_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst monotonic
@@ -861,20 +843,19 @@ define void @test58(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB58_4
+; PPC64LE-NEXT: bne- 0, .LBB58_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB58_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB58_4
+; PPC64LE-NEXT: beq+ 0, .LBB58_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB58_2
+; PPC64LE-NEXT: beq+ 0, .LBB58_2
; PPC64LE-NEXT: .LBB58_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -888,20 +869,19 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB59_4
+; PPC64LE-NEXT: bne- 0, .LBB59_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB59_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB59_4
+; PPC64LE-NEXT: beq+ 0, .LBB59_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB59_2
+; PPC64LE-NEXT: beq+ 0, .LBB59_2
; PPC64LE-NEXT: .LBB59_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -912,16 +892,15 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) {
define void @test60(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test60:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB60_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB60_1
+; PPC64LE-NEXT: bne- 0, .LBB60_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
@@ -931,16 +910,15 @@ define void @test60(ptr %ptr, i32 %cmp, i32 %val) {
define void @test61(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test61:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB61_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB61_1
+; PPC64LE-NEXT: bne- 0, .LBB61_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -951,16 +929,15 @@ define void @test61(ptr %ptr, i32 %cmp, i32 %val) {
define void @test62(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test62:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB62_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB62_3
+; PPC64LE-NEXT: bne- 0, .LBB62_3
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB62_1
+; PPC64LE-NEXT: bne- 0, .LBB62_1
; PPC64LE-NEXT: .LBB62_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -973,19 +950,19 @@ define void @test63(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB63_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB63_2
+; PPC64LE-NEXT: beq+ 0, .LBB63_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic
ret void
@@ -996,19 +973,19 @@ define void @test64(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB64_4
+; PPC64LE-NEXT: bne- 0, .LBB64_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB64_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB64_2
+; PPC64LE-NEXT: beq+ 0, .LBB64_2
; PPC64LE-NEXT: .LBB64_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1021,22 +998,20 @@ define void @test65(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB65_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB65_5
+; PPC64LE-NEXT: beq+ 0, .LBB65_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB65_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB65_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB65_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB65_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic
@@ -1048,19 +1023,18 @@ define void @test66(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB66_4
+; PPC64LE-NEXT: bne- 0, .LBB66_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB66_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB66_4
+; PPC64LE-NEXT: beq+ 0, .LBB66_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB66_2
+; PPC64LE-NEXT: beq+ 0, .LBB66_2
; PPC64LE-NEXT: .LBB66_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1073,22 +1047,20 @@ define void @test67(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB67_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB67_5
+; PPC64LE-NEXT: beq+ 0, .LBB67_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB67_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB67_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB67_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB67_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst monotonic
@@ -1100,19 +1072,18 @@ define void @test68(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB68_4
+; PPC64LE-NEXT: bne- 0, .LBB68_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB68_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB68_4
+; PPC64LE-NEXT: beq+ 0, .LBB68_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB68_2
+; PPC64LE-NEXT: beq+ 0, .LBB68_2
; PPC64LE-NEXT: .LBB68_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1125,19 +1096,18 @@ define void @test69(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB69_4
+; PPC64LE-NEXT: bne- 0, .LBB69_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB69_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB69_4
+; PPC64LE-NEXT: beq+ 0, .LBB69_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB69_2
+; PPC64LE-NEXT: beq+ 0, .LBB69_2
; PPC64LE-NEXT: .LBB69_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1148,16 +1118,15 @@ define void @test69(ptr %ptr, i32 %cmp, i32 %val) {
define void @test70(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test70:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB70_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB70_1
+; PPC64LE-NEXT: bne- 0, .LBB70_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
@@ -1167,16 +1136,15 @@ define void @test70(ptr %ptr, i64 %cmp, i64 %val) {
define void @test71(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test71:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB71_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB71_1
+; PPC64LE-NEXT: bne- 0, .LBB71_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1187,16 +1155,15 @@ define void @test71(ptr %ptr, i64 %cmp, i64 %val) {
define void @test72(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test72:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB72_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bne 0, .LBB72_3
+; PPC64LE-NEXT: bne- 0, .LBB72_3
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB72_1
+; PPC64LE-NEXT: bne- 0, .LBB72_1
; PPC64LE-NEXT: .LBB72_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1209,19 +1176,19 @@ define void @test73(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB73_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB73_2
+; PPC64LE-NEXT: beq+ 0, .LBB73_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic
ret void
@@ -1232,19 +1199,19 @@ define void @test74(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bne 0, .LBB74_4
+; PPC64LE-NEXT: bne- 0, .LBB74_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB74_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB74_2
+; PPC64LE-NEXT: beq+ 0, .LBB74_2
; PPC64LE-NEXT: .LBB74_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1257,22 +1224,20 @@ define void @test75(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB75_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB75_5
+; PPC64LE-NEXT: beq+ 0, .LBB75_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB75_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB75_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB75_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB75_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic
@@ -1284,19 +1249,18 @@ define void @test76(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bne 0, .LBB76_4
+; PPC64LE-NEXT: bne- 0, .LBB76_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB76_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB76_4
+; PPC64LE-NEXT: beq+ 0, .LBB76_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB76_2
+; PPC64LE-NEXT: beq+ 0, .LBB76_2
; PPC64LE-NEXT: .LBB76_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1309,22 +1273,20 @@ define void @test77(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB77_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB77_5
+; PPC64LE-NEXT: beq+ 0, .LBB77_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB77_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB77_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB77_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB77_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst monotonic
@@ -1336,19 +1298,18 @@ define void @test78(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bne 0, .LBB78_4
+; PPC64LE-NEXT: bne- 0, .LBB78_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB78_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB78_4
+; PPC64LE-NEXT: beq+ 0, .LBB78_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB78_2
+; PPC64LE-NEXT: beq+ 0, .LBB78_2
; PPC64LE-NEXT: .LBB78_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1361,19 +1322,18 @@ define void @test79(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bne 0, .LBB79_4
+; PPC64LE-NEXT: bne- 0, .LBB79_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB79_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB79_4
+; PPC64LE-NEXT: beq+ 0, .LBB79_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB79_2
+; PPC64LE-NEXT: beq+ 0, .LBB79_2
; PPC64LE-NEXT: .LBB79_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1386,16 +1346,15 @@ define void @test80(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB80_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB80_1
+; PPC64LE-NEXT: bne- 0, .LBB80_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") monotonic monotonic
@@ -1407,16 +1366,15 @@ define void @test81(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB81_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB81_1
+; PPC64LE-NEXT: bne- 0, .LBB81_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1429,16 +1387,15 @@ define void @test82(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB82_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB82_3
+; PPC64LE-NEXT: bne- 0, .LBB82_3
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB82_1
+; PPC64LE-NEXT: bne- 0, .LBB82_1
; PPC64LE-NEXT: .LBB82_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1452,7 +1409,7 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 24
@@ -1460,12 +1417,12 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: .LBB83_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB83_2
+; PPC64LE-NEXT: beq+ 0, .LBB83_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") release monotonic
ret void
@@ -1477,7 +1434,7 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB84_4
+; PPC64LE-NEXT: bne- 0, .LBB84_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 24
@@ -1485,12 +1442,12 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: .LBB84_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB84_2
+; PPC64LE-NEXT: beq+ 0, .LBB84_2
; PPC64LE-NEXT: .LBB84_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1504,23 +1461,21 @@ define void @test85(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB85_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB85_5
+; PPC64LE-NEXT: beq+ 0, .LBB85_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB85_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB85_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB85_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB85_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") acq_rel monotonic
@@ -1533,20 +1488,19 @@ define void @test86(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB86_4
+; PPC64LE-NEXT: bne- 0, .LBB86_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB86_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB86_4
+; PPC64LE-NEXT: beq+ 0, .LBB86_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB86_2
+; PPC64LE-NEXT: beq+ 0, .LBB86_2
; PPC64LE-NEXT: .LBB86_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1560,23 +1514,21 @@ define void @test87(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB87_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB87_5
+; PPC64LE-NEXT: beq+ 0, .LBB87_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB87_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB87_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB87_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB87_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") seq_cst monotonic
@@ -1589,20 +1541,19 @@ define void @test88(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB88_4
+; PPC64LE-NEXT: bne- 0, .LBB88_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB88_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB88_4
+; PPC64LE-NEXT: beq+ 0, .LBB88_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB88_2
+; PPC64LE-NEXT: beq+ 0, .LBB88_2
; PPC64LE-NEXT: .LBB88_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1616,20 +1567,19 @@ define void @test89(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 24
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB89_4
+; PPC64LE-NEXT: bne- 0, .LBB89_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB89_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB89_4
+; PPC64LE-NEXT: beq+ 0, .LBB89_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB89_2
+; PPC64LE-NEXT: beq+ 0, .LBB89_2
; PPC64LE-NEXT: .LBB89_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1642,16 +1592,15 @@ define void @test90(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB90_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB90_1
+; PPC64LE-NEXT: bne- 0, .LBB90_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") monotonic monotonic
@@ -1663,16 +1612,15 @@ define void @test91(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB91_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB91_1
+; PPC64LE-NEXT: bne- 0, .LBB91_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1685,16 +1633,15 @@ define void @test92(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB92_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB92_3
+; PPC64LE-NEXT: bne- 0, .LBB92_3
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB92_1
+; PPC64LE-NEXT: bne- 0, .LBB92_1
; PPC64LE-NEXT: .LBB92_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1708,7 +1655,7 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 16
@@ -1716,12 +1663,12 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: .LBB93_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB93_2
+; PPC64LE-NEXT: beq+ 0, .LBB93_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") release monotonic
ret void
@@ -1733,7 +1680,7 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB94_4
+; PPC64LE-NEXT: bne- 0, .LBB94_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 16
@@ -1741,12 +1688,12 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: .LBB94_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB94_2
+; PPC64LE-NEXT: beq+ 0, .LBB94_2
; PPC64LE-NEXT: .LBB94_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1760,23 +1707,21 @@ define void @test95(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB95_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB95_5
+; PPC64LE-NEXT: beq+ 0, .LBB95_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB95_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB95_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB95_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB95_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") acq_rel monotonic
@@ -1789,20 +1734,19 @@ define void @test96(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB96_4
+; PPC64LE-NEXT: bne- 0, .LBB96_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: clrlwi 5, 5, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB96_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB96_4
+; PPC64LE-NEXT: beq+ 0, .LBB96_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB96_2
+; PPC64LE-NEXT: beq+ 0, .LBB96_2
; PPC64LE-NEXT: .LBB96_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1816,23 +1760,21 @@ define void @test97(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB97_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB97_5
+; PPC64LE-NEXT: beq+ 0, .LBB97_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB97_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB97_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB97_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB97_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") seq_cst monotonic
@@ -1845,20 +1787,19 @@ define void @test98(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB98_4
+; PPC64LE-NEXT: bne- 0, .LBB98_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB98_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB98_4
+; PPC64LE-NEXT: beq+ 0, .LBB98_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB98_2
+; PPC64LE-NEXT: beq+ 0, .LBB98_2
; PPC64LE-NEXT: .LBB98_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1872,20 +1813,19 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: clrlwi 4, 4, 16
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB99_4
+; PPC64LE-NEXT: bne- 0, .LBB99_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: clrlwi 5, 5, 16
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB99_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB99_4
+; PPC64LE-NEXT: beq+ 0, .LBB99_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB99_2
+; PPC64LE-NEXT: beq+ 0, .LBB99_2
; PPC64LE-NEXT: .LBB99_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1896,16 +1836,15 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) {
define void @test100(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test100:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB100_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB100_1
+; PPC64LE-NEXT: bne- 0, .LBB100_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") monotonic monotonic
@@ -1915,16 +1854,15 @@ define void @test100(ptr %ptr, i32 %cmp, i32 %val) {
define void @test101(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test101:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB101_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB101_1
+; PPC64LE-NEXT: bne- 0, .LBB101_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1935,16 +1873,15 @@ define void @test101(ptr %ptr, i32 %cmp, i32 %val) {
define void @test102(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test102:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB102_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB102_3
+; PPC64LE-NEXT: bne- 0, .LBB102_3
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB102_1
+; PPC64LE-NEXT: bne- 0, .LBB102_1
; PPC64LE-NEXT: .LBB102_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -1957,19 +1894,19 @@ define void @test103(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB103_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB103_2
+; PPC64LE-NEXT: beq+ 0, .LBB103_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") release monotonic
ret void
@@ -1980,19 +1917,19 @@ define void @test104(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB104_4
+; PPC64LE-NEXT: bne- 0, .LBB104_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB104_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB104_2
+; PPC64LE-NEXT: beq+ 0, .LBB104_2
; PPC64LE-NEXT: .LBB104_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -2005,22 +1942,20 @@ define void @test105(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB105_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB105_5
+; PPC64LE-NEXT: beq+ 0, .LBB105_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB105_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB105_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB105_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB105_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") acq_rel monotonic
@@ -2032,19 +1967,18 @@ define void @test106(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB106_4
+; PPC64LE-NEXT: bne- 0, .LBB106_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB106_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB106_4
+; PPC64LE-NEXT: beq+ 0, .LBB106_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB106_2
+; PPC64LE-NEXT: beq+ 0, .LBB106_2
; PPC64LE-NEXT: .LBB106_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -2057,22 +1991,20 @@ define void @test107(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB107_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB107_5
+; PPC64LE-NEXT: beq+ 0, .LBB107_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB107_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB107_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB107_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB107_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") seq_cst monotonic
@@ -2084,19 +2016,18 @@ define void @test108(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB108_4
+; PPC64LE-NEXT: bne- 0, .LBB108_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB108_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB108_4
+; PPC64LE-NEXT: beq+ 0, .LBB108_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB108_2
+; PPC64LE-NEXT: beq+ 0, .LBB108_2
; PPC64LE-NEXT: .LBB108_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -2109,19 +2040,18 @@ define void @test109(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB109_4
+; PPC64LE-NEXT: bne- 0, .LBB109_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB109_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB109_4
+; PPC64LE-NEXT: beq+ 0, .LBB109_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: beq 0, .LBB109_2
+; PPC64LE-NEXT: beq+ 0, .LBB109_2
; PPC64LE-NEXT: .LBB109_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -2132,16 +2062,15 @@ define void @test109(ptr %ptr, i32 %cmp, i32 %val) {
define void @test110(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test110:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB110_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB110_1
+; PPC64LE-NEXT: bne- 0, .LBB110_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") monotonic monotonic
@@ -2151,16 +2080,15 @@ define void @test110(ptr %ptr, i64 %cmp, i64 %val) {
define void @test111(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test111:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB111_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB111_1
+; PPC64LE-NEXT: bne- 0, .LBB111_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -2171,16 +2099,15 @@ define void @test111(ptr %ptr, i64 %cmp, i64 %val) {
define void @test112(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test112:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB112_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bne 0, .LBB112_3
+; PPC64LE-NEXT: bne- 0, .LBB112_3
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB112_1
+; PPC64LE-NEXT: bne- 0, .LBB112_1
; PPC64LE-NEXT: .LBB112_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -2193,19 +2120,19 @@ define void @test113(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB113_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB113_2
+; PPC64LE-NEXT: beq+ 0, .LBB113_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") release monotonic
ret void
@@ -2216,19 +2143,19 @@ define void @test114(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bne 0, .LBB114_4
+; PPC64LE-NEXT: bne- 0, .LBB114_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB114_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: beqlr+ 0
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB114_2
+; PPC64LE-NEXT: beq+ 0, .LBB114_2
; PPC64LE-NEXT: .LBB114_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -2241,22 +2168,20 @@ define void @test115(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB115_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB115_5
+; PPC64LE-NEXT: beq+ 0, .LBB115_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB115_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB115_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB115_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB115_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") acq_rel monotonic
@@ -2268,19 +2193,18 @@ define void @test116(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bne 0, .LBB116_4
+; PPC64LE-NEXT: bne- 0, .LBB116_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB116_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB116_4
+; PPC64LE-NEXT: beq+ 0, .LBB116_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB116_2
+; PPC64LE-NEXT: beq+ 0, .LBB116_2
; PPC64LE-NEXT: .LBB116_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -2293,22 +2217,20 @@ define void @test117(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB117_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB117_5
+; PPC64LE-NEXT: beq+ 0, .LBB117_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB117_2
-; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: beq+ 0, .LBB117_2
; PPC64LE-NEXT: blr
-; PPC64LE-NEXT: .LBB117_5: # %cmpxchg.success
+; PPC64LE-NEXT: .LBB117_4: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") seq_cst monotonic
@@ -2320,19 +2242,18 @@ define void @test118(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bne 0, .LBB118_4
+; PPC64LE-NEXT: bne- 0, .LBB118_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB118_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB118_4
+; PPC64LE-NEXT: beq+ 0, .LBB118_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB118_2
+; PPC64LE-NEXT: beq+ 0, .LBB118_2
; PPC64LE-NEXT: .LBB118_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
@@ -2345,19 +2266,18 @@ define void @test119(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: bne 0, .LBB119_4
+; PPC64LE-NEXT: bne- 0, .LBB119_4
; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB119_2: # %cmpxchg.trystore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: beq 0, .LBB119_4
+; PPC64LE-NEXT: beq+ 0, .LBB119_4
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
; PPC64LE-NEXT: cmpld 6, 4
-; PPC64LE-NEXT: beq 0, .LBB119_2
+; PPC64LE-NEXT: beq+ 0, .LBB119_2
; PPC64LE-NEXT: .LBB119_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 40786057ead5f..183c8e1323f2e 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -138,67 +138,67 @@ define void @store_i64_seq_cst(ptr %mem) {
; Atomic CmpXchg
define i8 @cas_strong_i8_sc_sc(ptr %mem) {
; PPC32-LABEL: cas_strong_i8_sc_sc:
-; PPC32: # %bb.0:
+; PPC32: # %bb.0: # %cmpxchg.start
; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29
; PPC32-NEXT: lwarx r4, 0, r5
-; PPC32-NEXT: not r3, r3
+; PPC32-NEXT: not r3, r3
; PPC32-NEXT: rlwinm r3, r3, 3, 27, 28
; PPC32-NEXT: srw r6, r4, r3
; PPC32-NEXT: andi. r6, r6, 255
-; PPC32-NEXT: bne cr0, .LBB8_4
-; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC32-NEXT: bne- cr0, .LBB8_4
+; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC32-NEXT: li r6, 255
; PPC32-NEXT: li r7, 1
; PPC32-NEXT: slw r6, r6, r3
-; PPC32-NEXT: not r6, r6
+; PPC32-NEXT: not r6, r6
; PPC32-NEXT: slw r7, r7, r3
; PPC32-NEXT: sync
-; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore
-; PPC32-NEXT: # =>This Inner Loop Header: Depth=1
+; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore
+; PPC32-NEXT: #
; PPC32-NEXT: and r8, r4, r6
; PPC32-NEXT: or r8, r8, r7
; PPC32-NEXT: stwcx. r8, 0, r5
-; PPC32-NEXT: beq cr0, .LBB8_4
-; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload
-; PPC32-NEXT: # in Loop: Header=BB8_2 Depth=1
+; PPC32-NEXT: beq+ cr0, .LBB8_4
+; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC32-NEXT: #
; PPC32-NEXT: lwarx r4, 0, r5
; PPC32-NEXT: srw r8, r4, r3
; PPC32-NEXT: andi. r8, r8, 255
-; PPC32-NEXT: beq cr0, .LBB8_2
-; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore
+; PPC32-NEXT: beq+ cr0, .LBB8_2
+; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore
; PPC32-NEXT: srw r3, r4, r3
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
;
; PPC64-LABEL: cas_strong_i8_sc_sc:
-; PPC64: # %bb.0:
+; PPC64: # %bb.0: # %cmpxchg.start
; PPC64-NEXT: rldicr r5, r3, 0, 61
-; PPC64-NEXT: not r3, r3
+; PPC64-NEXT: not r3, r3
; PPC64-NEXT: lwarx r4, 0, r5
; PPC64-NEXT: rlwinm r3, r3, 3, 27, 28
; PPC64-NEXT: srw r6, r4, r3
; PPC64-NEXT: andi. r6, r6, 255
-; PPC64-NEXT: bne cr0, .LBB8_4
-; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64-NEXT: bne- cr0, .LBB8_4
+; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64-NEXT: li r6, 255
; PPC64-NEXT: li r7, 1
; PPC64-NEXT: slw r6, r6, r3
-; PPC64-NEXT: not r6, r6
+; PPC64-NEXT: not r6, r6
; PPC64-NEXT: slw r7, r7, r3
; PPC64-NEXT: sync
-; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore
-; PPC64-NEXT: # =>This Inner Loop Header: Depth=1
+; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore
+; PPC64-NEXT: #
; PPC64-NEXT: and r8, r4, r6
; PPC64-NEXT: or r8, r8, r7
; PPC64-NEXT: stwcx. r8, 0, r5
-; PPC64-NEXT: beq cr0, .LBB8_4
-; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload
-; PPC64-NEXT: # in Loop: Header=BB8_2 Depth=1
+; PPC64-NEXT: beq+ cr0, .LBB8_4
+; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64-NEXT: #
; PPC64-NEXT: lwarx r4, 0, r5
; PPC64-NEXT: srw r8, r4, r3
; PPC64-NEXT: andi. r8, r8, 255
-; PPC64-NEXT: beq cr0, .LBB8_2
-; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore
+; PPC64-NEXT: beq+ cr0, .LBB8_2
+; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore
; PPC64-NEXT: srw r3, r4, r3
; PPC64-NEXT: lwsync
; PPC64-NEXT: blr
@@ -208,54 +208,50 @@ define i8 @cas_strong_i8_sc_sc(ptr %mem) {
}
define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
; PPC32-LABEL: cas_weak_i16_acquire_acquire:
-; PPC32: # %bb.0:
+; PPC32: # %bb.0: # %cmpxchg.start
; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29
; PPC32-NEXT: lwarx r5, 0, r4
-; PPC32-NEXT: clrlwi r3, r3, 30
+; PPC32-NEXT: clrlwi r3, r3, 30
; PPC32-NEXT: xori r3, r3, 2
; PPC32-NEXT: slwi r6, r3, 3
; PPC32-NEXT: srw r3, r5, r6
; PPC32-NEXT: andi. r7, r3, 65535
-; PPC32-NEXT: beq cr0, .LBB9_2
-; PPC32-NEXT: # %bb.1: # %cmpxchg.failure
-; PPC32-NEXT: lwsync
-; PPC32-NEXT: blr
-; PPC32-NEXT: .LBB9_2: # %cmpxchg.fencedstore
+; PPC32-NEXT: bne- cr0, .LBB9_2
+; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC32-NEXT: lis r7, 0
; PPC32-NEXT: ori r7, r7, 65535
; PPC32-NEXT: slw r7, r7, r6
; PPC32-NEXT: li r8, 1
-; PPC32-NEXT: not r7, r7
+; PPC32-NEXT: not r7, r7
; PPC32-NEXT: slw r6, r8, r6
; PPC32-NEXT: and r5, r5, r7
; PPC32-NEXT: or r5, r5, r6
; PPC32-NEXT: stwcx. r5, 0, r4
+; PPC32-NEXT: .LBB9_2: # %cmpxchg.failure
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
;
; PPC64-LABEL: cas_weak_i16_acquire_acquire:
-; PPC64: # %bb.0:
-; PPC64-NEXT: rldicr r4, r3, 0, 61
-; PPC64-NEXT: clrlwi r3, r3, 30
+; PPC64: # %bb.0: # %cmpxchg.start
+; PPC64-NEXT: rldicr r4, r3, 0, 61
+; PPC64-NEXT: clrlwi r3, r3, 30
; PPC64-NEXT: lwarx r5, 0, r4
; PPC64-NEXT: xori r3, r3, 2
; PPC64-NEXT: slwi r6, r3, 3
; PPC64-NEXT: srw r3, r5, r6
; PPC64-NEXT: andi. r7, r3, 65535
-; PPC64-NEXT: beq cr0, .LBB9_2
-; PPC64-NEXT: # %bb.1: # %cmpxchg.failure
-; PPC64-NEXT: lwsync
-; PPC64-NEXT: blr
-; PPC64-NEXT: .LBB9_2: # %cmpxchg.fencedstore
+; PPC64-NEXT: bne- cr0, .LBB9_2
+; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64-NEXT: lis r7, 0
; PPC64-NEXT: ori r7, r7, 65535
; PPC64-NEXT: slw r7, r7, r6
; PPC64-NEXT: li r8, 1
-; PPC64-NEXT: not r7, r7
+; PPC64-NEXT: not r7, r7
; PPC64-NEXT: slw r6, r8, r6
; PPC64-NEXT: and r5, r5, r7
; PPC64-NEXT: or r5, r5, r6
; PPC64-NEXT: stwcx. r5, 0, r4
+; PPC64-NEXT: .LBB9_2: # %cmpxchg.failure
; PPC64-NEXT: lwsync
; PPC64-NEXT: blr
%val = cmpxchg weak ptr %mem, i16 0, i16 1 acquire acquire
@@ -264,24 +260,24 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
}
define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mr r4, r3
+; CHECK: # %bb.0: # %cmpxchg.start
+; CHECK-NEXT: mr r4, r3
; CHECK-NEXT: lwarx r3, 0, r3
-; CHECK-NEXT: cmplwi r3, 0
-; CHECK-NEXT: bne cr0, .LBB10_4
-; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; CHECK-NEXT: cmplwi r3, 0
+; CHECK-NEXT: bne- cr0, .LBB10_4
+; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: li r5, 1
; CHECK-NEXT: lwsync
-; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore
-; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore
+; CHECK-NEXT: #
; CHECK-NEXT: stwcx. r5, 0, r4
-; CHECK-NEXT: beq cr0, .LBB10_4
-; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
-; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=1
+; CHECK-NEXT: beq+ cr0, .LBB10_4
+; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
+; CHECK-NEXT: #
; CHECK-NEXT: lwarx r3, 0, r4
-; CHECK-NEXT: cmplwi r3, 0
-; CHECK-NEXT: beq cr0, .LBB10_2
-; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore
+; CHECK-NEXT: cmplwi r3, 0
+; CHECK-NEXT: beq+ cr0, .LBB10_2
+; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire
@@ -313,12 +309,12 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
; PPC32-NEXT: blr
;
; PPC64-LABEL: cas_weak_i64_release_monotonic:
-; PPC64: # %bb.0:
-; PPC64-NEXT: mr r4, r3
+; PPC64: # %bb.0: # %cmpxchg.start
+; PPC64-NEXT: mr r4, r3
; PPC64-NEXT: ldarx r3, 0, r3
-; PPC64-NEXT: cmpldi r3, 0
-; PPC64-NEXT: bnelr cr0
-; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64-NEXT: cmpldi r3, 0
+; PPC64-NEXT: bnelr- cr0
+; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64-NEXT: li r5, 1
; PPC64-NEXT: lwsync
; PPC64-NEXT: stdcx. r5, 0, r4
diff --git a/llvm/test/CodeGen/PowerPC/loop-comment.ll b/llvm/test/CodeGen/PowerPC/loop-comment.ll
index 530e67b4804fb..b4ceb36768904 100644
--- a/llvm/test/CodeGen/PowerPC/loop-comment.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-comment.ll
@@ -6,16 +6,15 @@ define void @test(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE: # %bb.0:
; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .p2align 5
; PPC64LE-NEXT: .LBB0_1: # %cmpxchg.start
; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
; PPC64LE-NEXT: cmplw 6, 4
-; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: bnelr- 0
; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB0_1
+; PPC64LE-NEXT: bne- 0, .LBB0_1
; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
>From 98d946ac4e944c92eb4a83bbff4bd6bf05e99daa Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 7 Aug 2025 14:00:50 +0000
Subject: [PATCH 3/5] change function name from
getTrueBranchHintWeightForAtomicCmpXchgg to
getTrueBranchHintWeightForAtomicCmpXchg
---
llvm/include/llvm/CodeGen/TargetLowering.h | 2 +-
llvm/lib/CodeGen/AtomicExpandPass.cpp | 6 +++---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +-
llvm/lib/Target/PowerPC/PPCISelLowering.h | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 027bcc5bc53ae..609453164fcc7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2334,7 +2334,7 @@ class LLVM_ABI TargetLoweringBase {
AtomicOrdering Ord) const;
virtual MDNode *
- getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const {
+ getTrueBranchHintWeightForAtomicCmpXchg(LLVMContext &Ctx) const {
return nullptr;
}
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index abaa8b6e841f6..64fc6359ac726 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1456,7 +1456,7 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// jump straight past that fence instruction (if it exists).
Builder.CreateCondBr(
ShouldStore, ReleasingStoreBB, NoStoreBB,
- TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+ TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
Builder.SetInsertPoint(ReleasingStoreBB);
if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
@@ -1476,7 +1476,7 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
Builder.CreateCondBr(
StoreSuccess, SuccessBB, CI->isWeak() ? FailureBB : RetryBB,
- TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+ TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
Builder.SetInsertPoint(ReleasedLoadBB);
Value *SecondLoad;
@@ -1491,7 +1491,7 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// jump straight past that fence instruction (if it exists).
Builder.CreateCondBr(
ShouldStore, TryStoreBB, NoStoreBB,
- TLI->getTrueBranchHintWeightForAtomicCmpXchgg(F->getContext()));
+ TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
// Update PHI node in TryStoreBB.
LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
} else
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index b218532e56b6a..865d3f5465859 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12817,7 +12817,7 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
return Builder.CreateXor(Call, Builder.getInt32(1));
}
-MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchgg(
+MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchg(
LLVMContext &Ctx) const {
return MDBuilder(Ctx).createLikelyBranchWeights();
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4892a3c603a6c..4b78955db9fbf 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -939,7 +939,7 @@ namespace llvm {
AtomicOrdering Ord) const override;
virtual MDNode *
- getTrueBranchHintWeightForAtomicCmpXchgg(LLVMContext &Ctx) const override;
+ getTrueBranchHintWeightForAtomicCmpXchg(LLVMContext &Ctx) const override;
bool shouldInlineQuadwordAtomics() const;
TargetLowering::AtomicExpansionKind
>From 5d2dcdd75dba26fc36aa910d1ef5fdca00b84650 Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Thu, 7 Aug 2025 17:24:01 +0000
Subject: [PATCH 4/5] using the target independent code
---
llvm/include/llvm/CodeGen/TargetLowering.h | 6 ------
llvm/lib/CodeGen/AtomicExpandPass.cpp | 13 ++++++-------
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 ------
llvm/lib/Target/PowerPC/PPCISelLowering.h | 2 --
llvm/test/CodeGen/AArch64/atomic-ops.ll | 10 +++++-----
5 files changed, 11 insertions(+), 26 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 609453164fcc7..cbdc1b6031680 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2332,12 +2332,6 @@ class LLVM_ABI TargetLoweringBase {
virtual Instruction *emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const;
-
- virtual MDNode *
- getTrueBranchHintWeightForAtomicCmpXchg(LLVMContext &Ctx) const {
- return nullptr;
- }
-
/// @}
// Emits code that executes when the comparison result in the ll/sc
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 64fc6359ac726..0d90cdb52a448 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1454,9 +1454,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- Builder.CreateCondBr(
- ShouldStore, ReleasingStoreBB, NoStoreBB,
- TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
+ Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
+ MDBuilder(F->getContext()).createLikelyBranchWeights());
Builder.SetInsertPoint(ReleasingStoreBB);
if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
@@ -1474,9 +1473,9 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
- Builder.CreateCondBr(
- StoreSuccess, SuccessBB, CI->isWeak() ? FailureBB : RetryBB,
- TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
+ Builder.CreateCondBr(StoreSuccess, SuccessBB,
+ CI->isWeak() ? FailureBB : RetryBB,
+ MDBuilder(F->getContext()).createLikelyBranchWeights());
Builder.SetInsertPoint(ReleasedLoadBB);
Value *SecondLoad;
@@ -1491,7 +1490,7 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// jump straight past that fence instruction (if it exists).
Builder.CreateCondBr(
ShouldStore, TryStoreBB, NoStoreBB,
- TLI->getTrueBranchHintWeightForAtomicCmpXchg(F->getContext()));
+ MDBuilder(F->getContext()).createLikelyBranchWeights());
// Update PHI node in TryStoreBB.
LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
} else
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 865d3f5465859..459525ed4ee9a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -68,7 +68,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
-#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
@@ -12817,11 +12816,6 @@ Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
return Builder.CreateXor(Call, Builder.getInt32(1));
}
-MDNode *PPCTargetLowering::getTrueBranchHintWeightForAtomicCmpXchg(
- LLVMContext &Ctx) const {
- return MDBuilder(Ctx).createLikelyBranchWeights();
-}
-
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4b78955db9fbf..124c7116dc3b5 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -938,8 +938,6 @@ namespace llvm {
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
- virtual MDNode *
- getTrueBranchHintWeightForAtomicCmpXchg(LLVMContext &Ctx) const override;
bool shouldInlineQuadwordAtomics() const;
TargetLowering::AtomicExpansionKind
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
index d8ac89f76b321..deeba7ef3ce2c 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -1090,18 +1090,18 @@ define dso_local void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; INLINE_ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; INLINE_ATOMICS-NEXT: ldxr x8, [x9]
; INLINE_ATOMICS-NEXT: cmp x8, x0
-; INLINE_ATOMICS-NEXT: b.ne .LBB43_3
+; INLINE_ATOMICS-NEXT: b.ne .LBB43_4
; INLINE_ATOMICS-NEXT: // %bb.2: // %cmpxchg.trystore
; INLINE_ATOMICS-NEXT: // in Loop: Header=BB43_1 Depth=1
; INLINE_ATOMICS-NEXT: stxr w10, x1, [x9]
; INLINE_ATOMICS-NEXT: cbnz w10, .LBB43_1
-; INLINE_ATOMICS-NEXT: b .LBB43_4
-; INLINE_ATOMICS-NEXT: .LBB43_3: // %cmpxchg.nostore
-; INLINE_ATOMICS-NEXT: clrex
-; INLINE_ATOMICS-NEXT: .LBB43_4: // %cmpxchg.end
+; INLINE_ATOMICS-NEXT: .LBB43_3: // %cmpxchg.end
; INLINE_ATOMICS-NEXT: adrp x9, var64
; INLINE_ATOMICS-NEXT: str x8, [x9, :lo12:var64]
; INLINE_ATOMICS-NEXT: ret
+; INLINE_ATOMICS-NEXT: .LBB43_4: // %cmpxchg.nostore
+; INLINE_ATOMICS-NEXT: clrex
+; INLINE_ATOMICS-NEXT: b .LBB43_3
;
; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i64:
; OUTLINE_ATOMICS: // %bb.0:
>From 70c1ccf4aba144516a8e8193b6e60bb9987ad37b Mon Sep 17 00:00:00 2001
From: zhijian <zhijian at ca.ibm.com>
Date: Fri, 8 Aug 2025 18:43:53 +0000
Subject: [PATCH 5/5] change test case
---
.../AArch64/GlobalISel/arm64-atomic.ll | 60 ++--
.../AArch64/GlobalISel/arm64-pcsections.ll | 128 ++++----
llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll | 10 +-
llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll | 294 +++++++++---------
llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll | 294 +++++++++---------
llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll | 294 +++++++++---------
llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll | 294 +++++++++---------
llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll | 30 +-
llvm/test/CodeGen/ARM/atomic-cmpxchg.ll | 31 +-
llvm/test/CodeGen/ARM/cmpxchg-idioms.ll | 25 +-
llvm/test/CodeGen/ARM/cmpxchg-weak.ll | 17 +-
.../CodeGen/Hexagon/atomic-opaque-basic.ll | 1 -
.../AtomicExpand/ARM/atomic-expansion-v7.ll | 55 +---
.../AtomicExpand/ARM/atomic-expansion-v8.ll | 41 +--
.../AtomicExpand/ARM/cmpxchg-weak.ll | 16 +-
15 files changed, 758 insertions(+), 832 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 5bc041aef88ba..e6bf3ab674717 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -6002,15 +6002,17 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NOLSE-O1-NEXT: b.ne LBB67_4
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB67_1 Depth=1
-; CHECK-NOLSE-O1-NEXT: stxrb w9, w2, [x8]
-; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB67_1
-; CHECK-NOLSE-O1-NEXT: ; %bb.3:
-; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT: stxrb w10, w2, [x8]
+; CHECK-NOLSE-O1-NEXT: mov w9, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB67_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT: mov w1, w9
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
; CHECK-NOLSE-O1-NEXT: LBB67_4: ; %cmpxchg.nostore
-; CHECK-NOLSE-O1-NEXT: mov w1, wzr
+; CHECK-NOLSE-O1-NEXT: mov w9, wzr
; CHECK-NOLSE-O1-NEXT: clrex
+; CHECK-NOLSE-O1-NEXT: mov w1, w9
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
;
@@ -6108,15 +6110,17 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NOLSE-O1-NEXT: b.ne LBB68_4
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB68_1 Depth=1
-; CHECK-NOLSE-O1-NEXT: stxrh w9, w2, [x8]
-; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB68_1
-; CHECK-NOLSE-O1-NEXT: ; %bb.3:
-; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT: stxrh w10, w2, [x8]
+; CHECK-NOLSE-O1-NEXT: mov w9, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB68_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT: mov w1, w9
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
; CHECK-NOLSE-O1-NEXT: LBB68_4: ; %cmpxchg.nostore
-; CHECK-NOLSE-O1-NEXT: mov w1, wzr
+; CHECK-NOLSE-O1-NEXT: mov w9, wzr
; CHECK-NOLSE-O1-NEXT: clrex
+; CHECK-NOLSE-O1-NEXT: mov w1, w9
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
;
@@ -6206,6 +6210,7 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_i32:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: mov x8, x0
+; CHECK-NOLSE-O1-NEXT: mov w9, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT: LBB69_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT: ldxr w0, [x8]
@@ -6213,15 +6218,16 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) {
; CHECK-NOLSE-O1-NEXT: b.ne LBB69_4
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB69_1 Depth=1
-; CHECK-NOLSE-O1-NEXT: stxr w9, w2, [x8]
-; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB69_1
-; CHECK-NOLSE-O1-NEXT: ; %bb.3:
-; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT: stxr w10, w2, [x8]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB69_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT: mov w1, w9
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
; CHECK-NOLSE-O1-NEXT: LBB69_4: ; %cmpxchg.nostore
-; CHECK-NOLSE-O1-NEXT: mov w1, wzr
+; CHECK-NOLSE-O1-NEXT: mov w9, wzr
; CHECK-NOLSE-O1-NEXT: clrex
+; CHECK-NOLSE-O1-NEXT: mov w1, w9
; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
; CHECK-NOLSE-O1-NEXT: ret
;
@@ -6306,6 +6312,7 @@ define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_i64:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: mov x8, x0
+; CHECK-NOLSE-O1-NEXT: mov w9, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT: LBB70_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT: ldxr x0, [x8]
@@ -6313,14 +6320,15 @@ define { i64, i1 } @cmpxchg_i64(ptr %ptr, i64 %desired, i64 %new) {
; CHECK-NOLSE-O1-NEXT: b.ne LBB70_4
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB70_1 Depth=1
-; CHECK-NOLSE-O1-NEXT: stxr w9, x2, [x8]
-; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB70_1
-; CHECK-NOLSE-O1-NEXT: ; %bb.3:
-; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT: stxr w10, x2, [x8]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB70_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT: mov w1, w9
; CHECK-NOLSE-O1-NEXT: ret
; CHECK-NOLSE-O1-NEXT: LBB70_4: ; %cmpxchg.nostore
-; CHECK-NOLSE-O1-NEXT: mov w1, wzr
+; CHECK-NOLSE-O1-NEXT: mov w9, wzr
; CHECK-NOLSE-O1-NEXT: clrex
+; CHECK-NOLSE-O1-NEXT: mov w1, w9
; CHECK-NOLSE-O1-NEXT: ret
;
; CHECK-OUTLINE-O1-LABEL: cmpxchg_i64:
@@ -6404,6 +6412,7 @@ define { ptr, i1 } @cmpxchg_ptr(ptr %ptr, ptr %desired, ptr %new) {
; CHECK-NOLSE-O1-LABEL: cmpxchg_ptr:
; CHECK-NOLSE-O1: ; %bb.0:
; CHECK-NOLSE-O1-NEXT: mov x8, x0
+; CHECK-NOLSE-O1-NEXT: mov w9, #1 ; =0x1
; CHECK-NOLSE-O1-NEXT: LBB71_1: ; %cmpxchg.start
; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NOLSE-O1-NEXT: ldxr x0, [x8]
@@ -6411,14 +6420,15 @@ define { ptr, i1 } @cmpxchg_ptr(ptr %ptr, ptr %desired, ptr %new) {
; CHECK-NOLSE-O1-NEXT: b.ne LBB71_4
; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore
; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB71_1 Depth=1
-; CHECK-NOLSE-O1-NEXT: stxr w9, x2, [x8]
-; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB71_1
-; CHECK-NOLSE-O1-NEXT: ; %bb.3:
-; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT: stxr w10, x2, [x8]
+; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB71_1
+; CHECK-NOLSE-O1-NEXT: ; %bb.3: ; %cmpxchg.end
+; CHECK-NOLSE-O1-NEXT: mov w1, w9
; CHECK-NOLSE-O1-NEXT: ret
; CHECK-NOLSE-O1-NEXT: LBB71_4: ; %cmpxchg.nostore
-; CHECK-NOLSE-O1-NEXT: mov w1, wzr
+; CHECK-NOLSE-O1-NEXT: mov w9, wzr
; CHECK-NOLSE-O1-NEXT: clrex
+; CHECK-NOLSE-O1-NEXT: mov w1, w9
; CHECK-NOLSE-O1-NEXT: ret
;
; CHECK-OUTLINE-O1-LABEL: cmpxchg_ptr:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index 4a85d8490d2e9..cab2741be9929 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -9,7 +9,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
- ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+ ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
@@ -17,7 +17,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
- ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+ ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber renamable $w9 = STXRW renamable $w2, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
@@ -49,7 +49,7 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
- ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+ ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $w1, $x0, $x9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
@@ -57,7 +57,7 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
- ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+ ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
; CHECK-NEXT: liveins: $w1, $x0, $x8, $x9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber renamable $w10 = STXRW renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
@@ -88,7 +88,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
- ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+ ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
@@ -96,7 +96,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
- ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+ ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber renamable $w9 = STLXRW renamable $w2, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
@@ -126,7 +126,7 @@ define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) {
; CHECK-NEXT: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
- ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+ ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $x8 = LDXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p)
@@ -134,7 +134,7 @@ define i64 @val_compare_and_swap_64(ptr %p, i64 %cmp, i64 %new) {
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
- ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+ ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
; CHECK-NEXT: liveins: $x0, $x1, $x2, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber renamable $w9 = STXRX renamable $x2, renamable $x0, pcsections !0 :: (volatile store (s64) into %ir.p)
@@ -164,7 +164,7 @@ define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new)
; CHECK-NEXT: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
- ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+ ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $x8 = LDAXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p)
@@ -172,7 +172,7 @@ define i64 @val_compare_and_swap_64_monotonic_seqcst(ptr %p, i64 %cmp, i64 %new)
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
- ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+ ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
; CHECK-NEXT: liveins: $x0, $x1, $x2, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber renamable $w9 = STLXRX renamable $x2, renamable $x0, pcsections !0 :: (volatile store (s64) into %ir.p)
@@ -202,7 +202,7 @@ define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new)
; CHECK-NEXT: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
- ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
+ ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $x0, $x1, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $x8 = LDAXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p)
@@ -210,7 +210,7 @@ define i64 @val_compare_and_swap_64_release_acquire(ptr %p, i64 %cmp, i64 %new)
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
- ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+ ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
; CHECK-NEXT: liveins: $x0, $x1, $x2, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber renamable $w9 = STLXRX renamable $x2, renamable $x0, pcsections !0 :: (volatile store (s64) into %ir.p)
@@ -240,7 +240,7 @@ define i32 @fetch_and_nand(ptr %p) {
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
@@ -265,7 +265,7 @@ define i64 @fetch_and_nand_64(ptr %p) {
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $x8 = LDAXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p)
@@ -292,7 +292,7 @@ define i32 @fetch_and_or(ptr %p) {
; CHECK-NEXT: renamable $w9 = MOVZWi 5, 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
@@ -316,7 +316,7 @@ define i64 @fetch_and_or_64(ptr %p) {
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $x8 = LDXRX renamable $x0, pcsections !0 :: (volatile load (s64) from %ir.p)
@@ -723,7 +723,7 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -747,7 +747,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -770,7 +770,7 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -794,7 +794,7 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -818,7 +818,7 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -842,7 +842,7 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -866,7 +866,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -892,7 +892,7 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -920,7 +920,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -948,7 +948,7 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 7
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
@@ -974,7 +974,7 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -998,7 +998,7 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1021,7 +1021,7 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1045,7 +1045,7 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1069,7 +1069,7 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1093,7 +1093,7 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1117,7 +1117,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1143,7 +1143,7 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1171,7 +1171,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 15
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDAXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1199,7 +1199,7 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
; CHECK-NEXT: renamable $w9 = ANDWri killed renamable $w1, 15
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.atomicrmw.start:
- ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w8 = LDXRH renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
@@ -1227,34 +1227,35 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
- ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000)
+ ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w0 = LDXRB renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 7, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
+ ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
- ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000)
+ ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr)
- ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1
+ ; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+ ; CHECK-NEXT: renamable $w9 = MOVZWi 1, 0
+ ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1
+ ; CHECK-NEXT: B %bb.4
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: bb.3.cmpxchg.nostore:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w1 = MOVZWi 1, 0
- ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0
- ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1
+ ; CHECK-NEXT: $w9 = ORRWrs $wzr, $wzr, 0
+ ; CHECK-NEXT: CLREX 15, pcsections !0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4.cmpxchg.nostore:
- ; CHECK-NEXT: liveins: $x0
+ ; CHECK-NEXT: bb.4.cmpxchg.end:
+ ; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0
- ; CHECK-NEXT: CLREX 15, pcsections !0
; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0
+ ; CHECK-NEXT: $w1 = ORRWrs $wzr, killed $w9, 0
; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1
%res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic, !pcsections !0
ret { i8, i1 } %res
@@ -1269,35 +1270,36 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
; CHECK-NEXT: $x8 = ORRXrs $xzr, $x0, 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
- ; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.4(0x04000000)
+ ; CHECK-NEXT: successors: %bb.2(0x7ffff800), %bb.3(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x8
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $w0 = LDXRH renamable $x8, implicit-def renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w0, 15, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
+ ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2.cmpxchg.trystore:
- ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000)
+ ; CHECK-NEXT: successors: %bb.4(0x7ffff800), %bb.1(0x00000800)
; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: early-clobber renamable $w9 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr)
- ; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1
+ ; CHECK-NEXT: early-clobber renamable $w10 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+ ; CHECK-NEXT: renamable $w9 = MOVZWi 1, 0
+ ; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1
+ ; CHECK-NEXT: B %bb.4
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: bb.3.cmpxchg.nostore:
+ ; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w1 = MOVZWi 1, 0
- ; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0
- ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1
+ ; CHECK-NEXT: $w9 = ORRWrs $wzr, $wzr, 0
+ ; CHECK-NEXT: CLREX 15, pcsections !0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4.cmpxchg.nostore:
- ; CHECK-NEXT: liveins: $x0
+ ; CHECK-NEXT: bb.4.cmpxchg.end:
+ ; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $w1 = ORRWrs $wzr, $wzr, 0
- ; CHECK-NEXT: CLREX 15, pcsections !0
; CHECK-NEXT: $w0 = KILL renamable $w0, implicit killed $x0
- ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1
+ ; CHECK-NEXT: $w1 = ORRWrs $wzr, killed $w9, 0
+ ; CHECK-NEXT: RET undef $lr, implicit $w0, implicit $w1
%res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic, !pcsections !0
ret { i16, i1 } %res
}
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
index 42cb3d4e9589d..bf78429da52f3 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
@@ -850,18 +850,18 @@ define dso_local void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldxr x8, [x9]
; CHECK-NEXT: cmp x8, x0
-; CHECK-NEXT: b.ne .LBB43_3
+; CHECK-NEXT: b.ne .LBB43_4
; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore
; CHECK-NEXT: // in Loop: Header=BB43_1 Depth=1
; CHECK-NEXT: stxr w10, x1, [x9]
; CHECK-NEXT: cbnz w10, .LBB43_1
-; CHECK-NEXT: b .LBB43_4
-; CHECK-NEXT: .LBB43_3: // %cmpxchg.nostore
-; CHECK-NEXT: clrex
-; CHECK-NEXT: .LBB43_4: // %cmpxchg.end
+; CHECK-NEXT: .LBB43_3: // %cmpxchg.end
; CHECK-NEXT: adrp x9, var64
; CHECK-NEXT: str x8, [x9, :lo12:var64]
; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB43_4: // %cmpxchg.nostore
+; CHECK-NEXT: clrex
+; CHECK-NEXT: b .LBB43_3
%pair = cmpxchg ptr @var64, i64 %wanted, i64 %new monotonic monotonic
%old = extractvalue { i64, i1 } %pair, 0
store i64 %old, ptr @var64
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
index 21729b9dfd101..24a6c3c440e18 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -49,15 +49,9 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB0_2
-; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6
-; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB0_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w22, w0
; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
@@ -68,19 +62,25 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB0_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB0_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB0_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1
+; SOFTFP-NOLSE-NEXT: b .LBB0_6
+; SOFTFP-NOLSE-NEXT: .LBB0_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1
; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -137,15 +137,9 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB1_2
-; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6
-; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB1_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w22, w0
; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
@@ -156,19 +150,25 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB1_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB1_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB1_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1
+; SOFTFP-NOLSE-NEXT: b .LBB1_6
+; SOFTFP-NOLSE-NEXT: .LBB1_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1
; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -236,34 +236,34 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT: b .LBB2_2
-; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6
-; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB2_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB2_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB2_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB2_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1
+; SOFTFP-NOLSE-NEXT: b .LBB2_6
+; SOFTFP-NOLSE-NEXT: .LBB2_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1
; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -330,34 +330,34 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT: b .LBB3_2
-; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6
-; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB3_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB3_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB3_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB3_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1
+; SOFTFP-NOLSE-NEXT: b .LBB3_6
+; SOFTFP-NOLSE-NEXT: .LBB3_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1
; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -406,32 +406,32 @@ define float @test_atomicrmw_fadd_f32_seq_cst_align4(ptr %ptr, float %value) #0
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldr w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
-; SOFTFP-NOLSE-NEXT: b .LBB4_2
-; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6
-; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB4_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB4_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21
-; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB4_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB4_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1
+; SOFTFP-NOLSE-NEXT: b .LBB4_6
+; SOFTFP-NOLSE-NEXT: .LBB4_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1
; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -480,32 +480,32 @@ define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldr x0, [x0]
; SOFTFP-NOLSE-NEXT: mov x20, x1
-; SOFTFP-NOLSE-NEXT: b .LBB5_2
-; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB5_6
-; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB5_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov x1, x20
; SOFTFP-NOLSE-NEXT: mov x21, x0
; SOFTFP-NOLSE-NEXT: bl __adddf3
; SOFTFP-NOLSE-NEXT: mov x8, x0
-; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB5_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x0, [x19]
; SOFTFP-NOLSE-NEXT: cmp x0, x21
-; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB5_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, x8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB5_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1
+; SOFTFP-NOLSE-NEXT: b .LBB5_6
+; SOFTFP-NOLSE-NEXT: .LBB5_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1
; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
@@ -701,16 +701,9 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
-; SOFTFP-NOLSE-NEXT: b .LBB7_2
-; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6
-; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB7_2 Depth 2
; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
@@ -731,20 +724,27 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: mov w8, w22
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB7_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20]
; SOFTFP-NOLSE-NEXT: cmp w22, w8
-; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB7_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB7_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB7_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB7_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1
+; SOFTFP-NOLSE-NEXT: b .LBB7_6
+; SOFTFP-NOLSE-NEXT: .LBB7_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1
; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
@@ -817,16 +817,9 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB8_2
-; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6
-; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB8_2 Depth 2
; SOFTFP-NOLSE-NEXT: lsl w23, w1, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: mov w0, w23
@@ -839,20 +832,27 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB8_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19]
; SOFTFP-NOLSE-NEXT: cmp w22, w23
-; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB8_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB8_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1
+; SOFTFP-NOLSE-NEXT: b .LBB8_6
+; SOFTFP-NOLSE-NEXT: .LBB8_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1
; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
@@ -906,16 +906,9 @@ define <2 x float> @test_atomicrmw_fadd_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
-; SOFTFP-NOLSE-NEXT: b .LBB9_2
-; SOFTFP-NOLSE-NEXT: .LBB9_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB9_6
-; SOFTFP-NOLSE-NEXT: .LBB9_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB9_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB9_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB9_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: mov w1, w19
; SOFTFP-NOLSE-NEXT: bl __addsf3
@@ -928,20 +921,27 @@ define <2 x float> @test_atomicrmw_fadd_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23
; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32
; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32
-; SOFTFP-NOLSE-NEXT: .LBB9_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB9_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB9_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB9_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20]
; SOFTFP-NOLSE-NEXT: cmp x22, x9
-; SOFTFP-NOLSE-NEXT: b.ne .LBB9_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB9_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB9_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB9_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB9_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB9_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_1
+; SOFTFP-NOLSE-NEXT: b .LBB9_6
+; SOFTFP-NOLSE-NEXT: .LBB9_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_1
; SOFTFP-NOLSE-NEXT: .LBB9_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
index e3e18a1f91c6d..16825c9dcd178 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
@@ -51,15 +51,9 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB0_2
-; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6
-; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB0_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w22, w0
; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
@@ -70,19 +64,25 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB0_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB0_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB0_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1
+; SOFTFP-NOLSE-NEXT: b .LBB0_6
+; SOFTFP-NOLSE-NEXT: .LBB0_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1
; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -139,15 +139,9 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB1_2
-; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6
-; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB1_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w22, w0
; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
@@ -158,19 +152,25 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB1_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB1_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB1_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1
+; SOFTFP-NOLSE-NEXT: b .LBB1_6
+; SOFTFP-NOLSE-NEXT: .LBB1_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1
; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -238,34 +238,34 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT: b .LBB2_2
-; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6
-; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB2_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB2_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB2_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB2_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1
+; SOFTFP-NOLSE-NEXT: b .LBB2_6
+; SOFTFP-NOLSE-NEXT: .LBB2_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1
; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -332,34 +332,34 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT: b .LBB3_2
-; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6
-; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB3_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB3_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB3_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB3_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1
+; SOFTFP-NOLSE-NEXT: b .LBB3_6
+; SOFTFP-NOLSE-NEXT: .LBB3_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1
; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -408,32 +408,32 @@ define float @test_atomicrmw_fmax_f32_seq_cst_align4(ptr %ptr, float %value) #0
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldr w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
-; SOFTFP-NOLSE-NEXT: b .LBB4_2
-; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6
-; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB4_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB4_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21
-; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB4_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB4_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1
+; SOFTFP-NOLSE-NEXT: b .LBB4_6
+; SOFTFP-NOLSE-NEXT: .LBB4_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1
; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -482,32 +482,32 @@ define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) #
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldr x0, [x0]
; SOFTFP-NOLSE-NEXT: mov x20, x1
-; SOFTFP-NOLSE-NEXT: b .LBB5_2
-; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB5_6
-; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB5_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov x1, x20
; SOFTFP-NOLSE-NEXT: mov x21, x0
; SOFTFP-NOLSE-NEXT: bl fmax
; SOFTFP-NOLSE-NEXT: mov x8, x0
-; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB5_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x0, [x19]
; SOFTFP-NOLSE-NEXT: cmp x0, x21
-; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB5_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, x8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB5_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1
+; SOFTFP-NOLSE-NEXT: b .LBB5_6
+; SOFTFP-NOLSE-NEXT: .LBB5_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1
; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
@@ -581,16 +581,9 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
-; SOFTFP-NOLSE-NEXT: b .LBB6_2
-; SOFTFP-NOLSE-NEXT: .LBB6_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB6_6
-; SOFTFP-NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB6_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB6_2 Depth 2
; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
@@ -611,20 +604,27 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: mov w8, w22
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB6_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB6_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB6_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20]
; SOFTFP-NOLSE-NEXT: cmp w22, w8
-; SOFTFP-NOLSE-NEXT: b.ne .LBB6_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB6_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB6_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB6_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB6_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB6_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_1
+; SOFTFP-NOLSE-NEXT: b .LBB6_6
+; SOFTFP-NOLSE-NEXT: .LBB6_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_1
; SOFTFP-NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
@@ -725,16 +725,9 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB7_2
-; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6
-; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB7_2 Depth 2
; SOFTFP-NOLSE-NEXT: lsl w23, w1, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: mov w0, w23
@@ -747,20 +740,27 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB7_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19]
; SOFTFP-NOLSE-NEXT: cmp w22, w23
-; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB7_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB7_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1
+; SOFTFP-NOLSE-NEXT: b .LBB7_6
+; SOFTFP-NOLSE-NEXT: .LBB7_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1
; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
@@ -814,16 +814,9 @@ define <2 x float> @test_atomicrmw_fmax_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
-; SOFTFP-NOLSE-NEXT: b .LBB8_2
-; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6
-; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB8_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: mov w1, w19
; SOFTFP-NOLSE-NEXT: bl fmaxf
@@ -836,20 +829,27 @@ define <2 x float> @test_atomicrmw_fmax_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23
; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32
; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32
-; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB8_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20]
; SOFTFP-NOLSE-NEXT: cmp x22, x9
-; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB8_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB8_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB8_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB8_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1
+; SOFTFP-NOLSE-NEXT: b .LBB8_6
+; SOFTFP-NOLSE-NEXT: .LBB8_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1
; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
index 10de6777bd285..314075c619103 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
@@ -51,15 +51,9 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB0_2
-; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6
-; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB0_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w22, w0
; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
@@ -70,19 +64,25 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB0_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB0_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB0_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1
+; SOFTFP-NOLSE-NEXT: b .LBB0_6
+; SOFTFP-NOLSE-NEXT: .LBB0_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1
; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -139,15 +139,9 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB1_2
-; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6
-; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB1_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w22, w0
; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
@@ -158,19 +152,25 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB1_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB1_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB1_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1
+; SOFTFP-NOLSE-NEXT: b .LBB1_6
+; SOFTFP-NOLSE-NEXT: .LBB1_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1
; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -238,34 +238,34 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT: b .LBB2_2
-; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6
-; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB2_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB2_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB2_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB2_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1
+; SOFTFP-NOLSE-NEXT: b .LBB2_6
+; SOFTFP-NOLSE-NEXT: .LBB2_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1
; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -332,34 +332,34 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT: b .LBB3_2
-; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6
-; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB3_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB3_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB3_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB3_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1
+; SOFTFP-NOLSE-NEXT: b .LBB3_6
+; SOFTFP-NOLSE-NEXT: .LBB3_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1
; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -408,32 +408,32 @@ define float @test_atomicrmw_fmin_f32_seq_cst_align4(ptr %ptr, float %value) #0
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldr w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
-; SOFTFP-NOLSE-NEXT: b .LBB4_2
-; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6
-; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB4_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB4_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21
-; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB4_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB4_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1
+; SOFTFP-NOLSE-NEXT: b .LBB4_6
+; SOFTFP-NOLSE-NEXT: .LBB4_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1
; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -482,32 +482,32 @@ define double @test_atomicrmw_fmin_f32_seq_cst_align8(ptr %ptr, double %value) #
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldr x0, [x0]
; SOFTFP-NOLSE-NEXT: mov x20, x1
-; SOFTFP-NOLSE-NEXT: b .LBB5_2
-; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB5_6
-; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB5_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov x1, x20
; SOFTFP-NOLSE-NEXT: mov x21, x0
; SOFTFP-NOLSE-NEXT: bl fmin
; SOFTFP-NOLSE-NEXT: mov x8, x0
-; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB5_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x0, [x19]
; SOFTFP-NOLSE-NEXT: cmp x0, x21
-; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB5_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, x8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB5_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1
+; SOFTFP-NOLSE-NEXT: b .LBB5_6
+; SOFTFP-NOLSE-NEXT: .LBB5_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1
; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
@@ -581,16 +581,9 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
-; SOFTFP-NOLSE-NEXT: b .LBB6_2
-; SOFTFP-NOLSE-NEXT: .LBB6_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB6_6
-; SOFTFP-NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB6_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB6_2 Depth 2
; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
@@ -611,20 +604,27 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: mov w8, w22
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB6_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB6_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB6_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20]
; SOFTFP-NOLSE-NEXT: cmp w22, w8
-; SOFTFP-NOLSE-NEXT: b.ne .LBB6_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB6_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB6_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB6_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB6_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB6_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_1
+; SOFTFP-NOLSE-NEXT: b .LBB6_6
+; SOFTFP-NOLSE-NEXT: .LBB6_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB6_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB6_1
; SOFTFP-NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
@@ -725,16 +725,9 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB7_2
-; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6
-; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB7_2 Depth 2
; SOFTFP-NOLSE-NEXT: lsl w23, w1, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: mov w0, w23
@@ -747,20 +740,27 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB7_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19]
; SOFTFP-NOLSE-NEXT: cmp w22, w23
-; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB7_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB7_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1
+; SOFTFP-NOLSE-NEXT: b .LBB7_6
+; SOFTFP-NOLSE-NEXT: .LBB7_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1
; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
@@ -814,16 +814,9 @@ define <2 x float> @test_atomicrmw_fmin_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
-; SOFTFP-NOLSE-NEXT: b .LBB8_2
-; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6
-; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB8_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: mov w1, w19
; SOFTFP-NOLSE-NEXT: bl fminf
@@ -836,20 +829,27 @@ define <2 x float> @test_atomicrmw_fmin_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23
; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32
; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32
-; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB8_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20]
; SOFTFP-NOLSE-NEXT: cmp x22, x9
-; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB8_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB8_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB8_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB8_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1
+; SOFTFP-NOLSE-NEXT: b .LBB8_6
+; SOFTFP-NOLSE-NEXT: .LBB8_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1
; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
index 82e0f14e68e26..6bb541684c2bd 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
@@ -49,15 +49,9 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB0_2
-; SOFTFP-NOLSE-NEXT: .LBB0_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB0_6
-; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB0_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB0_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w22, w0
; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
@@ -68,19 +62,25 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB0_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB0_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB0_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB0_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB0_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB0_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1
+; SOFTFP-NOLSE-NEXT: b .LBB0_6
+; SOFTFP-NOLSE-NEXT: .LBB0_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB0_1
; SOFTFP-NOLSE-NEXT: .LBB0_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -137,15 +137,9 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
; SOFTFP-NOLSE-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB1_2
-; SOFTFP-NOLSE-NEXT: .LBB1_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB1_6
-; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB1_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB1_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w22, w0
; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
@@ -156,19 +150,25 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB1_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB1_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB1_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB1_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB1_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB1_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1
+; SOFTFP-NOLSE-NEXT: b .LBB1_6
+; SOFTFP-NOLSE-NEXT: .LBB1_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB1_1
; SOFTFP-NOLSE-NEXT: .LBB1_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -236,34 +236,34 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT: b .LBB2_2
-; SOFTFP-NOLSE-NEXT: .LBB2_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB2_6
-; SOFTFP-NOLSE-NEXT: .LBB2_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB2_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB2_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB2_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB2_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB2_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB2_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB2_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB2_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB2_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB2_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB2_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1
+; SOFTFP-NOLSE-NEXT: b .LBB2_6
+; SOFTFP-NOLSE-NEXT: .LBB2_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB2_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB2_1
; SOFTFP-NOLSE-NEXT: .LBB2_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -330,34 +330,34 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldrh w0, [x0]
; SOFTFP-NOLSE-NEXT: lsl w20, w1, #16
-; SOFTFP-NOLSE-NEXT: b .LBB3_2
-; SOFTFP-NOLSE-NEXT: .LBB3_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB3_6
-; SOFTFP-NOLSE-NEXT: .LBB3_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB3_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB3_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB3_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: lsl w0, w0, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB3_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB3_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB3_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
-; SOFTFP-NOLSE-NEXT: b.ne .LBB3_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB3_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxrh w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB3_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB3_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB3_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1
+; SOFTFP-NOLSE-NEXT: b .LBB3_6
+; SOFTFP-NOLSE-NEXT: .LBB3_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB3_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB3_1
; SOFTFP-NOLSE-NEXT: .LBB3_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -406,32 +406,32 @@ define float @test_atomicrmw_fsub_f32_seq_cst_align4(ptr %ptr, float %value) #0
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldr w0, [x0]
; SOFTFP-NOLSE-NEXT: mov w20, w1
-; SOFTFP-NOLSE-NEXT: b .LBB4_2
-; SOFTFP-NOLSE-NEXT: .LBB4_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB4_6
-; SOFTFP-NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB4_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB4_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB4_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: mov w21, w0
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: mov w8, w0
-; SOFTFP-NOLSE-NEXT: .LBB4_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB4_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB4_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w0, [x19]
; SOFTFP-NOLSE-NEXT: cmp w0, w21
-; SOFTFP-NOLSE-NEXT: b.ne .LBB4_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB4_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, w8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB4_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB4_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB4_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1
+; SOFTFP-NOLSE-NEXT: b .LBB4_6
+; SOFTFP-NOLSE-NEXT: .LBB4_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB4_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB4_1
; SOFTFP-NOLSE-NEXT: .LBB4_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -480,32 +480,32 @@ define double @test_atomicrmw_fsub_f32_seq_cst_align8(ptr %ptr, double %value) #
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: ldr x0, [x0]
; SOFTFP-NOLSE-NEXT: mov x20, x1
-; SOFTFP-NOLSE-NEXT: b .LBB5_2
-; SOFTFP-NOLSE-NEXT: .LBB5_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB5_6
-; SOFTFP-NOLSE-NEXT: .LBB5_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB5_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB5_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB5_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov x1, x20
; SOFTFP-NOLSE-NEXT: mov x21, x0
; SOFTFP-NOLSE-NEXT: bl __subdf3
; SOFTFP-NOLSE-NEXT: mov x8, x0
-; SOFTFP-NOLSE-NEXT: .LBB5_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB5_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB5_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x0, [x19]
; SOFTFP-NOLSE-NEXT: cmp x0, x21
-; SOFTFP-NOLSE-NEXT: b.ne .LBB5_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB5_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, x8, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB5_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB5_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB5_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1
+; SOFTFP-NOLSE-NEXT: b .LBB5_6
+; SOFTFP-NOLSE-NEXT: .LBB5_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB5_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB5_1
; SOFTFP-NOLSE-NEXT: .LBB5_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; SOFTFP-NOLSE-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
@@ -701,16 +701,9 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
-; SOFTFP-NOLSE-NEXT: b .LBB7_2
-; SOFTFP-NOLSE-NEXT: .LBB7_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB7_6
-; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB7_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB7_2 Depth 2
; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
@@ -731,20 +724,27 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: mov w8, w22
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
; SOFTFP-NOLSE-NEXT: bfi w8, w23, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB7_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB7_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB7_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w22, [x20]
; SOFTFP-NOLSE-NEXT: cmp w22, w8
-; SOFTFP-NOLSE-NEXT: b.ne .LBB7_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB7_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w9, w0, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB7_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB7_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w9, .LBB7_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB7_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1
+; SOFTFP-NOLSE-NEXT: b .LBB7_6
+; SOFTFP-NOLSE-NEXT: .LBB7_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB7_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w23, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB7_1
; SOFTFP-NOLSE-NEXT: .LBB7_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
@@ -817,16 +817,9 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: lsl w21, w8, #16
; SOFTFP-NOLSE-NEXT: mov x19, x0
; SOFTFP-NOLSE-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; SOFTFP-NOLSE-NEXT: b .LBB8_2
-; SOFTFP-NOLSE-NEXT: .LBB8_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_6
-; SOFTFP-NOLSE-NEXT: .LBB8_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB8_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB8_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB8_2 Depth 2
; SOFTFP-NOLSE-NEXT: lsl w23, w1, #16
; SOFTFP-NOLSE-NEXT: mov w1, w20
; SOFTFP-NOLSE-NEXT: mov w0, w23
@@ -839,20 +832,27 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
; SOFTFP-NOLSE-NEXT: bl __truncsfbf2
; SOFTFP-NOLSE-NEXT: bfxil w23, w22, #0, #16
; SOFTFP-NOLSE-NEXT: bfi w0, w24, #16, #16
-; SOFTFP-NOLSE-NEXT: .LBB8_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB8_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB8_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr w22, [x19]
; SOFTFP-NOLSE-NEXT: cmp w22, w23
-; SOFTFP-NOLSE-NEXT: b.ne .LBB8_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB8_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w8, w0, [x19]
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB8_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB8_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1
+; SOFTFP-NOLSE-NEXT: b .LBB8_6
+; SOFTFP-NOLSE-NEXT: .LBB8_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB8_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr w1, w22, #16
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB8_1
; SOFTFP-NOLSE-NEXT: .LBB8_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
@@ -906,16 +906,9 @@ define <2 x float> @test_atomicrmw_fsub_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
; SOFTFP-NOLSE-NEXT: mov w19, w2
; SOFTFP-NOLSE-NEXT: mov x20, x0
-; SOFTFP-NOLSE-NEXT: b .LBB9_2
-; SOFTFP-NOLSE-NEXT: .LBB9_1: // %cmpxchg.nostore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=1
-; SOFTFP-NOLSE-NEXT: mov w8, wzr
-; SOFTFP-NOLSE-NEXT: clrex
-; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT: cbnz w8, .LBB9_6
-; SOFTFP-NOLSE-NEXT: .LBB9_2: // %atomicrmw.start
+; SOFTFP-NOLSE-NEXT: .LBB9_1: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
-; SOFTFP-NOLSE-NEXT: // Child Loop BB9_3 Depth 2
+; SOFTFP-NOLSE-NEXT: // Child Loop BB9_2 Depth 2
; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: mov w1, w19
; SOFTFP-NOLSE-NEXT: bl __subsf3
@@ -928,20 +921,27 @@ define <2 x float> @test_atomicrmw_fsub_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
; SOFTFP-NOLSE-NEXT: // kill: def $w23 killed $w23 killed $x23 def $x23
; SOFTFP-NOLSE-NEXT: orr x8, x8, x24, lsl #32
; SOFTFP-NOLSE-NEXT: orr x9, x9, x23, lsl #32
-; SOFTFP-NOLSE-NEXT: .LBB9_3: // %cmpxchg.start
-; SOFTFP-NOLSE-NEXT: // Parent Loop BB9_2 Depth=1
+; SOFTFP-NOLSE-NEXT: .LBB9_2: // %cmpxchg.start
+; SOFTFP-NOLSE-NEXT: // Parent Loop BB9_1 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxr x22, [x20]
; SOFTFP-NOLSE-NEXT: cmp x22, x9
-; SOFTFP-NOLSE-NEXT: b.ne .LBB9_1
-; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
-; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_3 Depth=2
+; SOFTFP-NOLSE-NEXT: b.ne .LBB9_5
+; SOFTFP-NOLSE-NEXT: // %bb.3: // %cmpxchg.trystore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_2 Depth=2
; SOFTFP-NOLSE-NEXT: stlxr w10, x8, [x20]
-; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB9_3
-; SOFTFP-NOLSE-NEXT: // %bb.5: // in Loop: Header=BB9_2 Depth=1
+; SOFTFP-NOLSE-NEXT: cbnz w10, .LBB9_2
+; SOFTFP-NOLSE-NEXT: // %bb.4: // in Loop: Header=BB9_1 Depth=1
; SOFTFP-NOLSE-NEXT: mov w8, #1 // =0x1
; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
-; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_2
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_1
+; SOFTFP-NOLSE-NEXT: b .LBB9_6
+; SOFTFP-NOLSE-NEXT: .LBB9_5: // %cmpxchg.nostore
+; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB9_1 Depth=1
+; SOFTFP-NOLSE-NEXT: mov w8, wzr
+; SOFTFP-NOLSE-NEXT: clrex
+; SOFTFP-NOLSE-NEXT: lsr x23, x22, #32
+; SOFTFP-NOLSE-NEXT: cbz w8, .LBB9_1
; SOFTFP-NOLSE-NEXT: .LBB9_6: // %atomicrmw.end
; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: mov w1, w23
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index b7817ebe59b9b..3f4dd116d91f8 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -181,41 +181,41 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldaxr w8, [x19]
; CHECK-NEXT: cmp w8, w21
-; CHECK-NEXT: b.ne LBB3_4
+; CHECK-NEXT: b.ne LBB3_9
; CHECK-NEXT: ; %bb.2: ; %cmpxchg.trystore
; CHECK-NEXT: ; in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT: stlxr w8, w20, [x19]
; CHECK-NEXT: cbnz w8, LBB3_1
; CHECK-NEXT: ; %bb.3:
; CHECK-NEXT: mov w8, #1 ; =0x1
-; CHECK-NEXT: b LBB3_5
-; CHECK-NEXT: LBB3_4: ; %cmpxchg.nostore
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: clrex
-; CHECK-NEXT: LBB3_5: ; %for.cond.preheader
+; CHECK-NEXT: LBB3_4: ; %for.cond.preheader
; CHECK-NEXT: mov w22, #2 ; =0x2
-; CHECK-NEXT: LBB3_6: ; %for.cond
+; CHECK-NEXT: LBB3_5: ; %for.cond
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cbz w22, LBB3_9
-; CHECK-NEXT: ; %bb.7: ; %for.body
-; CHECK-NEXT: ; in Loop: Header=BB3_6 Depth=1
+; CHECK-NEXT: cbz w22, LBB3_8
+; CHECK-NEXT: ; %bb.6: ; %for.body
+; CHECK-NEXT: ; in Loop: Header=BB3_5 Depth=1
; CHECK-NEXT: sub w22, w22, #1
; CHECK-NEXT: orr w9, w21, w20
; CHECK-NEXT: ldr w10, [x19, w22, sxtw #2]
; CHECK-NEXT: cmp w9, w10
-; CHECK-NEXT: b.eq LBB3_6
-; CHECK-NEXT: ; %bb.8: ; %if.then
-; CHECK-NEXT: ; in Loop: Header=BB3_6 Depth=1
+; CHECK-NEXT: b.eq LBB3_5
+; CHECK-NEXT: ; %bb.7: ; %if.then
+; CHECK-NEXT: ; in Loop: Header=BB3_5 Depth=1
; CHECK-NEXT: str w9, [x19, w22, sxtw #2]
; CHECK-NEXT: bl _foo
; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: b LBB3_6
-; CHECK-NEXT: LBB3_9: ; %for.cond.cleanup
+; CHECK-NEXT: b LBB3_5
+; CHECK-NEXT: LBB3_8: ; %for.cond.cleanup
; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload
; CHECK-NEXT: ret
+; CHECK-NEXT: LBB3_9: ; %cmpxchg.nostore
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: clrex
+; CHECK-NEXT: b LBB3_4
;
; OUTLINE-ATOMICS-LABEL: test_conditional2:
; OUTLINE-ATOMICS: ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
index 4bf42d4ac9629..c37a21be5ca4d 100644
--- a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -36,16 +36,18 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
; CHECK-THUMB-NEXT: bx r1
;
; CHECK-ARMV6-LABEL: test_cmpxchg_res_i8:
-; CHECK-ARMV6: uxtb r1, r1
+; CHECK-ARMV6: .fnstart
+; CHECK-ARMV6-NEXT: uxtb r12, r1
; CHECK-ARMV6-NEXT: .LBB0_1:
-; CHECK-ARMV6-NEXT: ldrexb r3, [r0]
-; CHECK-ARMV6-NEXT: cmp r3, r1
+; CHECK-ARMV6-NEXT: ldrexb r1, [r0]
+; CHECK-ARMV6-NEXT: cmp r1, r12
; CHECK-ARMV6-NEXT: movne r0, #0
; CHECK-ARMV6-NEXT: bxne lr
; CHECK-ARMV6-NEXT: .LBB0_2:
; CHECK-ARMV6-NEXT: strexb r3, r2, [r0]
+; CHECK-ARMV6-NEXT: mov r1, #1
; CHECK-ARMV6-NEXT: cmp r3, #0
-; CHECK-ARMV6-NEXT: moveq r0, #1
+; CHECK-ARMV6-NEXT: moveq r0, r1
; CHECK-ARMV6-NEXT: bxeq lr
; CHECK-ARMV6-NEXT: b .LBB0_1
;
@@ -61,19 +63,22 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
; CHECK-THUMBV6-NEXT: pop {r4, pc}
;
; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8:
-; CHECK-ARMV7: uxtb r1, r1
+; CHECK-ARMV7: .fnstart
+; CHECK-ARMV7-NEXT: uxtb r12, r1
; CHECK-ARMV7-NEXT: .LBB0_1:
-; CHECK-ARMV7-NEXT: ldrexb r3, [r0]
-; CHECK-ARMV7-NEXT: cmp r3, r1
-; CHECK-ARMV7-NEXT: bne .LBB0_3
+; CHECK-ARMV7-NEXT: ldrexb r1, [r0]
+; CHECK-ARMV7-NEXT: cmp r1, r12
+; CHECK-ARMV7-NEXT: bne .LBB0_4
; CHECK-ARMV7-NEXT: strexb r3, r2, [r0]
+; CHECK-ARMV7-NEXT: mov r1, #1
; CHECK-ARMV7-NEXT: cmp r3, #0
-; CHECK-ARMV7-NEXT: moveq r0, #1
-; CHECK-ARMV7-NEXT: bxeq lr
-; CHECK-ARMV7-NEXT: b .LBB0_1
-; CHECK-ARMV7-NEXT: .LBB0_3:
-; CHECK-ARMV7-NEXT: mov r0, #0
+; CHECK-ARMV7-NEXT: bne .LBB0_1
+; CHECK-ARMV7-NEXT: mov r0, r1
+; CHECK-ARMV7-NEXT: bx lr
+; CHECK-ARMV7-NEXT: .LBB0_4:
+; CHECK-ARMV7-NEXT: mov r1, #0
; CHECK-ARMV7-NEXT: clrex
+; CHECK-ARMV7-NEXT: mov r0, r1
; CHECK-ARMV7-NEXT: bx lr
;
; CHECK-THUMBV7-LABEL: test_cmpxchg_res_i8:
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
index 4ff71b42d5db0..d59f282314b5c 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-idioms.ll
@@ -4,14 +4,14 @@ define i32 @test_return(ptr %p, i32 %oldval, i32 %newval) {
; CHECK-LABEL: test_return:
; CHECK: ldrex [[LOADED:r[0-9]+]], [r0]
-; CHECK: cmp [[LOADED]], r1
+; CHECK: cmp [[LOADED]], r1
; CHECK: bne [[FAILED:LBB[0-9]+_[0-9]+]]
; CHECK: dmb ishst
; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: strex [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
-; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+; CHECK: cmp r3, #0
; CHECK: ldrex [[LOADED]], [r0]
; CHECK: cmp [[LOADED]], r1
@@ -22,12 +22,6 @@ define i32 @test_return(ptr %p, i32 %oldval, i32 %newval) {
; CHECK: clrex
; CHECK: movs r0, #0
; CHECK: dmb ish
-; CHECK: bx lr
-
-; CHECK: [[SUCCESS]]:
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: movs r0, #1
-; CHECK: dmb ish
; CHECK: bx lr
%pair = cmpxchg ptr %p, i32 %oldval, i32 %newval seq_cst seq_cst
@@ -49,7 +43,7 @@ define i1 @test_return_bool(ptr %value, i8 %oldValue, i8 %newValue) {
; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: strexb [[STATUS:r[0-9]+]], {{r[0-9]+}}, [r0]
-; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+; CHECK: cmp [[STATUS]], #0
; CHECK: ldrexb [[LOADED]], [r0]
; CHECK: cmp [[LOADED]], [[OLDBYTE]]
@@ -63,12 +57,6 @@ define i1 @test_return_bool(ptr %value, i8 %oldValue, i8 %newValue) {
; CHECK: eor r0, [[TMP]], #1
; CHECK: bx lr
-; CHECK: [[SUCCESS]]:
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: movs [[TMP:r[0-9]+]], #1
-; CHECK: eor r0, [[TMP]], #1
-; CHECK: bx lr
-
%pair = cmpxchg ptr %value, i8 %oldValue, i8 %newValue acq_rel monotonic
%success = extractvalue { i8, i1 } %pair, 1
@@ -87,7 +75,7 @@ define void @test_conditional(ptr %p, i32 %oldval, i32 %newval) {
; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]:
; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
-; CHECK: cbz [[STATUS]], [[SUCCESS:LBB[0-9]+_[0-9]+]]
+; CHECK: cmp [[STATUS]], #0
; CHECK: ldrex [[LOADED]], [r0]
; CHECK: cmp [[LOADED]], r1
@@ -99,11 +87,6 @@ define void @test_conditional(ptr %p, i32 %oldval, i32 %newval) {
; CHECK: dmb ish
; CHECK: b.w _baz
-; CHECK: [[SUCCESS]]:
-; CHECK-NOT: cmp {{r[0-9]+}}, {{r[0-9]+}}
-; CHECK: dmb ish
-; CHECK: b.w _bar
-
%pair = cmpxchg ptr %p, i32 %oldval, i32 %newval seq_cst seq_cst
%success = extractvalue { i32, i1 } %pair, 1
br i1 %success, label %true, label %false
diff --git a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
index 9963f2d08ba52..b33eea9975740 100644
--- a/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
+++ b/llvm/test/CodeGen/ARM/cmpxchg-weak.ll
@@ -10,16 +10,14 @@ define void @test_cmpxchg_weak(ptr %addr, i32 %desired, i32 %new) {
; CHECK-NEXT: dmb ish
; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r2, [r0]
; CHECK-NEXT: cmp [[SUCCESS]], #0
-; CHECK-NEXT: beq [[SUCCESSBB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: %bb.2:
+; CHECK-NEXT: dmb ish
; CHECK-NEXT: str r3, [r0]
; CHECK-NEXT: bx lr
; CHECK-NEXT: [[LDFAILBB]]:
; CHECK-NEXT: clrex
-; CHECK-NEXT: str r3, [r0]
-; CHECK-NEXT: bx lr
-; CHECK-NEXT: [[SUCCESSBB]]:
-; CHECK-NEXT: dmb ish
+; CHECK-NEXT: [[FAILBB]]:
; CHECK-NEXT: str r3, [r0]
; CHECK-NEXT: bx lr
;
@@ -37,19 +35,20 @@ define i1 @test_cmpxchg_weak_to_bool(i32, ptr %addr, i32 %desired, i32 %new) {
; CHECK-NEXT: bne [[LDFAILBB:LBB[0-9]+_[0-9]+]]
; CHECK-NEXT: %bb.1:
; CHECK-NEXT: dmb ish
-; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: strex [[SUCCESS:r[0-9]+]], r3, [r1]
; CHECK-NEXT: cmp [[SUCCESS]], #0
-; CHECK-NEXT: bxne lr
-; CHECK-NEXT: LBB1_2:
+; CHECK-NEXT: bne [[FAILBB:LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: %bb.2:
; CHECK-NEXT: mov r0, #1
; CHECK-NEXT: dmb ish
; CHECK-NEXT: bx lr
; CHECK-NEXT: [[LDFAILBB]]:
-; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: clrex
+; CHECK-NEXT: [[FAILBB]]:
+; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: bx lr
;
+
%pair = cmpxchg weak ptr %addr, i32 %desired, i32 %new seq_cst monotonic
%success = extractvalue { i32, i1 } %pair, 1
ret i1 %success
diff --git a/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll b/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll
index 4372cad3f87c6..0b0399cbf4661 100644
--- a/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll
+++ b/llvm/test/CodeGen/Hexagon/atomic-opaque-basic.ll
@@ -79,7 +79,6 @@ define void @f1() #0 {
; CHECK-NEXT: {
; CHECK-NEXT: r4 = sub(#-1,r4)
; CHECK-NEXT: }
-; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_1: // %cmpxchg.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: {
diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
index 2e72d26ed4566..585b4c7538246 100644
--- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
+++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -passes=atomic-expand -codegen-opt-level=1 %s | FileCheck %s
define i8 @test_atomic_xchg_i8(ptr %ptr, i8 %xchgend) {
@@ -221,49 +222,40 @@ define i8 @test_atomic_umin_i8(ptr %ptr, i8 %uminend) {
define i8 @test_cmpxchg_i8_seqcst_seqcst(ptr %ptr, i8 %desired, i8 %newval) {
; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
; CHECK: br label %[[START:.*]]
-
; CHECK: [[START]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i8) %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
; CHECK: [[FENCED_STORE]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
-
; CHECK: [[LOOP]]:
; CHECK: [[LOADED_LOOP:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEWVAL32]], ptr elementtype(i8) %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD]]
-
; CHECK: [[RELEASED_LOAD]]:
; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i8) %ptr)
; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i8
; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i8 [[OLDVAL_LOOP]], %desired
-; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
-
+; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
-
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i8 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
; CHECK: [[FAILURE_BB]]:
; CHECK: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE]]
-
; CHECK: [[DONE]]:
; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i8 [[LOADED]]
-
%pairold = cmpxchg ptr %ptr, i8 %desired, i8 %newval seq_cst seq_cst
%old = extractvalue { i8, i1 } %pairold, 0
ret i8 %old
@@ -272,49 +264,40 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(ptr %ptr, i8 %desired, i8 %newval) {
define i16 @test_cmpxchg_i16_seqcst_monotonic(ptr %ptr, i16 %desired, i16 %newval) {
; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
; CHECK: br label %[[LOOP:.*]]
-
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i16) %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
; CHECK: [[FENCED_STORE]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
-
; CHECK: [[LOOP]]:
; CHECK: [[LOADED_LOOP:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEWVAL32]], ptr elementtype(i16) %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD:.*]]
-
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD:cmpxchg\.releasedload]]
; CHECK: [[RELEASED_LOAD]]:
; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i16) %ptr)
; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i16
; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i16 [[OLDVAL_LOOP]], %desired
-; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
-
+; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
-
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i16 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
; CHECK: [[FAILURE_BB]]:
; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
; CHECK-NOT: dmb
; CHECK: br label %[[DONE]]
-
; CHECK: [[DONE]]:
; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i16 [[LOADED]]
-
%pairold = cmpxchg ptr %ptr, i16 %desired, i16 %newval seq_cst monotonic
%old = extractvalue { i16, i1 } %pairold, 0
ret i16 %old
@@ -324,40 +307,32 @@ define i32 @test_cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %desired, i32 %newval
; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
; CHECK-NOT: dmb
; CHECK: br label %[[LOOP:.*]]
-
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
; CHECK: [[FENCED_STORE]]:
; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
; CHECK: [[TRY_STORE]]:
; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ]
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0(i32 %newval, ptr elementtype(i32) %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
-
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
; CHECK: [[FAILURE_BB]]:
; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE]]
-
; CHECK: [[DONE]]:
; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i32 [[LOADED_EXIT]]
-
%pairold = cmpxchg ptr %ptr, i32 %desired, i32 %newval acquire acquire
%old = extractvalue { i32, i1 } %pairold, 0
ret i32 %old
@@ -367,7 +342,6 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
; CHECK-NOT: dmb
; CHECK: br label %[[LOOP:.*]]
-
; CHECK: [[LOOP]]:
; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(ptr %ptr)
; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
@@ -377,11 +351,9 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
; CHECK: [[FENCED_STORE]]:
; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
; CHECK: [[TRY_STORE]]:
; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ]
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -390,26 +362,21 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], ptr %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: dmb
; CHECK: br label %[[DONE:.*]]
-
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
; CHECK: [[FAILURE_BB]]:
; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
; CHECK-NOT: dmb
; CHECK: br label %[[DONE]]
-
; CHECK: [[DONE]]:
; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i64 [[LOADED_EXIT]]
-
%pairold = cmpxchg ptr %ptr, i64 %desired, i64 %newval monotonic monotonic
%old = extractvalue { i64, i1 } %pairold, 0
ret i64 %old
@@ -419,40 +386,32 @@ define i32 @test_cmpxchg_minsize(ptr %addr, i32 %desired, i32 %new) minsize {
; CHECK-LABEL: @test_cmpxchg_minsize
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[START:.*]]
-
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg\.nostore]]
; CHECK: [[FENCED_STORE]]:
; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
; CHECK: [[TRY_STORE]]:
; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[LOADED]], %[[FENCED_STORE]] ]
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0(i32 %new, ptr elementtype(i32) %addr)
; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[START]]
-
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END:.*]]
-
; CHECK: [[NO_STORE_BB]]:
; CHECK: [[LOADED_NO_STORE:%.*]] = phi i32 [ [[LOADED]], %[[START]] ]
; CHECK: call void @llvm.arm.clrex()
; CHECK: br label %[[FAILURE_BB]]
-
; CHECK: [[FAILURE_BB]]:
; CHECK: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NO_STORE]], %[[NO_STORE_BB]] ]
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[END]]
-
; CHECK: [[END]]:
; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i32 [[LOADED_EXIT]]
-
%pair = cmpxchg ptr %addr, i32 %desired, i32 %new seq_cst seq_cst
%oldval = extractvalue { i32, i1 } %pair, 0
ret i32 %oldval
diff --git a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
index 10073e23f5d46..98539ffcde32a 100644
--- a/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
+++ b/llvm/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -passes=atomic-expand %s -codegen-opt-level=1 | FileCheck %s
define i8 @test_atomic_xchg_i8(ptr %ptr, i8 %xchgend) {
@@ -84,42 +85,34 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(ptr %ptr, i8 %desired, i8 %newval) {
; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
-
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0(ptr elementtype(i8) %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg.nostore]]
; CHECK: [[FENCED_STORE]]:
; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
; CHECK: [[TRY_STORE]]:
; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ]
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0(i32 [[NEWVAL32]], ptr elementtype(i8) %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
-
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i8 [ [[OLDVAL]], %[[LOOP]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
; CHECK: [[FAILURE_BB]]:
; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i8 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE]]
-
; CHECK: [[DONE]]:
; CHECK: [[LOADED_EXIT:%.*]] = phi i8 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i8 [[LOADED_EXIT]]
-
%pairold = cmpxchg ptr %ptr, i8 %desired, i8 %newval seq_cst seq_cst
%old = extractvalue { i8, i1 } %pairold, 0
ret i8 %old
@@ -129,43 +122,35 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(ptr %ptr, i16 %desired, i16 %newva
; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
-
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0(ptr elementtype(i16) %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg.nostore]]
; CHECK: [[FENCED_STORE]]:
; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
; CHECK: [[TRY_STORE]]:
; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ]
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0(i32 [[NEWVAL32]], ptr elementtype(i16) %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence
; CHECK: br label %[[DONE:.*]]
-
; CHECK: [[NO_STORE_BB]]:
; The PHI is not required.
; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i16 [ [[OLDVAL]], %[[LOOP]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
; CHECK: [[FAILURE_BB]]:
; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i16 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
; CHECK-NOT: fence
; CHECK: br label %[[DONE]]
-
; CHECK: [[DONE]]:
; CHECK: [[LOADED_EXIT:%.*]] = phi i16 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i16 [[LOADED_EXIT]]
-
%pairold = cmpxchg ptr %ptr, i16 %desired, i16 %newval seq_cst monotonic
%old = extractvalue { i16, i1 } %pairold, 0
ret i16 %old
@@ -175,40 +160,32 @@ define i32 @test_cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %desired, i32 %newval
; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
-
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0(ptr elementtype(i32) %ptr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg.nostore]]
; CHECK: [[FENCED_STORE]]:
; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
; CHECK: [[TRY_STORE]]:
; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[FENCED_STORE]] ]
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0(i32 %newval, ptr elementtype(i32) %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
-
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i32 [ [[OLDVAL]], %[[LOOP]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
; CHECK: [[FAILURE_BB]]:
; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i32 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE]]
-
; CHECK: [[DONE]]:
; CHECK: [[LOADED_EXIT:%.*]] = phi i32 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i32 [[LOADED_EXIT]]
-
%pairold = cmpxchg ptr %ptr, i32 %desired, i32 %newval acquire acquire
%old = extractvalue { i32, i1 } %pairold, 0
ret i32 %old
@@ -218,7 +195,6 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
; CHECK-NOT: fence
; CHECK: br label %[[LOOP:.*]]
-
; CHECK: [[LOOP]]:
; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(ptr %ptr)
; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
@@ -228,11 +204,9 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:cmpxchg.nostore]]
; CHECK: [[FENCED_STORE]]:
; CHECK-NEXT: br label %[[TRY_STORE:.*]]
-
; CHECK: [[TRY_STORE]]:
; CHECK: [[LOADED_TRYSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[FENCED_STORE]] ]
; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
@@ -241,26 +215,21 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(ptr %ptr, i64 %desired, i64 %ne
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], ptr %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
; CHECK: [[SUCCESS_BB]]:
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE:.*]]
-
; CHECK: [[NO_STORE_BB]]:
; CHECK-NEXT: [[LOADED_NOSTORE:%.*]] = phi i64 [ [[OLDVAL]], %[[LOOP]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
-
; CHECK: [[FAILURE_BB]]:
; CHECK-NEXT: [[LOADED_FAILURE:%.*]] = phi i64 [ [[LOADED_NOSTORE]], %[[NO_STORE_BB]] ]
; CHECK-NOT: fence_cst
; CHECK: br label %[[DONE]]
-
; CHECK: [[DONE]]:
; CHECK: [[LOADED_EXIT:%.*]] = phi i64 [ [[LOADED_TRYSTORE]], %[[SUCCESS_BB]] ], [ [[LOADED_FAILURE]], %[[FAILURE_BB]] ]
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
; CHECK: ret i64 [[LOADED_EXIT]]
-
%pairold = cmpxchg ptr %ptr, i64 %desired, i64 %newval monotonic monotonic
%old = extractvalue { i64, i1 } %pairold, 0
ret i64 %old
diff --git a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
index 8195a5b6145e3..aff4196815e21 100644
--- a/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
+++ b/llvm/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
@@ -9,7 +9,7 @@ define i32 @test_cmpxchg_seq_cst(ptr %addr, i32 %desired, i32 %new) {
; CHECK: [[CMPXCHG_START]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]])
; CHECK-NEXT: [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]]
-; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]]
+; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]]
; CHECK: [[CMPXCHG_FENCEDSTORE]]:
; CHECK-NEXT: call void @llvm.arm.dmb(i32 10)
; CHECK-NEXT: br label %[[CMPXCHG_TRYSTORE:.*]]
@@ -17,7 +17,7 @@ define i32 @test_cmpxchg_seq_cst(ptr %addr, i32 %desired, i32 %new) {
; CHECK-NEXT: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]])
; CHECK-NEXT: [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]]
+; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]]
; CHECK: [[CMPXCHG_RELEASEDLOAD:.*:]]
; CHECK-NEXT: unreachable
; CHECK: [[CMPXCHG_SUCCESS]]:
@@ -48,7 +48,7 @@ define i1 @test_cmpxchg_weak_fail(ptr %addr, i32 %desired, i32 %new) {
; CHECK: [[CMPXCHG_START]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]])
; CHECK-NEXT: [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]]
-; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]]
+; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]]
; CHECK: [[CMPXCHG_FENCEDSTORE]]:
; CHECK-NEXT: call void @llvm.arm.dmb(i32 10)
; CHECK-NEXT: br label %[[CMPXCHG_TRYSTORE:.*]]
@@ -56,7 +56,7 @@ define i1 @test_cmpxchg_weak_fail(ptr %addr, i32 %desired, i32 %new) {
; CHECK-NEXT: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]])
; CHECK-NEXT: [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]]
+; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]]
; CHECK: [[CMPXCHG_RELEASEDLOAD:.*:]]
; CHECK-NEXT: unreachable
; CHECK: [[CMPXCHG_SUCCESS]]:
@@ -86,14 +86,14 @@ define i32 @test_cmpxchg_monotonic(ptr %addr, i32 %desired, i32 %new) {
; CHECK: [[CMPXCHG_START]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]])
; CHECK-NEXT: [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]]
-; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]]
+; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]]
; CHECK: [[CMPXCHG_FENCEDSTORE]]:
; CHECK-NEXT: br label %[[CMPXCHG_TRYSTORE:.*]]
; CHECK: [[CMPXCHG_TRYSTORE]]:
; CHECK-NEXT: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]])
; CHECK-NEXT: [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]]
+; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]]
; CHECK: [[CMPXCHG_RELEASEDLOAD:.*:]]
; CHECK-NEXT: unreachable
; CHECK: [[CMPXCHG_SUCCESS]]:
@@ -122,7 +122,7 @@ define i32 @test_cmpxchg_seq_cst_minsize(ptr %addr, i32 %desired, i32 %new) mins
; CHECK: [[CMPXCHG_START]]:
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.ldrex.p0(ptr elementtype(i32) [[ADDR]])
; CHECK-NEXT: [[SHOULD_STORE:%.*]] = icmp eq i32 [[TMP1]], [[DESIRED]]
-; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:.*]]
+; CHECK-NEXT: br i1 [[SHOULD_STORE]], label %[[CMPXCHG_FENCEDSTORE:.*]], label %[[CMPXCHG_NOSTORE:cmpxchg.nostore]]
; CHECK: [[CMPXCHG_FENCEDSTORE]]:
; CHECK-NEXT: call void @llvm.arm.dmb(i32 10)
; CHECK-NEXT: br label %[[CMPXCHG_TRYSTORE:.*]]
@@ -130,7 +130,7 @@ define i32 @test_cmpxchg_seq_cst_minsize(ptr %addr, i32 %desired, i32 %new) mins
; CHECK-NEXT: [[LOADED_TRYSTORE:%.*]] = phi i32 [ [[TMP1]], %[[CMPXCHG_FENCEDSTORE]] ]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.strex.p0(i32 [[NEW]], ptr elementtype(i32) [[ADDR]])
; CHECK-NEXT: [[SUCCESS:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:.*]]
+; CHECK-NEXT: br i1 [[SUCCESS]], label %[[CMPXCHG_SUCCESS:.*]], label %[[CMPXCHG_FAILURE:cmpxchg.failure]]
; CHECK: [[CMPXCHG_RELEASEDLOAD:.*:]]
; CHECK-NEXT: unreachable
; CHECK: [[CMPXCHG_SUCCESS]]:
More information about the llvm-commits
mailing list