[llvm] 85a9f2e - [PowerPC] enable AtomicExpandImpl::expandAtomicCmpXchg for powerpc (#142395)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 13 06:14:53 PDT 2025
Author: zhijian lin
Date: 2025-06-13T09:14:48-04:00
New Revision: 85a9f2e14859b472750f13fb441291e6e9c893a0
URL: https://github.com/llvm/llvm-project/commit/85a9f2e14859b472750f13fb441291e6e9c893a0
DIFF: https://github.com/llvm/llvm-project/commit/85a9f2e14859b472750f13fb441291e6e9c893a0.diff
LOG: [PowerPC] enable AtomicExpandImpl::expandAtomicCmpXchg for powerpc (#142395)
On PowerPC, the AtomicCmpXchgInst is lowered to
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS. However, this node does not handle
the weak attribute of AtomicCmpXchgInst. As a result, when compiling C++
atomic_compare_exchange_weak_explicit, the generated assembly includes a
"reservation lost" loop — i.e., it branches back and retries if the
stwcx. (store-conditional) fails. This differs from GCC’s codegen, which
does not include that loop for weak compare-exchange.
Since PowerPC uses LL/SC-style atomic instructions, the patch enables
AtomicExpandImpl::expandAtomicCmpXchg for PowerPC. With this, the weak
attribute is properly respected, and the "reservation lost" loop is
removed for weak operations.
---------
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/lib/Target/PowerPC/PPCInstr64Bit.td
llvm/lib/Target/PowerPC/PPCInstrInfo.td
llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
llvm/test/CodeGen/PowerPC/all-atomics.ll
llvm/test/CodeGen/PowerPC/atomic-2.ll
llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
llvm/test/CodeGen/PowerPC/atomic-float.ll
llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
llvm/test/CodeGen/PowerPC/atomics-regression.ll
llvm/test/CodeGen/PowerPC/atomics.ll
llvm/test/CodeGen/PowerPC/loop-comment.ll
llvm/test/Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 04bc0e9353101..4ed81d25e8e22 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -254,20 +254,20 @@ class LLVM_ABI TargetLoweringBase {
/// support for these atomic instructions, and also have different options
/// w.r.t. what they should expand to.
enum class AtomicExpansionKind {
- None, // Don't expand the instruction.
- CastToInteger, // Cast the atomic instruction to another type, e.g. from
- // floating-point to integer type.
+ None, // Don't expand the instruction.
+ CastToInteger, // Cast the atomic instruction to another type, e.g. from
+ // floating-point to integer type.
LLSC, // Expand the instruction into loadlinked/storeconditional; used
- // by ARM/AArch64.
+ // by ARM/AArch64/PowerPC.
LLOnly, // Expand the (load) instruction into just a load-linked, which has
// greater atomic guarantees than a normal load.
CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
- MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
- BitTestIntrinsic, // Use a target-specific intrinsic for special bit
- // operations; used by X86.
- CmpArithIntrinsic,// Use a target-specific intrinsic for special compare
- // operations; used by X86.
- Expand, // Generic expansion in terms of other atomic operations.
+ MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
+ BitTestIntrinsic, // Use a target-specific intrinsic for special bit
+ // operations; used by X86.
+ CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
+ // operations; used by X86.
+ Expand, // Generic expansion in terms of other atomic operations.
// Rewrite to a non-atomic form for use in a known non-preemptible
// environment.
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 751628cee58c0..84c26599b5b70 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1835,6 +1835,19 @@ let TargetPrefix = "ppc" in {
Intrinsic<[],[],[]>;
def int_ppc_iospace_eieio : ClangBuiltin<"__builtin_ppc_iospace_eieio">,
Intrinsic<[],[],[]>;
+ def int_ppc_lbarx :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_ppc_lharx :
+ Intrinsic<[llvm_i32_ty],[llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_ppc_lwarx :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+ def int_ppc_ldarx :
+ Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
def int_ppc_stdcx :
ClangBuiltin<"__builtin_ppc_stdcx">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i64_ty],
@@ -1844,7 +1857,7 @@ let TargetPrefix = "ppc" in {
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_sthcx :
- Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty ],
+ Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty],
[IntrWriteMem, IntrArgMemOnly, IntrNoDuplicate]>;
def int_ppc_stbcx :
ClangBuiltin<"__builtin_ppc_stbcx">,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 59c89985c6cff..0f8e5e57c58b7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1442,6 +1442,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
setMinFunctionAlignment(Align(4));
+ setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
auto CPUDirective = Subtarget.getCPUDirective();
switch (CPUDirective) {
@@ -12690,6 +12691,76 @@ static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
return Builder.CreateIntrinsic(Id, {});
}
+Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
+ Value *Addr,
+ AtomicOrdering Ord) const {
+ unsigned SZ = ValueTy->getPrimitiveSizeInBits();
+
+ assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
+ "Only 8/16/32/64-bit atomic loads supported");
+ Intrinsic::ID IntID;
+ switch (SZ) {
+ default:
+ llvm_unreachable("Unexpected PrimitiveSize");
+ case 8:
+ IntID = Intrinsic::ppc_lbarx;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case 16:
+ IntID = Intrinsic::ppc_lharx;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case 32:
+ IntID = Intrinsic::ppc_lwarx;
+ break;
+ case 64:
+ IntID = Intrinsic::ppc_ldarx;
+ break;
+ }
+ Value *Call =
+ Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
+
+ return Builder.CreateTruncOrBitCast(Call, ValueTy);
+}
+
+// Perform a store-conditional operation to Addr. Return the status of the
+// store. This should be 0 if the store succeeded, non-zero otherwise.
+Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
+ Value *Val, Value *Addr,
+ AtomicOrdering Ord) const {
+ Type *Ty = Val->getType();
+ unsigned SZ = Ty->getPrimitiveSizeInBits();
+
+ assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
+ "Only 8/16/32/64-bit atomic loads supported");
+ Intrinsic::ID IntID;
+ switch (SZ) {
+ default:
+ llvm_unreachable("Unexpected PrimitiveSize");
+ case 8:
+ IntID = Intrinsic::ppc_stbcx;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case 16:
+ IntID = Intrinsic::ppc_sthcx;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case 32:
+ IntID = Intrinsic::ppc_stwcx;
+ break;
+ case 64:
+ IntID = Intrinsic::ppc_stdcx;
+ break;
+ }
+
+ if (SZ == 8 || SZ == 16)
+ Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
+
+ Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
+ /*FMFSource=*/nullptr, "stcx");
+ return Builder.CreateXor(Call, Builder.getInt32(1));
+}
+
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
@@ -19651,7 +19722,7 @@ PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
- return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
+ return AtomicExpansionKind::LLSC;
}
static Intrinsic::ID
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 2c55b5427297a..4c88bd372b106 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -927,6 +927,12 @@ namespace llvm {
return true;
}
+ Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
+ AtomicOrdering Ord) const override;
+
+ Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
+ AtomicOrdering Ord) const override;
+
Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 659c1a9079c33..fd2084398c857 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -2023,6 +2023,8 @@ def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>;
} // IsISA3_0
+def : Pat<(int_ppc_ldarx ForceXForm:$ptr),
+ (LDARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stdcx ForceXForm:$dst, g8rc:$A),
(RLWINM (STDCX g8rc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, g8rc:$A, 8),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index b70290df07b1c..99ef89a7fdc0c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -5143,7 +5143,6 @@ def : Pat<(int_ppc_store2r gprc:$a, ForceXForm:$ptr),
def : Pat<(int_ppc_store4r gprc:$a, ForceXForm:$ptr),
(STWBRX gprc:$a, ForceXForm:$ptr)>;
-
// Fast 32-bit reverse bits algorithm:
// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA);
@@ -5324,10 +5323,14 @@ def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>;
def : Pat<(i64 (bitreverse i64:$A)),
(OR8 (RLDICR DWBytes7654.DWord, 32, 31), DWBytes3210.DWord)>;
+def : Pat<(int_ppc_lwarx ForceXForm:$ptr),
+ (LWARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stwcx ForceXForm:$dst, gprc:$A),
(RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 4),
(RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
+def : Pat<(int_ppc_lbarx ForceXForm:$ptr),
+ (LBARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stbcx ForceXForm:$dst, gprc:$A),
(RLWINM (STBCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 1),
@@ -5360,6 +5363,9 @@ def : Pat<(int_ppc_mtmsr gprc:$RS),
(MTMSR $RS, 0)>;
let Predicates = [IsISA2_07] in {
+ def : Pat<(int_ppc_lharx ForceXForm:$ptr),
+ (LHARX ForceXForm:$ptr)>;
+
def : Pat<(int_ppc_sthcx ForceXForm:$dst, gprc:$A),
(RLWINM (STHCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 2),
diff --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
index 8517783e3ebd7..1a8dabc5ad719 100644
--- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
+++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -15,50 +15,57 @@ define signext i32 @main() nounwind {
; CHECK-NEXT: stdu 1, -48(1)
; CHECK-NEXT: li 3, -32477
; CHECK-NEXT: std 0, 64(1)
-; CHECK-NEXT: li 4, 234
-; CHECK-NEXT: addi 6, 1, 46
; CHECK-NEXT: sth 3, 46(1)
-; CHECK-NEXT: lis 3, 0
+; CHECK-NEXT: addi 3, 1, 46
+; CHECK-NEXT: lharx 4, 0, 3
+; CHECK-NEXT: clrlwi 4, 4, 16
+; CHECK-NEXT: cmplwi 4, 33059
+; CHECK-NEXT: bne 0, .LBB0_4
+; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: sync
-; CHECK-NEXT: ori 3, 3, 33059
-; CHECK-NEXT: .LBB0_1: # %L.entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lharx 5, 0, 6
-; CHECK-NEXT: cmpw 5, 3
-; CHECK-NEXT: bne 0, .LBB0_3
-; CHECK-NEXT: # %bb.2: # %L.entry
-; CHECK-NEXT: #
-; CHECK-NEXT: sthcx. 4, 0, 6
-; CHECK-NEXT: bne 0, .LBB0_1
-; CHECK-NEXT: .LBB0_3: # %L.entry
-; CHECK-NEXT: cmplwi 5, 33059
+; CHECK-NEXT: li 4, 234
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB0_2: # %cmpxchg.trystore
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sthcx. 4, 0, 3
+; CHECK-NEXT: beq 0, .LBB0_7
+; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: lharx 5, 0, 3
+; CHECK-NEXT: clrlwi 5, 5, 16
+; CHECK-NEXT: cmplwi 5, 33059
+; CHECK-NEXT: beq 0, .LBB0_2
+; CHECK-NEXT: .LBB0_4: # %cmpxchg.nostore
; CHECK-NEXT: lwsync
-; CHECK-NEXT: bne 0, .LBB0_6
-; CHECK-NEXT: # %bb.4: # %L.B0000
+; CHECK-NEXT: b .LBB0_8
+; CHECK-NEXT: .LBB0_5: # %L.B0000
; CHECK-NEXT: lhz 3, 46(1)
-; CHECK-NEXT: cmplwi 3, 234
-; CHECK-NEXT: bne 0, .LBB0_7
-; CHECK-NEXT: # %bb.5: # %L.B0001
+; CHECK-NEXT: cmplwi 3, 234
+; CHECK-NEXT: bne 0, .LBB0_9
+; CHECK-NEXT: # %bb.6: # %L.B0001
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-NEXT: bl puts
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: b .LBB0_9
-; CHECK-NEXT: .LBB0_6: # %L.B0003
+; CHECK-NEXT: b .LBB0_11
+; CHECK-NEXT: .LBB0_7: # %cmpxchg.success
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: b .LBB0_5
+; CHECK-NEXT: .LBB0_8: # %L.B0003
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-NEXT: addi 3, 3, 16
-; CHECK-NEXT: b .LBB0_8
-; CHECK-NEXT: .LBB0_7: # %L.B0005
+; CHECK-NEXT: b .LBB0_10
+; CHECK-NEXT: .LBB0_9: # %L.B0005
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-NEXT: addi 3, 3, 64
-; CHECK-NEXT: .LBB0_8: # %L.B0003
+; CHECK-NEXT: .LBB0_10: # %L.B0003
; CHECK-NEXT: bl puts
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: .LBB0_9: # %L.B0003
+; CHECK-NEXT: .LBB0_11: # %L.B0003
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: mtlr 0
@@ -69,64 +76,69 @@ define signext i32 @main() nounwind {
; CHECK-P7-NEXT: mflr 0
; CHECK-P7-NEXT: stdu 1, -48(1)
; CHECK-P7-NEXT: li 3, -32477
-; CHECK-P7-NEXT: std 0, 64(1)
; CHECK-P7-NEXT: addi 4, 1, 46
-; CHECK-P7-NEXT: li 6, 234
+; CHECK-P7-NEXT: std 0, 64(1)
; CHECK-P7-NEXT: sth 3, 46(1)
-; CHECK-P7-NEXT: lis 3, 0
+; CHECK-P7-NEXT: rldicr 3, 4, 0, 61
+; CHECK-P7-NEXT: rlwinm 4, 4, 3, 27, 27
+; CHECK-P7-NEXT: lwarx 5, 0, 3
+; CHECK-P7-NEXT: srw 6, 5, 4
+; CHECK-P7-NEXT: clrlwi 6, 6, 16
+; CHECK-P7-NEXT: cmplwi 6, 33059
+; CHECK-P7-NEXT: bne 0, .LBB0_4
+; CHECK-P7-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; CHECK-P7-NEXT: lis 6, 0
+; CHECK-P7-NEXT: li 7, 234
; CHECK-P7-NEXT: sync
-; CHECK-P7-NEXT: ori 5, 3, 33059
-; CHECK-P7-NEXT: rlwinm 3, 4, 3, 27, 27
-; CHECK-P7-NEXT: rldicr 4, 4, 0, 61
-; CHECK-P7-NEXT: slw 7, 5, 3
-; CHECK-P7-NEXT: li 5, 0
-; CHECK-P7-NEXT: slw 6, 6, 3
-; CHECK-P7-NEXT: ori 5, 5, 65535
-; CHECK-P7-NEXT: slw 5, 5, 3
-; CHECK-P7-NEXT: and 6, 6, 5
-; CHECK-P7-NEXT: and 7, 7, 5
-; CHECK-P7-NEXT: .LBB0_1: # %L.entry
-; CHECK-P7-NEXT: #
-; CHECK-P7-NEXT: lwarx 9, 0, 4
-; CHECK-P7-NEXT: and 8, 9, 5
-; CHECK-P7-NEXT: cmpw 8, 7
-; CHECK-P7-NEXT: bne 0, .LBB0_3
-; CHECK-P7-NEXT: # %bb.2: # %L.entry
-; CHECK-P7-NEXT: #
-; CHECK-P7-NEXT: andc 9, 9, 5
-; CHECK-P7-NEXT: or 9, 9, 6
-; CHECK-P7-NEXT: stwcx. 9, 0, 4
-; CHECK-P7-NEXT: bne 0, .LBB0_1
-; CHECK-P7-NEXT: .LBB0_3: # %L.entry
-; CHECK-P7-NEXT: srw 3, 8, 3
+; CHECK-P7-NEXT: ori 6, 6, 65535
+; CHECK-P7-NEXT: slw 7, 7, 4
+; CHECK-P7-NEXT: slw 6, 6, 4
+; CHECK-P7-NEXT: not 6, 6
+; CHECK-P7-NEXT: .p2align 4
+; CHECK-P7-NEXT: .LBB0_2: # %cmpxchg.trystore
+; CHECK-P7-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-P7-NEXT: and 5, 5, 6
+; CHECK-P7-NEXT: or 5, 5, 7
+; CHECK-P7-NEXT: stwcx. 5, 0, 3
+; CHECK-P7-NEXT: beq 0, .LBB0_7
+; CHECK-P7-NEXT: # %bb.3: # %cmpxchg.releasedload
+; CHECK-P7-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-P7-NEXT: lwarx 5, 0, 3
+; CHECK-P7-NEXT: srw 8, 5, 4
+; CHECK-P7-NEXT: clrlwi 8, 8, 16
+; CHECK-P7-NEXT: cmplwi 8, 33059
+; CHECK-P7-NEXT: beq 0, .LBB0_2
+; CHECK-P7-NEXT: .LBB0_4: # %cmpxchg.nostore
; CHECK-P7-NEXT: lwsync
-; CHECK-P7-NEXT: cmplwi 3, 33059
-; CHECK-P7-NEXT: bne 0, .LBB0_6
-; CHECK-P7-NEXT: # %bb.4: # %L.B0000
+; CHECK-P7-NEXT: b .LBB0_8
+; CHECK-P7-NEXT: .LBB0_5: # %L.B0000
; CHECK-P7-NEXT: lhz 3, 46(1)
-; CHECK-P7-NEXT: cmplwi 3, 234
-; CHECK-P7-NEXT: bne 0, .LBB0_7
-; CHECK-P7-NEXT: # %bb.5: # %L.B0001
+; CHECK-P7-NEXT: cmplwi 3, 234
+; CHECK-P7-NEXT: bne 0, .LBB0_9
+; CHECK-P7-NEXT: # %bb.6: # %L.B0001
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-P7-NEXT: bl puts
; CHECK-P7-NEXT: nop
; CHECK-P7-NEXT: li 3, 0
-; CHECK-P7-NEXT: b .LBB0_9
-; CHECK-P7-NEXT: .LBB0_6: # %L.B0003
+; CHECK-P7-NEXT: b .LBB0_11
+; CHECK-P7-NEXT: .LBB0_7: # %cmpxchg.success
+; CHECK-P7-NEXT: lwsync
+; CHECK-P7-NEXT: b .LBB0_5
+; CHECK-P7-NEXT: .LBB0_8: # %L.B0003
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-P7-NEXT: addi 3, 3, 16
-; CHECK-P7-NEXT: b .LBB0_8
-; CHECK-P7-NEXT: .LBB0_7: # %L.B0005
+; CHECK-P7-NEXT: b .LBB0_10
+; CHECK-P7-NEXT: .LBB0_9: # %L.B0005
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals at toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals at toc@l
; CHECK-P7-NEXT: addi 3, 3, 64
-; CHECK-P7-NEXT: .LBB0_8: # %L.B0003
+; CHECK-P7-NEXT: .LBB0_10: # %L.B0003
; CHECK-P7-NEXT: bl puts
; CHECK-P7-NEXT: nop
; CHECK-P7-NEXT: li 3, 1
-; CHECK-P7-NEXT: .LBB0_9: # %L.B0003
+; CHECK-P7-NEXT: .LBB0_11: # %L.B0003
; CHECK-P7-NEXT: addi 1, 1, 48
; CHECK-P7-NEXT: ld 0, 16(1)
; CHECK-P7-NEXT: mtlr 0
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 531e559ea7309..67cee358882ff 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -4336,704 +4336,959 @@ entry:
define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-LABEL: test_compare_and_swap:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addis 3, 2, uc at toc@ha
-; CHECK-NEXT: addis 4, 2, sc at toc@ha
-; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT: lbz 5, uc at toc@l(3)
-; CHECK-NEXT: lbz 8, sc at toc@l(4)
-; CHECK-NEXT: addi 6, 3, uc at toc@l
-; CHECK-NEXT: addi 0, 4, sc at toc@l
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_1: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lbarx 7, 0, 0
-; CHECK-NEXT: cmpw 7, 5
-; CHECK-NEXT: bne 0, .LBB3_3
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stbcx. 8, 0, 0
-; CHECK-NEXT: bne 0, .LBB3_1
-; CHECK-NEXT: .LBB3_3: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: stb 7, sc at toc@l(4)
-; CHECK-NEXT: lbz 8, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_4: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lbarx 5, 0, 6
-; CHECK-NEXT: cmpw 5, 8
-; CHECK-NEXT: bne 0, .LBB3_6
-; CHECK-NEXT: # %bb.5: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stbcx. 7, 0, 6
-; CHECK-NEXT: bne 0, .LBB3_4
-; CHECK-NEXT: .LBB3_6: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: stb 5, uc at toc@l(3)
-; CHECK-NEXT: lbz 7, sc at toc@l(4)
-; CHECK-NEXT: sync
-; CHECK-NEXT: extsb 8, 7
-; CHECK-NEXT: addis 7, 2, ss at toc@ha
-; CHECK-NEXT: addi 12, 7, ss at toc@l
-; CHECK-NEXT: .LBB3_7: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lharx 9, 0, 12
-; CHECK-NEXT: cmpw 9, 5
-; CHECK-NEXT: bne 0, .LBB3_9
-; CHECK-NEXT: # %bb.8: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: sthcx. 8, 0, 12
-; CHECK-NEXT: bne 0, .LBB3_7
-; CHECK-NEXT: .LBB3_9: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: sth 9, ss at toc@l(7)
-; CHECK-NEXT: lbz 7, sc at toc@l(4)
-; CHECK-NEXT: lbz 5, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: extsb 8, 7
-; CHECK-NEXT: addis 7, 2, us at toc@ha
-; CHECK-NEXT: addi 11, 7, us at toc@l
-; CHECK-NEXT: .LBB3_10: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lharx 9, 0, 11
-; CHECK-NEXT: cmpw 9, 5
-; CHECK-NEXT: bne 0, .LBB3_12
-; CHECK-NEXT: # %bb.11: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: sthcx. 8, 0, 11
-; CHECK-NEXT: bne 0, .LBB3_10
-; CHECK-NEXT: .LBB3_12: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: sth 9, us at toc@l(7)
-; CHECK-NEXT: lbz 7, sc at toc@l(4)
-; CHECK-NEXT: lbz 5, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: extsb 8, 7
-; CHECK-NEXT: addis 7, 2, si at toc@ha
-; CHECK-NEXT: addi 10, 7, si at toc@l
-; CHECK-NEXT: .LBB3_13: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lwarx 9, 0, 10
-; CHECK-NEXT: cmpw 9, 5
-; CHECK-NEXT: bne 0, .LBB3_15
-; CHECK-NEXT: # %bb.14: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stwcx. 8, 0, 10
-; CHECK-NEXT: bne 0, .LBB3_13
-; CHECK-NEXT: .LBB3_15: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: stw 9, si at toc@l(7)
-; CHECK-NEXT: lbz 5, sc at toc@l(4)
-; CHECK-NEXT: lbz 7, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: extsb 8, 5
-; CHECK-NEXT: addis 5, 2, ui at toc@ha
-; CHECK-NEXT: addi 9, 5, ui at toc@l
-; CHECK-NEXT: .LBB3_16: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lwarx 30, 0, 9
-; CHECK-NEXT: cmpw 30, 7
-; CHECK-NEXT: bne 0, .LBB3_18
-; CHECK-NEXT: # %bb.17: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stwcx. 8, 0, 9
-; CHECK-NEXT: bne 0, .LBB3_16
-; CHECK-NEXT: .LBB3_18: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: stw 30, ui at toc@l(5)
-; CHECK-NEXT: addis 30, 2, sll at toc@ha
-; CHECK-NEXT: lbz 8, sc at toc@l(4)
-; CHECK-NEXT: lbz 7, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: extsb 29, 8
-; CHECK-NEXT: addi 8, 30, sll at toc@l
-; CHECK-NEXT: .LBB3_19: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: ldarx 28, 0, 8
-; CHECK-NEXT: cmpd 28, 7
-; CHECK-NEXT: bne 0, .LBB3_21
-; CHECK-NEXT: # %bb.20: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stdcx. 29, 0, 8
-; CHECK-NEXT: bne 0, .LBB3_19
-; CHECK-NEXT: .LBB3_21: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: addis 29, 2, ull at toc@ha
-; CHECK-NEXT: std 28, sll at toc@l(30)
-; CHECK-NEXT: lbz 7, sc at toc@l(4)
-; CHECK-NEXT: lbz 30, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: extsb 28, 7
-; CHECK-NEXT: addi 7, 29, ull at toc@l
-; CHECK-NEXT: .LBB3_22: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: ldarx 27, 0, 7
-; CHECK-NEXT: cmpd 27, 30
-; CHECK-NEXT: bne 0, .LBB3_24
-; CHECK-NEXT: # %bb.23: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stdcx. 28, 0, 7
-; CHECK-NEXT: bne 0, .LBB3_22
-; CHECK-NEXT: .LBB3_24: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: std 27, ull at toc@l(29)
-; CHECK-NEXT: lbz 30, uc at toc@l(3)
-; CHECK-NEXT: lbz 29, sc at toc@l(4)
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_25: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lbarx 28, 0, 0
-; CHECK-NEXT: cmpw 28, 30
-; CHECK-NEXT: bne 0, .LBB3_27
-; CHECK-NEXT: # %bb.26: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stbcx. 29, 0, 0
-; CHECK-NEXT: bne 0, .LBB3_25
-; CHECK-NEXT: .LBB3_27: # %entry
-; CHECK-NEXT: xor 0, 28, 30
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: lbz 30, sc at toc@l(4)
-; CHECK-NEXT: cntlzw 0, 0
-; CHECK-NEXT: srwi 0, 0, 5
-; CHECK-NEXT: stw 0, ui at toc@l(5)
-; CHECK-NEXT: lbz 0, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_28: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lbarx 29, 0, 6
-; CHECK-NEXT: cmpw 29, 0
-; CHECK-NEXT: bne 0, .LBB3_30
-; CHECK-NEXT: # %bb.29: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stbcx. 30, 0, 6
-; CHECK-NEXT: bne 0, .LBB3_28
-; CHECK-NEXT: .LBB3_30: # %entry
-; CHECK-NEXT: xor 6, 29, 0
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: lbz 0, sc at toc@l(4)
-; CHECK-NEXT: cntlzw 6, 6
-; CHECK-NEXT: extsb 0, 0
-; CHECK-NEXT: srwi 6, 6, 5
-; CHECK-NEXT: stw 6, ui at toc@l(5)
-; CHECK-NEXT: lbz 6, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_31: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lharx 30, 0, 12
-; CHECK-NEXT: cmpw 30, 6
-; CHECK-NEXT: bne 0, .LBB3_33
-; CHECK-NEXT: # %bb.32: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: sthcx. 0, 0, 12
-; CHECK-NEXT: bne 0, .LBB3_31
-; CHECK-NEXT: .LBB3_33: # %entry
-; CHECK-NEXT: xor 6, 30, 6
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: lbz 12, sc at toc@l(4)
-; CHECK-NEXT: cntlzw 6, 6
-; CHECK-NEXT: extsb 12, 12
-; CHECK-NEXT: srwi 6, 6, 5
-; CHECK-NEXT: stw 6, ui at toc@l(5)
-; CHECK-NEXT: lbz 6, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_34: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lharx 0, 0, 11
-; CHECK-NEXT: cmpw 0, 6
-; CHECK-NEXT: bne 0, .LBB3_36
-; CHECK-NEXT: # %bb.35: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: sthcx. 12, 0, 11
-; CHECK-NEXT: bne 0, .LBB3_34
-; CHECK-NEXT: .LBB3_36: # %entry
-; CHECK-NEXT: xor 6, 0, 6
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: lbz 11, sc at toc@l(4)
-; CHECK-NEXT: cntlzw 6, 6
-; CHECK-NEXT: extsb 11, 11
-; CHECK-NEXT: srwi 6, 6, 5
-; CHECK-NEXT: stw 6, ui at toc@l(5)
-; CHECK-NEXT: lbz 6, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_37: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lwarx 12, 0, 10
-; CHECK-NEXT: cmpw 12, 6
-; CHECK-NEXT: bne 0, .LBB3_39
-; CHECK-NEXT: # %bb.38: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stwcx. 11, 0, 10
-; CHECK-NEXT: bne 0, .LBB3_37
-; CHECK-NEXT: .LBB3_39: # %entry
-; CHECK-NEXT: xor 6, 12, 6
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: lbz 10, sc at toc@l(4)
-; CHECK-NEXT: cntlzw 6, 6
-; CHECK-NEXT: extsb 10, 10
-; CHECK-NEXT: srwi 6, 6, 5
-; CHECK-NEXT: stw 6, ui at toc@l(5)
-; CHECK-NEXT: lbz 6, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_40: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lwarx 11, 0, 9
-; CHECK-NEXT: cmpw 11, 6
-; CHECK-NEXT: bne 0, .LBB3_42
-; CHECK-NEXT: # %bb.41: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stwcx. 10, 0, 9
-; CHECK-NEXT: bne 0, .LBB3_40
-; CHECK-NEXT: .LBB3_42: # %entry
-; CHECK-NEXT: xor 6, 11, 6
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: lbz 9, sc at toc@l(4)
-; CHECK-NEXT: cntlzw 6, 6
-; CHECK-NEXT: extsb 9, 9
-; CHECK-NEXT: srwi 6, 6, 5
-; CHECK-NEXT: stw 6, ui at toc@l(5)
-; CHECK-NEXT: lbz 6, uc at toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_43: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: ldarx 10, 0, 8
-; CHECK-NEXT: cmpd 10, 6
-; CHECK-NEXT: bne 0, .LBB3_45
-; CHECK-NEXT: # %bb.44: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stdcx. 9, 0, 8
-; CHECK-NEXT: bne 0, .LBB3_43
-; CHECK-NEXT: .LBB3_45: # %entry
-; CHECK-NEXT: xor 6, 10, 6
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: lbz 4, sc at toc@l(4)
-; CHECK-NEXT: lbz 3, uc at toc@l(3)
-; CHECK-NEXT: cntlzd 6, 6
-; CHECK-NEXT: extsb 4, 4
-; CHECK-NEXT: rldicl 6, 6, 58, 63
-; CHECK-NEXT: stw 6, ui at toc@l(5)
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_46: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: ldarx 6, 0, 7
-; CHECK-NEXT: cmpd 6, 3
-; CHECK-NEXT: bne 0, .LBB3_48
-; CHECK-NEXT: # %bb.47: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stdcx. 4, 0, 7
-; CHECK-NEXT: bne 0, .LBB3_46
-; CHECK-NEXT: .LBB3_48: # %entry
-; CHECK-NEXT: xor 3, 6, 3
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload
-; CHECK-NEXT: cntlzd 3, 3
-; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload
-; CHECK-NEXT: rldicl 3, 3, 58, 63
-; CHECK-NEXT: stw 3, ui at toc@l(5)
-; CHECK-NEXT: blr
+; CHECK-NEXT: addis 4, 2, sc at toc@ha
+; CHECK-NEXT: addis 3, 2, uc at toc@ha
+; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT: addi 6, 4, sc at toc@l
+; CHECK-NEXT: lbz 7, uc at toc@l(3)
+; CHECK-NEXT: lbz 8, sc at toc@l(4)
+; CHECK-NEXT: lbarx 5, 0, 6
+; CHECK-NEXT: clrlwi 9, 5, 24
+; CHECK-NEXT: cmplw 9, 7
+; CHECK-NEXT: bne 0, .LBB3_4
+; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore276
+; CHECK-NEXT: sync
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_2: # %cmpxchg.trystore275
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stbcx. 8, 0, 6
+; CHECK-NEXT: beq 0, .LBB3_4
+; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload274
+; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT: lbarx 5, 0, 6
+; CHECK-NEXT: clrlwi 9, 5, 24
+; CHECK-NEXT: cmplw 9, 7
+; CHECK-NEXT: beq 0, .LBB3_2
+; CHECK-NEXT: .LBB3_4: # %cmpxchg.nostore272
+; CHECK-NEXT: addi 7, 3, uc at toc@l
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: stb 5, sc at toc@l(4)
+; CHECK-NEXT: lbz 9, uc at toc@l(3)
+; CHECK-NEXT: lbarx 8, 0, 7
+; CHECK-NEXT: clrlwi 10, 8, 24
+; CHECK-NEXT: cmplw 10, 9
+; CHECK-NEXT: bne 0, .LBB3_8
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore257
+; CHECK-NEXT: sync
+; CHECK-NEXT: clrlwi 5, 5, 24
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_6: # %cmpxchg.trystore256
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stbcx. 5, 0, 7
+; CHECK-NEXT: beq 0, .LBB3_8
+; CHECK-NEXT: # %bb.7: # %cmpxchg.releasedload255
+; CHECK-NEXT: # in Loop: Header=BB3_6 Depth=1
+; CHECK-NEXT: lbarx 8, 0, 7
+; CHECK-NEXT: clrlwi 10, 8, 24
+; CHECK-NEXT: cmplw 10, 9
+; CHECK-NEXT: beq 0, .LBB3_6
+; CHECK-NEXT: .LBB3_8: # %cmpxchg.nostore253
+; CHECK-NEXT: addis 5, 2, ss at toc@ha
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: stb 8, uc at toc@l(3)
+; CHECK-NEXT: clrlwi 10, 8, 24
+; CHECK-NEXT: lbz 11, sc at toc@l(4)
+; CHECK-NEXT: addi 8, 5, ss at toc@l
+; CHECK-NEXT: lharx 9, 0, 8
+; CHECK-NEXT: clrlwi 12, 9, 16
+; CHECK-NEXT: cmplw 12, 10
+; CHECK-NEXT: bne 0, .LBB3_12
+; CHECK-NEXT: # %bb.9: # %cmpxchg.fencedstore238
+; CHECK-NEXT: extsb 11, 11
+; CHECK-NEXT: sync
+; CHECK-NEXT: clrlwi 11, 11, 16
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_10: # %cmpxchg.trystore237
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sthcx. 11, 0, 8
+; CHECK-NEXT: beq 0, .LBB3_12
+; CHECK-NEXT: # %bb.11: # %cmpxchg.releasedload236
+; CHECK-NEXT: # in Loop: Header=BB3_10 Depth=1
+; CHECK-NEXT: lharx 9, 0, 8
+; CHECK-NEXT: clrlwi 12, 9, 16
+; CHECK-NEXT: cmplw 12, 10
+; CHECK-NEXT: beq 0, .LBB3_10
+; CHECK-NEXT: .LBB3_12: # %cmpxchg.nostore234
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: sth 9, ss at toc@l(5)
+; CHECK-NEXT: addis 5, 2, us at toc@ha
+; CHECK-NEXT: lbz 11, uc at toc@l(3)
+; CHECK-NEXT: lbz 12, sc at toc@l(4)
+; CHECK-NEXT: addi 9, 5, us at toc@l
+; CHECK-NEXT: lharx 10, 0, 9
+; CHECK-NEXT: clrlwi 0, 10, 16
+; CHECK-NEXT: cmplw 0, 11
+; CHECK-NEXT: bne 0, .LBB3_16
+; CHECK-NEXT: # %bb.13: # %cmpxchg.fencedstore219
+; CHECK-NEXT: extsb 12, 12
+; CHECK-NEXT: sync
+; CHECK-NEXT: clrlwi 12, 12, 16
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_14: # %cmpxchg.trystore218
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sthcx. 12, 0, 9
+; CHECK-NEXT: beq 0, .LBB3_16
+; CHECK-NEXT: # %bb.15: # %cmpxchg.releasedload217
+; CHECK-NEXT: # in Loop: Header=BB3_14 Depth=1
+; CHECK-NEXT: lharx 10, 0, 9
+; CHECK-NEXT: clrlwi 0, 10, 16
+; CHECK-NEXT: cmplw 0, 11
+; CHECK-NEXT: beq 0, .LBB3_14
+; CHECK-NEXT: .LBB3_16: # %cmpxchg.nostore215
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: sth 10, us at toc@l(5)
+; CHECK-NEXT: addis 5, 2, si at toc@ha
+; CHECK-NEXT: lbz 12, uc at toc@l(3)
+; CHECK-NEXT: lbz 0, sc at toc@l(4)
+; CHECK-NEXT: addi 10, 5, si at toc@l
+; CHECK-NEXT: lwarx 11, 0, 10
+; CHECK-NEXT: cmplw 11, 12
+; CHECK-NEXT: bne 0, .LBB3_20
+; CHECK-NEXT: # %bb.17: # %cmpxchg.fencedstore200
+; CHECK-NEXT: extsb 0, 0
+; CHECK-NEXT: sync
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_18: # %cmpxchg.trystore199
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stwcx. 0, 0, 10
+; CHECK-NEXT: beq 0, .LBB3_20
+; CHECK-NEXT: # %bb.19: # %cmpxchg.releasedload198
+; CHECK-NEXT: # in Loop: Header=BB3_18 Depth=1
+; CHECK-NEXT: lwarx 11, 0, 10
+; CHECK-NEXT: cmplw 11, 12
+; CHECK-NEXT: beq 0, .LBB3_18
+; CHECK-NEXT: .LBB3_20: # %cmpxchg.nostore196
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: stw 11, si at toc@l(5)
+; CHECK-NEXT: addis 5, 2, ui at toc@ha
+; CHECK-NEXT: lbz 0, uc at toc@l(3)
+; CHECK-NEXT: lbz 30, sc at toc@l(4)
+; CHECK-NEXT: addi 11, 5, ui at toc@l
+; CHECK-NEXT: lwarx 12, 0, 11
+; CHECK-NEXT: cmplw 12, 0
+; CHECK-NEXT: bne 0, .LBB3_24
+; CHECK-NEXT: # %bb.21: # %cmpxchg.fencedstore181
+; CHECK-NEXT: extsb 30, 30
+; CHECK-NEXT: sync
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_22: # %cmpxchg.trystore180
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stwcx. 30, 0, 11
+; CHECK-NEXT: beq 0, .LBB3_24
+; CHECK-NEXT: # %bb.23: # %cmpxchg.releasedload179
+; CHECK-NEXT: # in Loop: Header=BB3_22 Depth=1
+; CHECK-NEXT: lwarx 12, 0, 11
+; CHECK-NEXT: cmplw 12, 0
+; CHECK-NEXT: beq 0, .LBB3_22
+; CHECK-NEXT: .LBB3_24: # %cmpxchg.nostore177
+; CHECK-NEXT: addis 30, 2, sll at toc@ha
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: stw 12, ui at toc@l(5)
+; CHECK-NEXT: lbz 29, uc at toc@l(3)
+; CHECK-NEXT: lbz 28, sc at toc@l(4)
+; CHECK-NEXT: addi 12, 30, sll at toc@l
+; CHECK-NEXT: ldarx 0, 0, 12
+; CHECK-NEXT: cmpld 0, 29
+; CHECK-NEXT: bne 0, .LBB3_28
+; CHECK-NEXT: # %bb.25: # %cmpxchg.fencedstore162
+; CHECK-NEXT: extsb 28, 28
+; CHECK-NEXT: sync
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_26: # %cmpxchg.trystore161
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stdcx. 28, 0, 12
+; CHECK-NEXT: beq 0, .LBB3_28
+; CHECK-NEXT: # %bb.27: # %cmpxchg.releasedload160
+; CHECK-NEXT: # in Loop: Header=BB3_26 Depth=1
+; CHECK-NEXT: ldarx 0, 0, 12
+; CHECK-NEXT: cmpld 0, 29
+; CHECK-NEXT: beq 0, .LBB3_26
+; CHECK-NEXT: .LBB3_28: # %cmpxchg.nostore158
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: std 0, sll at toc@l(30)
+; CHECK-NEXT: addis 30, 2, ull at toc@ha
+; CHECK-NEXT: lbz 28, uc at toc@l(3)
+; CHECK-NEXT: lbz 27, sc at toc@l(4)
+; CHECK-NEXT: addi 0, 30, ull at toc@l
+; CHECK-NEXT: ldarx 29, 0, 0
+; CHECK-NEXT: cmpld 29, 28
+; CHECK-NEXT: bne 0, .LBB3_32
+; CHECK-NEXT: # %bb.29: # %cmpxchg.fencedstore143
+; CHECK-NEXT: extsb 27, 27
+; CHECK-NEXT: sync
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_30: # %cmpxchg.trystore142
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stdcx. 27, 0, 0
+; CHECK-NEXT: beq 0, .LBB3_32
+; CHECK-NEXT: # %bb.31: # %cmpxchg.releasedload141
+; CHECK-NEXT: # in Loop: Header=BB3_30 Depth=1
+; CHECK-NEXT: ldarx 29, 0, 0
+; CHECK-NEXT: cmpld 29, 28
+; CHECK-NEXT: beq 0, .LBB3_30
+; CHECK-NEXT: .LBB3_32: # %cmpxchg.nostore139
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: std 29, ull at toc@l(30)
+; CHECK-NEXT: lbz 30, uc at toc@l(3)
+; CHECK-NEXT: lbz 29, sc at toc@l(4)
+; CHECK-NEXT: lbarx 28, 0, 6
+; CHECK-NEXT: clrlwi 28, 28, 24
+; CHECK-NEXT: cmplw 28, 30
+; CHECK-NEXT: bne 0, .LBB3_36
+; CHECK-NEXT: # %bb.33: # %cmpxchg.fencedstore124
+; CHECK-NEXT: sync
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_34: # %cmpxchg.trystore123
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stbcx. 29, 0, 6
+; CHECK-NEXT: beq 0, .LBB3_37
+; CHECK-NEXT: # %bb.35: # %cmpxchg.releasedload122
+; CHECK-NEXT: # in Loop: Header=BB3_34 Depth=1
+; CHECK-NEXT: lbarx 28, 0, 6
+; CHECK-NEXT: clrlwi 28, 28, 24
+; CHECK-NEXT: cmplw 28, 30
+; CHECK-NEXT: beq 0, .LBB3_34
+; CHECK-NEXT: .LBB3_36: # %cmpxchg.nostore120
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: b .LBB3_38
+; CHECK-NEXT: .LBB3_37: # %cmpxchg.success121
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: .LBB3_38: # %cmpxchg.end118
+; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: li 30, 1
+; CHECK-NEXT: isel 6, 30, 6, 20
+; CHECK-NEXT: lbz 30, sc at toc@l(4)
+; CHECK-NEXT: stw 6, ui at toc@l(5)
+; CHECK-NEXT: lbz 6, uc at toc@l(3)
+; CHECK-NEXT: lbarx 29, 0, 7
+; CHECK-NEXT: clrlwi 29, 29, 24
+; CHECK-NEXT: cmplw 29, 6
+; CHECK-NEXT: bne 0, .LBB3_42
+; CHECK-NEXT: # %bb.39: # %cmpxchg.fencedstore105
+; CHECK-NEXT: sync
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_40: # %cmpxchg.trystore104
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stbcx. 30, 0, 7
+; CHECK-NEXT: beq 0, .LBB3_43
+; CHECK-NEXT: # %bb.41: # %cmpxchg.releasedload103
+; CHECK-NEXT: # in Loop: Header=BB3_40 Depth=1
+; CHECK-NEXT: lbarx 29, 0, 7
+; CHECK-NEXT: clrlwi 29, 29, 24
+; CHECK-NEXT: cmplw 29, 6
+; CHECK-NEXT: beq 0, .LBB3_40
+; CHECK-NEXT: .LBB3_42: # %cmpxchg.nostore101
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: b .LBB3_44
+; CHECK-NEXT: .LBB3_43: # %cmpxchg.success102
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: .LBB3_44: # %cmpxchg.end99
+; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: li 7, 1
+; CHECK-NEXT: isel 6, 7, 6, 20
+; CHECK-NEXT: lbz 7, sc at toc@l(4)
+; CHECK-NEXT: stw 6, ui at toc@l(5)
+; CHECK-NEXT: lbz 6, uc at toc@l(3)
+; CHECK-NEXT: lharx 30, 0, 8
+; CHECK-NEXT: clrlwi 30, 30, 16
+; CHECK-NEXT: cmplw 30, 6
+; CHECK-NEXT: bne 0, .LBB3_48
+; CHECK-NEXT: # %bb.45: # %cmpxchg.fencedstore86
+; CHECK-NEXT: extsb 7, 7
+; CHECK-NEXT: sync
+; CHECK-NEXT: clrlwi 7, 7, 16
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_46: # %cmpxchg.trystore85
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sthcx. 7, 0, 8
+; CHECK-NEXT: beq 0, .LBB3_49
+; CHECK-NEXT: # %bb.47: # %cmpxchg.releasedload84
+; CHECK-NEXT: # in Loop: Header=BB3_46 Depth=1
+; CHECK-NEXT: lharx 30, 0, 8
+; CHECK-NEXT: clrlwi 30, 30, 16
+; CHECK-NEXT: cmplw 30, 6
+; CHECK-NEXT: beq 0, .LBB3_46
+; CHECK-NEXT: .LBB3_48: # %cmpxchg.nostore82
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: b .LBB3_50
+; CHECK-NEXT: .LBB3_49: # %cmpxchg.success83
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: .LBB3_50: # %cmpxchg.end80
+; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: li 7, 1
+; CHECK-NEXT: isel 6, 7, 6, 20
+; CHECK-NEXT: lbz 7, sc at toc@l(4)
+; CHECK-NEXT: stw 6, ui at toc@l(5)
+; CHECK-NEXT: lbz 6, uc at toc@l(3)
+; CHECK-NEXT: lharx 8, 0, 9
+; CHECK-NEXT: clrlwi 8, 8, 16
+; CHECK-NEXT: cmplw 8, 6
+; CHECK-NEXT: bne 0, .LBB3_54
+; CHECK-NEXT: # %bb.51: # %cmpxchg.fencedstore67
+; CHECK-NEXT: extsb 7, 7
+; CHECK-NEXT: sync
+; CHECK-NEXT: clrlwi 7, 7, 16
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_52: # %cmpxchg.trystore66
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sthcx. 7, 0, 9
+; CHECK-NEXT: beq 0, .LBB3_55
+; CHECK-NEXT: # %bb.53: # %cmpxchg.releasedload65
+; CHECK-NEXT: # in Loop: Header=BB3_52 Depth=1
+; CHECK-NEXT: lharx 8, 0, 9
+; CHECK-NEXT: clrlwi 8, 8, 16
+; CHECK-NEXT: cmplw 8, 6
+; CHECK-NEXT: beq 0, .LBB3_52
+; CHECK-NEXT: .LBB3_54: # %cmpxchg.nostore63
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: b .LBB3_56
+; CHECK-NEXT: .LBB3_55: # %cmpxchg.success64
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: .LBB3_56: # %cmpxchg.end61
+; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: li 7, 1
+; CHECK-NEXT: isel 6, 7, 6, 20
+; CHECK-NEXT: lbz 7, sc at toc@l(4)
+; CHECK-NEXT: stw 6, ui at toc@l(5)
+; CHECK-NEXT: lbz 6, uc at toc@l(3)
+; CHECK-NEXT: lwarx 8, 0, 10
+; CHECK-NEXT: cmplw 8, 6
+; CHECK-NEXT: bne 0, .LBB3_60
+; CHECK-NEXT: # %bb.57: # %cmpxchg.fencedstore48
+; CHECK-NEXT: extsb 7, 7
+; CHECK-NEXT: sync
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_58: # %cmpxchg.trystore47
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stwcx. 7, 0, 10
+; CHECK-NEXT: beq 0, .LBB3_61
+; CHECK-NEXT: # %bb.59: # %cmpxchg.releasedload46
+; CHECK-NEXT: # in Loop: Header=BB3_58 Depth=1
+; CHECK-NEXT: lwarx 8, 0, 10
+; CHECK-NEXT: cmplw 8, 6
+; CHECK-NEXT: beq 0, .LBB3_58
+; CHECK-NEXT: .LBB3_60: # %cmpxchg.nostore44
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: b .LBB3_62
+; CHECK-NEXT: .LBB3_61: # %cmpxchg.success45
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: .LBB3_62: # %cmpxchg.end42
+; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: li 7, 1
+; CHECK-NEXT: isel 6, 7, 6, 20
+; CHECK-NEXT: lbz 7, sc at toc@l(4)
+; CHECK-NEXT: stw 6, ui at toc@l(5)
+; CHECK-NEXT: lbz 6, uc at toc@l(3)
+; CHECK-NEXT: lwarx 8, 0, 11
+; CHECK-NEXT: cmplw 8, 6
+; CHECK-NEXT: bne 0, .LBB3_66
+; CHECK-NEXT: # %bb.63: # %cmpxchg.fencedstore29
+; CHECK-NEXT: extsb 7, 7
+; CHECK-NEXT: sync
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_64: # %cmpxchg.trystore28
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stwcx. 7, 0, 11
+; CHECK-NEXT: beq 0, .LBB3_67
+; CHECK-NEXT: # %bb.65: # %cmpxchg.releasedload27
+; CHECK-NEXT: # in Loop: Header=BB3_64 Depth=1
+; CHECK-NEXT: lwarx 8, 0, 11
+; CHECK-NEXT: cmplw 8, 6
+; CHECK-NEXT: beq 0, .LBB3_64
+; CHECK-NEXT: .LBB3_66: # %cmpxchg.nostore25
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: b .LBB3_68
+; CHECK-NEXT: .LBB3_67: # %cmpxchg.success26
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: .LBB3_68: # %cmpxchg.end23
+; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: li 7, 1
+; CHECK-NEXT: isel 6, 7, 6, 20
+; CHECK-NEXT: lbz 7, sc at toc@l(4)
+; CHECK-NEXT: stw 6, ui at toc@l(5)
+; CHECK-NEXT: lbz 6, uc at toc@l(3)
+; CHECK-NEXT: ldarx 8, 0, 12
+; CHECK-NEXT: cmpld 8, 6
+; CHECK-NEXT: bne 0, .LBB3_72
+; CHECK-NEXT: # %bb.69: # %cmpxchg.fencedstore10
+; CHECK-NEXT: extsb 7, 7
+; CHECK-NEXT: sync
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_70: # %cmpxchg.trystore9
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stdcx. 7, 0, 12
+; CHECK-NEXT: beq 0, .LBB3_73
+; CHECK-NEXT: # %bb.71: # %cmpxchg.releasedload8
+; CHECK-NEXT: # in Loop: Header=BB3_70 Depth=1
+; CHECK-NEXT: ldarx 8, 0, 12
+; CHECK-NEXT: cmpld 8, 6
+; CHECK-NEXT: beq 0, .LBB3_70
+; CHECK-NEXT: .LBB3_72: # %cmpxchg.nostore6
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: b .LBB3_74
+; CHECK-NEXT: .LBB3_73: # %cmpxchg.success7
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: .LBB3_74: # %cmpxchg.end4
+; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: li 7, 1
+; CHECK-NEXT: lbz 3, uc at toc@l(3)
+; CHECK-NEXT: lbz 4, sc at toc@l(4)
+; CHECK-NEXT: isel 6, 7, 6, 20
+; CHECK-NEXT: stw 6, ui at toc@l(5)
+; CHECK-NEXT: ldarx 6, 0, 0
+; CHECK-NEXT: cmpld 6, 3
+; CHECK-NEXT: bne 0, .LBB3_78
+; CHECK-NEXT: # %bb.75: # %cmpxchg.fencedstore
+; CHECK-NEXT: extsb 4, 4
+; CHECK-NEXT: sync
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB3_76: # %cmpxchg.trystore
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stdcx. 4, 0, 0
+; CHECK-NEXT: beq 0, .LBB3_79
+; CHECK-NEXT: # %bb.77: # %cmpxchg.releasedload
+; CHECK-NEXT: # in Loop: Header=BB3_76 Depth=1
+; CHECK-NEXT: ldarx 6, 0, 0
+; CHECK-NEXT: cmpld 6, 3
+; CHECK-NEXT: beq 0, .LBB3_76
+; CHECK-NEXT: .LBB3_78: # %cmpxchg.nostore
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: b .LBB3_80
+; CHECK-NEXT: .LBB3_79: # %cmpxchg.success
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: creqv 20, 20, 20
+; CHECK-NEXT: .LBB3_80: # %cmpxchg.end
+; CHECK-NEXT: li 3, 0
+; CHECK-NEXT: li 4, 1
+; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload
+; CHECK-NEXT: isel 3, 4, 3, 20
+; CHECK-NEXT: stw 3, ui at toc@l(5)
+; CHECK-NEXT: blr
;
; AIX32-LABEL: test_compare_and_swap:
; AIX32: # %bb.0: # %entry
; AIX32-NEXT: mflr 0
-; AIX32-NEXT: stwu 1, -128(1)
-; AIX32-NEXT: stw 0, 136(1)
-; AIX32-NEXT: stw 28, 112(1) # 4-byte Folded Spill
-; AIX32-NEXT: lwz 28, L..C0(2) # @sc
-; AIX32-NEXT: stw 29, 116(1) # 4-byte Folded Spill
-; AIX32-NEXT: lwz 29, L..C1(2) # @uc
-; AIX32-NEXT: lbz 3, 0(29)
-; AIX32-NEXT: rlwinm 5, 28, 3, 27, 28
-; AIX32-NEXT: stw 21, 84(1) # 4-byte Folded Spill
-; AIX32-NEXT: lbz 4, 0(28)
-; AIX32-NEXT: stw 17, 68(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 18, 72(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 19, 76(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 20, 80(1) # 4-byte Folded Spill
-; AIX32-NEXT: xori 21, 5, 24
-; AIX32-NEXT: stw 22, 88(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 23, 92(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 24, 96(1) # 4-byte Folded Spill
-; AIX32-NEXT: slw 5, 3, 21
-; AIX32-NEXT: li 3, 255
-; AIX32-NEXT: slw 4, 4, 21
-; AIX32-NEXT: stw 25, 100(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 26, 104(1) # 4-byte Folded Spill
-; AIX32-NEXT: slw 3, 3, 21
-; AIX32-NEXT: stw 27, 108(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 30, 120(1) # 4-byte Folded Spill
-; AIX32-NEXT: stw 31, 124(1) # 4-byte Folded Spill
-; AIX32-NEXT: sync
-; AIX32-NEXT: rlwinm 18, 28, 0, 0, 29
-; AIX32-NEXT: and 4, 4, 3
-; AIX32-NEXT: and 5, 5, 3
-; AIX32-NEXT: L..BB3_1: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 7, 0, 18
-; AIX32-NEXT: and 6, 7, 3
-; AIX32-NEXT: cmpw 6, 5
-; AIX32-NEXT: bne 0, L..BB3_3
-; AIX32-NEXT: # %bb.2: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: andc 7, 7, 3
-; AIX32-NEXT: or 7, 7, 4
-; AIX32-NEXT: stwcx. 7, 0, 18
-; AIX32-NEXT: bne 0, L..BB3_1
-; AIX32-NEXT: L..BB3_3: # %entry
-; AIX32-NEXT: rlwinm 5, 29, 3, 27, 28
-; AIX32-NEXT: srw 3, 6, 21
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: lbz 4, 0(29)
-; AIX32-NEXT: rlwinm 20, 29, 0, 0, 29
-; AIX32-NEXT: xori 25, 5, 24
-; AIX32-NEXT: slw 5, 3, 25
-; AIX32-NEXT: stb 3, 0(28)
-; AIX32-NEXT: li 3, 255
-; AIX32-NEXT: sync
-; AIX32-NEXT: slw 6, 4, 25
-; AIX32-NEXT: slw 3, 3, 25
-; AIX32-NEXT: and 4, 5, 3
-; AIX32-NEXT: and 5, 6, 3
-; AIX32-NEXT: L..BB3_4: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 7, 0, 20
-; AIX32-NEXT: and 6, 7, 3
-; AIX32-NEXT: cmpw 6, 5
-; AIX32-NEXT: bne 0, L..BB3_6
-; AIX32-NEXT: # %bb.5: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: andc 7, 7, 3
-; AIX32-NEXT: or 7, 7, 4
-; AIX32-NEXT: stwcx. 7, 0, 20
-; AIX32-NEXT: bne 0, L..BB3_4
-; AIX32-NEXT: L..BB3_6: # %entry
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: srw 4, 6, 25
-; AIX32-NEXT: lbz 3, 0(28)
-; AIX32-NEXT: extsb 5, 3
-; AIX32-NEXT: lwz 3, L..C2(2) # @ss
-; AIX32-NEXT: stb 4, 0(29)
-; AIX32-NEXT: sync
-; AIX32-NEXT: rlwinm 6, 3, 3, 27, 27
-; AIX32-NEXT: rlwinm 22, 3, 0, 0, 29
-; AIX32-NEXT: xori 26, 6, 16
-; AIX32-NEXT: slw 6, 4, 26
-; AIX32-NEXT: li 4, 0
-; AIX32-NEXT: slw 5, 5, 26
-; AIX32-NEXT: ori 4, 4, 65535
-; AIX32-NEXT: slw 4, 4, 26
-; AIX32-NEXT: and 5, 5, 4
-; AIX32-NEXT: and 6, 6, 4
-; AIX32-NEXT: L..BB3_7: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 8, 0, 22
-; AIX32-NEXT: and 7, 8, 4
-; AIX32-NEXT: cmpw 7, 6
-; AIX32-NEXT: bne 0, L..BB3_9
-; AIX32-NEXT: # %bb.8: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: andc 8, 8, 4
-; AIX32-NEXT: or 8, 8, 5
-; AIX32-NEXT: stwcx. 8, 0, 22
-; AIX32-NEXT: bne 0, L..BB3_7
-; AIX32-NEXT: L..BB3_9: # %entry
-; AIX32-NEXT: srw 4, 7, 26
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: sth 4, 0(3)
-; AIX32-NEXT: lbz 3, 0(28)
-; AIX32-NEXT: lbz 4, 0(29)
-; AIX32-NEXT: sync
-; AIX32-NEXT: extsb 5, 3
-; AIX32-NEXT: lwz 3, L..C3(2) # @us
-; AIX32-NEXT: rlwinm 6, 3, 3, 27, 27
-; AIX32-NEXT: rlwinm 19, 3, 0, 0, 29
-; AIX32-NEXT: xori 24, 6, 16
-; AIX32-NEXT: slw 6, 4, 24
-; AIX32-NEXT: li 4, 0
-; AIX32-NEXT: slw 5, 5, 24
-; AIX32-NEXT: ori 4, 4, 65535
-; AIX32-NEXT: slw 4, 4, 24
-; AIX32-NEXT: and 5, 5, 4
-; AIX32-NEXT: and 6, 6, 4
-; AIX32-NEXT: L..BB3_10: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 8, 0, 19
-; AIX32-NEXT: and 7, 8, 4
-; AIX32-NEXT: cmpw 7, 6
-; AIX32-NEXT: bne 0, L..BB3_12
-; AIX32-NEXT: # %bb.11: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: andc 8, 8, 4
-; AIX32-NEXT: or 8, 8, 5
-; AIX32-NEXT: stwcx. 8, 0, 19
-; AIX32-NEXT: bne 0, L..BB3_10
-; AIX32-NEXT: L..BB3_12: # %entry
-; AIX32-NEXT: srw 4, 7, 24
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: lwz 17, L..C4(2) # @si
-; AIX32-NEXT: sth 4, 0(3)
-; AIX32-NEXT: lbz 4, 0(28)
-; AIX32-NEXT: lbz 3, 0(29)
-; AIX32-NEXT: sync
-; AIX32-NEXT: extsb 4, 4
-; AIX32-NEXT: L..BB3_13: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 5, 0, 17
-; AIX32-NEXT: cmpw 5, 3
-; AIX32-NEXT: bne 0, L..BB3_15
-; AIX32-NEXT: # %bb.14: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: stwcx. 4, 0, 17
-; AIX32-NEXT: bne 0, L..BB3_13
-; AIX32-NEXT: L..BB3_15: # %entry
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: stw 5, 0(17)
-; AIX32-NEXT: lwz 27, L..C5(2) # @ui
-; AIX32-NEXT: lbz 4, 0(28)
-; AIX32-NEXT: lbz 3, 0(29)
-; AIX32-NEXT: sync
-; AIX32-NEXT: extsb 4, 4
-; AIX32-NEXT: L..BB3_16: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 5, 0, 27
-; AIX32-NEXT: cmpw 5, 3
-; AIX32-NEXT: bne 0, L..BB3_18
-; AIX32-NEXT: # %bb.17: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: stwcx. 4, 0, 27
-; AIX32-NEXT: bne 0, L..BB3_16
-; AIX32-NEXT: L..BB3_18: # %entry
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: lwz 31, L..C6(2) # @sll
-; AIX32-NEXT: stw 5, 0(27)
-; AIX32-NEXT: lbz 3, 0(28)
-; AIX32-NEXT: li 23, 0
-; AIX32-NEXT: addi 4, 1, 56
-; AIX32-NEXT: li 7, 5
-; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: stw 23, 56(1)
-; AIX32-NEXT: extsb 6, 3
-; AIX32-NEXT: lbz 3, 0(29)
-; AIX32-NEXT: srawi 5, 6, 31
-; AIX32-NEXT: stw 3, 60(1)
-; AIX32-NEXT: mr 3, 31
-; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
-; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 3, 60(1)
-; AIX32-NEXT: lbz 4, 0(28)
-; AIX32-NEXT: lwz 30, L..C7(2) # @ull
-; AIX32-NEXT: li 7, 5
-; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: stw 3, 4(31)
-; AIX32-NEXT: lwz 3, 56(1)
-; AIX32-NEXT: extsb 6, 4
-; AIX32-NEXT: addi 4, 1, 56
-; AIX32-NEXT: srawi 5, 6, 31
-; AIX32-NEXT: stw 23, 56(1)
-; AIX32-NEXT: stw 3, 0(31)
-; AIX32-NEXT: lbz 3, 0(29)
-; AIX32-NEXT: stw 3, 60(1)
-; AIX32-NEXT: mr 3, 30
-; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
-; AIX32-NEXT: nop
-; AIX32-NEXT: lwz 4, 60(1)
-; AIX32-NEXT: lwz 3, 56(1)
-; AIX32-NEXT: stw 4, 4(30)
-; AIX32-NEXT: lbz 4, 0(28)
-; AIX32-NEXT: stw 3, 0(30)
-; AIX32-NEXT: lbz 3, 0(29)
-; AIX32-NEXT: sync
-; AIX32-NEXT: slw 5, 4, 21
-; AIX32-NEXT: li 4, 255
-; AIX32-NEXT: slw 6, 3, 21
-; AIX32-NEXT: slw 4, 4, 21
-; AIX32-NEXT: and 5, 5, 4
-; AIX32-NEXT: and 6, 6, 4
-; AIX32-NEXT: L..BB3_19: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 8, 0, 18
-; AIX32-NEXT: and 7, 8, 4
-; AIX32-NEXT: cmpw 7, 6
-; AIX32-NEXT: bne 0, L..BB3_21
-; AIX32-NEXT: # %bb.20: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: andc 8, 8, 4
-; AIX32-NEXT: or 8, 8, 5
-; AIX32-NEXT: stwcx. 8, 0, 18
-; AIX32-NEXT: bne 0, L..BB3_19
-; AIX32-NEXT: L..BB3_21: # %entry
-; AIX32-NEXT: srw 4, 7, 21
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: lbz 5, 0(28)
-; AIX32-NEXT: cmpw 4, 3
-; AIX32-NEXT: li 3, 1
-; AIX32-NEXT: iseleq 4, 3, 23
-; AIX32-NEXT: slw 6, 5, 25
-; AIX32-NEXT: li 5, 255
-; AIX32-NEXT: stw 4, 0(27)
-; AIX32-NEXT: lbz 4, 0(29)
-; AIX32-NEXT: slw 5, 5, 25
-; AIX32-NEXT: sync
-; AIX32-NEXT: slw 7, 4, 25
-; AIX32-NEXT: and 6, 6, 5
-; AIX32-NEXT: and 7, 7, 5
-; AIX32-NEXT: L..BB3_22: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 9, 0, 20
-; AIX32-NEXT: and 8, 9, 5
-; AIX32-NEXT: cmpw 8, 7
-; AIX32-NEXT: bne 0, L..BB3_24
-; AIX32-NEXT: # %bb.23: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: andc 9, 9, 5
-; AIX32-NEXT: or 9, 9, 6
-; AIX32-NEXT: stwcx. 9, 0, 20
-; AIX32-NEXT: bne 0, L..BB3_22
-; AIX32-NEXT: L..BB3_24: # %entry
-; AIX32-NEXT: srw 5, 8, 25
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: cmpw 5, 4
-; AIX32-NEXT: lbz 5, 0(28)
-; AIX32-NEXT: iseleq 4, 3, 23
-; AIX32-NEXT: extsb 5, 5
-; AIX32-NEXT: stw 4, 0(27)
-; AIX32-NEXT: lbz 4, 0(29)
-; AIX32-NEXT: sync
-; AIX32-NEXT: slw 6, 5, 26
-; AIX32-NEXT: li 5, 0
-; AIX32-NEXT: slw 7, 4, 26
-; AIX32-NEXT: ori 5, 5, 65535
-; AIX32-NEXT: slw 5, 5, 26
-; AIX32-NEXT: and 6, 6, 5
-; AIX32-NEXT: and 7, 7, 5
-; AIX32-NEXT: L..BB3_25: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 9, 0, 22
-; AIX32-NEXT: and 8, 9, 5
-; AIX32-NEXT: cmpw 8, 7
-; AIX32-NEXT: bne 0, L..BB3_27
-; AIX32-NEXT: # %bb.26: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: andc 9, 9, 5
-; AIX32-NEXT: or 9, 9, 6
-; AIX32-NEXT: stwcx. 9, 0, 22
-; AIX32-NEXT: bne 0, L..BB3_25
-; AIX32-NEXT: L..BB3_27: # %entry
-; AIX32-NEXT: srw 5, 8, 26
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: cmpw 5, 4
-; AIX32-NEXT: lbz 5, 0(28)
-; AIX32-NEXT: iseleq 4, 3, 23
-; AIX32-NEXT: extsb 5, 5
-; AIX32-NEXT: stw 4, 0(27)
-; AIX32-NEXT: lbz 4, 0(29)
-; AIX32-NEXT: sync
-; AIX32-NEXT: slw 6, 5, 24
-; AIX32-NEXT: li 5, 0
-; AIX32-NEXT: slw 7, 4, 24
-; AIX32-NEXT: ori 5, 5, 65535
-; AIX32-NEXT: slw 5, 5, 24
-; AIX32-NEXT: and 6, 6, 5
-; AIX32-NEXT: and 7, 7, 5
-; AIX32-NEXT: L..BB3_28: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 9, 0, 19
-; AIX32-NEXT: and 8, 9, 5
-; AIX32-NEXT: cmpw 8, 7
-; AIX32-NEXT: bne 0, L..BB3_30
-; AIX32-NEXT: # %bb.29: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: andc 9, 9, 5
-; AIX32-NEXT: or 9, 9, 6
-; AIX32-NEXT: stwcx. 9, 0, 19
-; AIX32-NEXT: bne 0, L..BB3_28
-; AIX32-NEXT: L..BB3_30: # %entry
-; AIX32-NEXT: srw 5, 8, 24
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: cmpw 5, 4
-; AIX32-NEXT: lbz 5, 0(28)
-; AIX32-NEXT: iseleq 4, 3, 23
-; AIX32-NEXT: stw 4, 0(27)
-; AIX32-NEXT: lbz 4, 0(29)
-; AIX32-NEXT: sync
-; AIX32-NEXT: extsb 5, 5
-; AIX32-NEXT: L..BB3_31: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 6, 0, 17
-; AIX32-NEXT: cmpw 1, 6, 4
-; AIX32-NEXT: bne 1, L..BB3_33
-; AIX32-NEXT: # %bb.32: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: stwcx. 5, 0, 17
-; AIX32-NEXT: bne 0, L..BB3_31
-; AIX32-NEXT: L..BB3_33: # %entry
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: isel 4, 3, 23, 6
-; AIX32-NEXT: lbz 5, 0(28)
-; AIX32-NEXT: stw 4, 0(27)
-; AIX32-NEXT: lbz 4, 0(29)
-; AIX32-NEXT: sync
-; AIX32-NEXT: extsb 5, 5
-; AIX32-NEXT: L..BB3_34: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: lwarx 6, 0, 27
-; AIX32-NEXT: cmpw 1, 6, 4
-; AIX32-NEXT: bne 1, L..BB3_36
-; AIX32-NEXT: # %bb.35: # %entry
-; AIX32-NEXT: #
-; AIX32-NEXT: stwcx. 5, 0, 27
-; AIX32-NEXT: bne 0, L..BB3_34
-; AIX32-NEXT: L..BB3_36: # %entry
-; AIX32-NEXT: lwsync
-; AIX32-NEXT: isel 3, 3, 23, 6
-; AIX32-NEXT: li 7, 5
-; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: lbz 4, 0(28)
-; AIX32-NEXT: stw 3, 0(27)
-; AIX32-NEXT: lbz 3, 0(29)
-; AIX32-NEXT: stw 23, 56(1)
-; AIX32-NEXT: extsb 6, 4
-; AIX32-NEXT: addi 4, 1, 56
-; AIX32-NEXT: stw 3, 60(1)
-; AIX32-NEXT: mr 3, 31
-; AIX32-NEXT: srawi 5, 6, 31
-; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
-; AIX32-NEXT: nop
-; AIX32-NEXT: lbz 4, 0(28)
-; AIX32-NEXT: stw 3, 0(27)
-; AIX32-NEXT: lbz 3, 0(29)
-; AIX32-NEXT: li 7, 5
-; AIX32-NEXT: li 8, 5
-; AIX32-NEXT: extsb 6, 4
-; AIX32-NEXT: addi 4, 1, 56
-; AIX32-NEXT: stw 3, 60(1)
-; AIX32-NEXT: mr 3, 30
-; AIX32-NEXT: stw 23, 56(1)
-; AIX32-NEXT: srawi 5, 6, 31
-; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
-; AIX32-NEXT: nop
-; AIX32-NEXT: stw 3, 0(27)
-; AIX32-NEXT: lwz 31, 124(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 30, 120(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 29, 116(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 28, 112(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 27, 108(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 26, 104(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 25, 100(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 24, 96(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 23, 92(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 22, 88(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 21, 84(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 20, 80(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 19, 76(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 18, 72(1) # 4-byte Folded Reload
-; AIX32-NEXT: lwz 17, 68(1) # 4-byte Folded Reload
-; AIX32-NEXT: addi 1, 1, 128
-; AIX32-NEXT: lwz 0, 8(1)
-; AIX32-NEXT: mtlr 0
+; AIX32-NEXT: stwu 1, -144(1)
+; AIX32-NEXT: stw 0, 152(1)
+; AIX32-NEXT: stw 29, 132(1) # 4-byte Folded Spill
+; AIX32-NEXT: lwz 29, L..C0(2) # @sc
+; AIX32-NEXT: stw 26, 120(1) # 4-byte Folded Spill
+; AIX32-NEXT: not 3, 29
+; AIX32-NEXT: stw 30, 136(1) # 4-byte Folded Spill
+; AIX32-NEXT: lwz 30, L..C1(2) # @uc
+; AIX32-NEXT: lbz 4, 0(30)
+; AIX32-NEXT: lbz 5, 0(29)
+; AIX32-NEXT: stw 27, 124(1) # 4-byte Folded Spill
+; AIX32-NEXT: rlwinm 27, 29, 0, 0, 29
+; AIX32-NEXT: stw 14, 72(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 15, 76(1) # 4-byte Folded Spill
+; AIX32-NEXT: rlwinm 26, 3, 3, 27, 28
+; AIX32-NEXT: li 3, 255
+; AIX32-NEXT: slw 3, 3, 26
+; AIX32-NEXT: stw 16, 80(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 17, 84(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 18, 88(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 19, 92(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 20, 96(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 21, 100(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 22, 104(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 23, 108(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 24, 112(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 25, 116(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 28, 128(1) # 4-byte Folded Spill
+; AIX32-NEXT: stw 31, 140(1) # 4-byte Folded Spill
+; AIX32-NEXT: not 25, 3
+; AIX32-NEXT: lwarx 3, 0, 27
+; AIX32-NEXT: srw 6, 3, 26
+; AIX32-NEXT: clrlwi 6, 6, 24
+; AIX32-NEXT: cmplw 6, 4
+; AIX32-NEXT: bne 0, L..BB3_4
+; AIX32-NEXT: # %bb.1: # %cmpxchg.fencedstore289
+; AIX32-NEXT: sync
+; AIX32-NEXT: slw 5, 5, 26
+; AIX32-NEXT: .align 4
+; AIX32-NEXT: L..BB3_2: # %cmpxchg.trystore288
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: and 6, 3, 25
+; AIX32-NEXT: or 6, 6, 5
+; AIX32-NEXT: stwcx. 6, 0, 27
+; AIX32-NEXT: beq 0, L..BB3_4
+; AIX32-NEXT: # %bb.3: # %cmpxchg.releasedload287
+; AIX32-NEXT: # in Loop: Header=BB3_2 Depth=1
+; AIX32-NEXT: lwarx 3, 0, 27
+; AIX32-NEXT: srw 6, 3, 26
+; AIX32-NEXT: clrlwi 6, 6, 24
+; AIX32-NEXT: cmplw 6, 4
+; AIX32-NEXT: beq 0, L..BB3_2
+; AIX32-NEXT: L..BB3_4: # %cmpxchg.nostore285
+; AIX32-NEXT: not 4, 30
+; AIX32-NEXT: srw 5, 3, 26
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: lbz 3, 0(30)
+; AIX32-NEXT: rlwinm 24, 30, 0, 0, 29
+; AIX32-NEXT: rlwinm 23, 4, 3, 27, 28
+; AIX32-NEXT: li 4, 255
+; AIX32-NEXT: stb 5, 0(29)
+; AIX32-NEXT: slw 4, 4, 23
+; AIX32-NEXT: not 22, 4
+; AIX32-NEXT: lwarx 4, 0, 24
+; AIX32-NEXT: srw 6, 4, 23
+; AIX32-NEXT: clrlwi 6, 6, 24
+; AIX32-NEXT: cmplw 6, 3
+; AIX32-NEXT: bne 0, L..BB3_8
+; AIX32-NEXT: # %bb.5: # %cmpxchg.fencedstore256
+; AIX32-NEXT: clrlwi 5, 5, 24
+; AIX32-NEXT: sync
+; AIX32-NEXT: slw 5, 5, 23
+; AIX32-NEXT: .align 4
+; AIX32-NEXT: L..BB3_6: # %cmpxchg.trystore255
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: and 6, 4, 22
+; AIX32-NEXT: or 6, 6, 5
+; AIX32-NEXT: stwcx. 6, 0, 24
+; AIX32-NEXT: beq 0, L..BB3_8
+; AIX32-NEXT: # %bb.7: # %cmpxchg.releasedload254
+; AIX32-NEXT: # in Loop: Header=BB3_6 Depth=1
+; AIX32-NEXT: lwarx 4, 0, 24
+; AIX32-NEXT: srw 6, 4, 23
+; AIX32-NEXT: clrlwi 6, 6, 24
+; AIX32-NEXT: cmplw 6, 3
+; AIX32-NEXT: beq 0, L..BB3_6
+; AIX32-NEXT: L..BB3_8: # %cmpxchg.nostore252
+; AIX32-NEXT: srw 4, 4, 23
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: lis 3, 0
+; AIX32-NEXT: lbz 7, 0(29)
+; AIX32-NEXT: stb 4, 0(30)
+; AIX32-NEXT: clrlwi 6, 4, 24
+; AIX32-NEXT: lwz 4, L..C2(2) # @ss
+; AIX32-NEXT: ori 3, 3, 65535
+; AIX32-NEXT: clrlwi 5, 4, 30
+; AIX32-NEXT: rlwinm 21, 4, 0, 0, 29
+; AIX32-NEXT: xori 5, 5, 2
+; AIX32-NEXT: slwi 20, 5, 3
+; AIX32-NEXT: slw 5, 3, 20
+; AIX32-NEXT: not 19, 5
+; AIX32-NEXT: lwarx 5, 0, 21
+; AIX32-NEXT: srw 8, 5, 20
+; AIX32-NEXT: clrlwi 8, 8, 16
+; AIX32-NEXT: cmplw 8, 6
+; AIX32-NEXT: bne 0, L..BB3_12
+; AIX32-NEXT: # %bb.9: # %cmpxchg.fencedstore223
+; AIX32-NEXT: extsb 7, 7
+; AIX32-NEXT: sync
+; AIX32-NEXT: clrlwi 7, 7, 16
+; AIX32-NEXT: slw 7, 7, 20
+; AIX32-NEXT: .align 4
+; AIX32-NEXT: L..BB3_10: # %cmpxchg.trystore222
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: and 8, 5, 19
+; AIX32-NEXT: or 8, 8, 7
+; AIX32-NEXT: stwcx. 8, 0, 21
+; AIX32-NEXT: beq 0, L..BB3_12
+; AIX32-NEXT: # %bb.11: # %cmpxchg.releasedload221
+; AIX32-NEXT: # in Loop: Header=BB3_10 Depth=1
+; AIX32-NEXT: lwarx 5, 0, 21
+; AIX32-NEXT: srw 8, 5, 20
+; AIX32-NEXT: clrlwi 8, 8, 16
+; AIX32-NEXT: cmplw 8, 6
+; AIX32-NEXT: beq 0, L..BB3_10
+; AIX32-NEXT: L..BB3_12: # %cmpxchg.nostore219
+; AIX32-NEXT: srw 5, 5, 20
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: lbz 6, 0(29)
+; AIX32-NEXT: sth 5, 0(4)
+; AIX32-NEXT: lwz 4, L..C3(2) # @us
+; AIX32-NEXT: lbz 5, 0(30)
+; AIX32-NEXT: clrlwi 7, 4, 30
+; AIX32-NEXT: rlwinm 18, 4, 0, 0, 29
+; AIX32-NEXT: xori 7, 7, 2
+; AIX32-NEXT: slwi 17, 7, 3
+; AIX32-NEXT: slw 3, 3, 17
+; AIX32-NEXT: not 16, 3
+; AIX32-NEXT: lwarx 3, 0, 18
+; AIX32-NEXT: srw 7, 3, 17
+; AIX32-NEXT: clrlwi 7, 7, 16
+; AIX32-NEXT: cmplw 7, 5
+; AIX32-NEXT: bne 0, L..BB3_16
+; AIX32-NEXT: # %bb.13: # %cmpxchg.fencedstore190
+; AIX32-NEXT: extsb 6, 6
+; AIX32-NEXT: sync
+; AIX32-NEXT: clrlwi 6, 6, 16
+; AIX32-NEXT: slw 6, 6, 17
+; AIX32-NEXT: .align 4
+; AIX32-NEXT: L..BB3_14: # %cmpxchg.trystore189
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: and 7, 3, 16
+; AIX32-NEXT: or 7, 7, 6
+; AIX32-NEXT: stwcx. 7, 0, 18
+; AIX32-NEXT: beq 0, L..BB3_16
+; AIX32-NEXT: # %bb.15: # %cmpxchg.releasedload188
+; AIX32-NEXT: # in Loop: Header=BB3_14 Depth=1
+; AIX32-NEXT: lwarx 3, 0, 18
+; AIX32-NEXT: srw 7, 3, 17
+; AIX32-NEXT: clrlwi 7, 7, 16
+; AIX32-NEXT: cmplw 7, 5
+; AIX32-NEXT: beq 0, L..BB3_14
+; AIX32-NEXT: L..BB3_16: # %cmpxchg.nostore186
+; AIX32-NEXT: srw 3, 3, 17
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: lwz 15, L..C4(2) # @si
+; AIX32-NEXT: lbz 5, 0(29)
+; AIX32-NEXT: sth 3, 0(4)
+; AIX32-NEXT: lbz 4, 0(30)
+; AIX32-NEXT: lwarx 3, 0, 15
+; AIX32-NEXT: cmplw 3, 4
+; AIX32-NEXT: bne 0, L..BB3_20
+; AIX32-NEXT: # %bb.17: # %cmpxchg.fencedstore171
+; AIX32-NEXT: extsb 5, 5
+; AIX32-NEXT: sync
+; AIX32-NEXT: .align 5
+; AIX32-NEXT: L..BB3_18: # %cmpxchg.trystore170
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: stwcx. 5, 0, 15
+; AIX32-NEXT: beq 0, L..BB3_20
+; AIX32-NEXT: # %bb.19: # %cmpxchg.releasedload169
+; AIX32-NEXT: # in Loop: Header=BB3_18 Depth=1
+; AIX32-NEXT: lwarx 3, 0, 15
+; AIX32-NEXT: cmplw 3, 4
+; AIX32-NEXT: beq 0, L..BB3_18
+; AIX32-NEXT: L..BB3_20: # %cmpxchg.nostore167
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: lwz 28, L..C5(2) # @ui
+; AIX32-NEXT: stw 3, 0(15)
+; AIX32-NEXT: lbz 4, 0(30)
+; AIX32-NEXT: lbz 5, 0(29)
+; AIX32-NEXT: lwarx 3, 0, 28
+; AIX32-NEXT: cmplw 3, 4
+; AIX32-NEXT: bne 0, L..BB3_24
+; AIX32-NEXT: # %bb.21: # %cmpxchg.fencedstore152
+; AIX32-NEXT: extsb 5, 5
+; AIX32-NEXT: sync
+; AIX32-NEXT: .align 5
+; AIX32-NEXT: L..BB3_22: # %cmpxchg.trystore151
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: stwcx. 5, 0, 28
+; AIX32-NEXT: beq 0, L..BB3_24
+; AIX32-NEXT: # %bb.23: # %cmpxchg.releasedload150
+; AIX32-NEXT: # in Loop: Header=BB3_22 Depth=1
+; AIX32-NEXT: lwarx 3, 0, 28
+; AIX32-NEXT: cmplw 3, 4
+; AIX32-NEXT: beq 0, L..BB3_22
+; AIX32-NEXT: L..BB3_24: # %cmpxchg.nostore148
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: stw 3, 0(28)
+; AIX32-NEXT: lwz 31, L..C6(2) # @sll
+; AIX32-NEXT: lbz 3, 0(29)
+; AIX32-NEXT: li 14, 0
+; AIX32-NEXT: addi 4, 1, 64
+; AIX32-NEXT: li 7, 5
+; AIX32-NEXT: li 8, 5
+; AIX32-NEXT: stw 14, 64(1)
+; AIX32-NEXT: extsb 6, 3
+; AIX32-NEXT: lbz 3, 0(30)
+; AIX32-NEXT: srawi 5, 6, 31
+; AIX32-NEXT: stw 3, 68(1)
+; AIX32-NEXT: mr 3, 31
+; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
+; AIX32-NEXT: nop
+; AIX32-NEXT: lwz 3, 68(1)
+; AIX32-NEXT: lbz 4, 0(29)
+; AIX32-NEXT: li 7, 5
+; AIX32-NEXT: li 8, 5
+; AIX32-NEXT: stw 3, 4(31)
+; AIX32-NEXT: lwz 3, 64(1)
+; AIX32-NEXT: extsb 6, 4
+; AIX32-NEXT: addi 4, 1, 64
+; AIX32-NEXT: stw 14, 64(1)
+; AIX32-NEXT: srawi 5, 6, 31
+; AIX32-NEXT: stw 3, 0(31)
+; AIX32-NEXT: lbz 3, 0(30)
+; AIX32-NEXT: lwz 31, L..C7(2) # @ull
+; AIX32-NEXT: stw 3, 68(1)
+; AIX32-NEXT: mr 3, 31
+; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
+; AIX32-NEXT: nop
+; AIX32-NEXT: lwz 3, 64(1)
+; AIX32-NEXT: lwz 4, 68(1)
+; AIX32-NEXT: lbz 5, 0(29)
+; AIX32-NEXT: stw 4, 4(31)
+; AIX32-NEXT: stw 3, 0(31)
+; AIX32-NEXT: lbz 3, 0(30)
+; AIX32-NEXT: lwarx 4, 0, 27
+; AIX32-NEXT: srw 6, 4, 26
+; AIX32-NEXT: clrlwi 6, 6, 24
+; AIX32-NEXT: cmplw 6, 3
+; AIX32-NEXT: bne 0, L..BB3_28
+; AIX32-NEXT: # %bb.25: # %cmpxchg.fencedstore119
+; AIX32-NEXT: sync
+; AIX32-NEXT: slw 5, 5, 26
+; AIX32-NEXT: .align 4
+; AIX32-NEXT: L..BB3_26: # %cmpxchg.trystore118
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: and 4, 4, 25
+; AIX32-NEXT: or 4, 4, 5
+; AIX32-NEXT: stwcx. 4, 0, 27
+; AIX32-NEXT: beq 0, L..BB3_29
+; AIX32-NEXT: # %bb.27: # %cmpxchg.releasedload117
+; AIX32-NEXT: # in Loop: Header=BB3_26 Depth=1
+; AIX32-NEXT: lwarx 4, 0, 27
+; AIX32-NEXT: srw 6, 4, 26
+; AIX32-NEXT: clrlwi 6, 6, 24
+; AIX32-NEXT: cmplw 6, 3
+; AIX32-NEXT: beq 0, L..BB3_26
+; AIX32-NEXT: L..BB3_28: # %cmpxchg.nostore115
+; AIX32-NEXT: crxor 20, 20, 20
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: b L..BB3_30
+; AIX32-NEXT: L..BB3_29: # %cmpxchg.success116
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: creqv 20, 20, 20
+; AIX32-NEXT: L..BB3_30: # %cmpxchg.end113
+; AIX32-NEXT: li 3, 0
+; AIX32-NEXT: li 4, 1
+; AIX32-NEXT: lbz 5, 0(29)
+; AIX32-NEXT: isel 3, 4, 3, 20
+; AIX32-NEXT: stw 3, 0(28)
+; AIX32-NEXT: lbz 3, 0(30)
+; AIX32-NEXT: lwarx 4, 0, 24
+; AIX32-NEXT: srw 6, 4, 23
+; AIX32-NEXT: clrlwi 6, 6, 24
+; AIX32-NEXT: cmplw 6, 3
+; AIX32-NEXT: bne 0, L..BB3_34
+; AIX32-NEXT: # %bb.31: # %cmpxchg.fencedstore86
+; AIX32-NEXT: sync
+; AIX32-NEXT: slw 5, 5, 23
+; AIX32-NEXT: .align 4
+; AIX32-NEXT: L..BB3_32: # %cmpxchg.trystore85
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: and 4, 4, 22
+; AIX32-NEXT: or 4, 4, 5
+; AIX32-NEXT: stwcx. 4, 0, 24
+; AIX32-NEXT: beq 0, L..BB3_35
+; AIX32-NEXT: # %bb.33: # %cmpxchg.releasedload84
+; AIX32-NEXT: # in Loop: Header=BB3_32 Depth=1
+; AIX32-NEXT: lwarx 4, 0, 24
+; AIX32-NEXT: srw 6, 4, 23
+; AIX32-NEXT: clrlwi 6, 6, 24
+; AIX32-NEXT: cmplw 6, 3
+; AIX32-NEXT: beq 0, L..BB3_32
+; AIX32-NEXT: L..BB3_34: # %cmpxchg.nostore82
+; AIX32-NEXT: crxor 20, 20, 20
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: b L..BB3_36
+; AIX32-NEXT: L..BB3_35: # %cmpxchg.success83
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: creqv 20, 20, 20
+; AIX32-NEXT: L..BB3_36: # %cmpxchg.end80
+; AIX32-NEXT: li 3, 0
+; AIX32-NEXT: li 4, 1
+; AIX32-NEXT: lbz 5, 0(29)
+; AIX32-NEXT: isel 3, 4, 3, 20
+; AIX32-NEXT: stw 3, 0(28)
+; AIX32-NEXT: lbz 3, 0(30)
+; AIX32-NEXT: lwarx 4, 0, 21
+; AIX32-NEXT: srw 6, 4, 20
+; AIX32-NEXT: clrlwi 6, 6, 16
+; AIX32-NEXT: cmplw 6, 3
+; AIX32-NEXT: bne 0, L..BB3_40
+; AIX32-NEXT: # %bb.37: # %cmpxchg.fencedstore53
+; AIX32-NEXT: extsb 5, 5
+; AIX32-NEXT: sync
+; AIX32-NEXT: clrlwi 5, 5, 16
+; AIX32-NEXT: slw 5, 5, 20
+; AIX32-NEXT: .align 4
+; AIX32-NEXT: L..BB3_38: # %cmpxchg.trystore52
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: and 4, 4, 19
+; AIX32-NEXT: or 4, 4, 5
+; AIX32-NEXT: stwcx. 4, 0, 21
+; AIX32-NEXT: beq 0, L..BB3_41
+; AIX32-NEXT: # %bb.39: # %cmpxchg.releasedload51
+; AIX32-NEXT: # in Loop: Header=BB3_38 Depth=1
+; AIX32-NEXT: lwarx 4, 0, 21
+; AIX32-NEXT: srw 6, 4, 20
+; AIX32-NEXT: clrlwi 6, 6, 16
+; AIX32-NEXT: cmplw 6, 3
+; AIX32-NEXT: beq 0, L..BB3_38
+; AIX32-NEXT: L..BB3_40: # %cmpxchg.nostore49
+; AIX32-NEXT: crxor 20, 20, 20
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: b L..BB3_42
+; AIX32-NEXT: L..BB3_41: # %cmpxchg.success50
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: creqv 20, 20, 20
+; AIX32-NEXT: L..BB3_42: # %cmpxchg.end47
+; AIX32-NEXT: li 3, 0
+; AIX32-NEXT: li 4, 1
+; AIX32-NEXT: lbz 5, 0(29)
+; AIX32-NEXT: isel 3, 4, 3, 20
+; AIX32-NEXT: stw 3, 0(28)
+; AIX32-NEXT: lbz 3, 0(30)
+; AIX32-NEXT: lwarx 4, 0, 18
+; AIX32-NEXT: srw 6, 4, 17
+; AIX32-NEXT: clrlwi 6, 6, 16
+; AIX32-NEXT: cmplw 6, 3
+; AIX32-NEXT: bne 0, L..BB3_46
+; AIX32-NEXT: # %bb.43: # %cmpxchg.fencedstore29
+; AIX32-NEXT: extsb 5, 5
+; AIX32-NEXT: sync
+; AIX32-NEXT: clrlwi 5, 5, 16
+; AIX32-NEXT: slw 5, 5, 17
+; AIX32-NEXT: .align 4
+; AIX32-NEXT: L..BB3_44: # %cmpxchg.trystore28
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: and 4, 4, 16
+; AIX32-NEXT: or 4, 4, 5
+; AIX32-NEXT: stwcx. 4, 0, 18
+; AIX32-NEXT: beq 0, L..BB3_47
+; AIX32-NEXT: # %bb.45: # %cmpxchg.releasedload27
+; AIX32-NEXT: # in Loop: Header=BB3_44 Depth=1
+; AIX32-NEXT: lwarx 4, 0, 18
+; AIX32-NEXT: srw 6, 4, 17
+; AIX32-NEXT: clrlwi 6, 6, 16
+; AIX32-NEXT: cmplw 6, 3
+; AIX32-NEXT: beq 0, L..BB3_44
+; AIX32-NEXT: L..BB3_46: # %cmpxchg.nostore25
+; AIX32-NEXT: crxor 20, 20, 20
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: b L..BB3_48
+; AIX32-NEXT: L..BB3_47: # %cmpxchg.success26
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: creqv 20, 20, 20
+; AIX32-NEXT: L..BB3_48: # %cmpxchg.end23
+; AIX32-NEXT: li 3, 0
+; AIX32-NEXT: li 4, 1
+; AIX32-NEXT: isel 3, 4, 3, 20
+; AIX32-NEXT: lbz 4, 0(29)
+; AIX32-NEXT: stw 3, 0(28)
+; AIX32-NEXT: lbz 3, 0(30)
+; AIX32-NEXT: lwarx 5, 0, 15
+; AIX32-NEXT: cmplw 5, 3
+; AIX32-NEXT: bne 0, L..BB3_52
+; AIX32-NEXT: # %bb.49: # %cmpxchg.fencedstore10
+; AIX32-NEXT: extsb 4, 4
+; AIX32-NEXT: sync
+; AIX32-NEXT: .align 5
+; AIX32-NEXT: L..BB3_50: # %cmpxchg.trystore9
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: stwcx. 4, 0, 15
+; AIX32-NEXT: beq 0, L..BB3_53
+; AIX32-NEXT: # %bb.51: # %cmpxchg.releasedload8
+; AIX32-NEXT: # in Loop: Header=BB3_50 Depth=1
+; AIX32-NEXT: lwarx 5, 0, 15
+; AIX32-NEXT: cmplw 5, 3
+; AIX32-NEXT: beq 0, L..BB3_50
+; AIX32-NEXT: L..BB3_52: # %cmpxchg.nostore6
+; AIX32-NEXT: crxor 20, 20, 20
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: b L..BB3_54
+; AIX32-NEXT: L..BB3_53: # %cmpxchg.success7
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: creqv 20, 20, 20
+; AIX32-NEXT: L..BB3_54: # %cmpxchg.end4
+; AIX32-NEXT: li 3, 0
+; AIX32-NEXT: li 4, 1
+; AIX32-NEXT: isel 3, 4, 3, 20
+; AIX32-NEXT: lbz 4, 0(29)
+; AIX32-NEXT: stw 3, 0(28)
+; AIX32-NEXT: lbz 3, 0(30)
+; AIX32-NEXT: lwarx 5, 0, 28
+; AIX32-NEXT: cmplw 5, 3
+; AIX32-NEXT: bne 0, L..BB3_58
+; AIX32-NEXT: # %bb.55: # %cmpxchg.fencedstore
+; AIX32-NEXT: extsb 4, 4
+; AIX32-NEXT: sync
+; AIX32-NEXT: .align 5
+; AIX32-NEXT: L..BB3_56: # %cmpxchg.trystore
+; AIX32-NEXT: # =>This Inner Loop Header: Depth=1
+; AIX32-NEXT: stwcx. 4, 0, 28
+; AIX32-NEXT: beq 0, L..BB3_59
+; AIX32-NEXT: # %bb.57: # %cmpxchg.releasedload
+; AIX32-NEXT: # in Loop: Header=BB3_56 Depth=1
+; AIX32-NEXT: lwarx 5, 0, 28
+; AIX32-NEXT: cmplw 5, 3
+; AIX32-NEXT: beq 0, L..BB3_56
+; AIX32-NEXT: L..BB3_58: # %cmpxchg.nostore
+; AIX32-NEXT: crxor 20, 20, 20
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: b L..BB3_60
+; AIX32-NEXT: L..BB3_59: # %cmpxchg.success
+; AIX32-NEXT: lwsync
+; AIX32-NEXT: creqv 20, 20, 20
+; AIX32-NEXT: L..BB3_60: # %cmpxchg.end
+; AIX32-NEXT: li 3, 1
+; AIX32-NEXT: li 31, 0
+; AIX32-NEXT: lbz 4, 0(29)
+; AIX32-NEXT: isel 3, 3, 31, 20
+; AIX32-NEXT: li 7, 5
+; AIX32-NEXT: li 8, 5
+; AIX32-NEXT: extsb 6, 4
+; AIX32-NEXT: stw 3, 0(28)
+; AIX32-NEXT: lbz 3, 0(30)
+; AIX32-NEXT: addi 4, 1, 64
+; AIX32-NEXT: stw 31, 64(1)
+; AIX32-NEXT: srawi 5, 6, 31
+; AIX32-NEXT: stw 3, 68(1)
+; AIX32-NEXT: lwz 3, L..C6(2) # @sll
+; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
+; AIX32-NEXT: nop
+; AIX32-NEXT: lbz 4, 0(29)
+; AIX32-NEXT: stw 3, 0(28)
+; AIX32-NEXT: lbz 3, 0(30)
+; AIX32-NEXT: li 7, 5
+; AIX32-NEXT: li 8, 5
+; AIX32-NEXT: extsb 6, 4
+; AIX32-NEXT: stw 3, 68(1)
+; AIX32-NEXT: lwz 3, L..C7(2) # @ull
+; AIX32-NEXT: addi 4, 1, 64
+; AIX32-NEXT: stw 31, 64(1)
+; AIX32-NEXT: srawi 5, 6, 31
+; AIX32-NEXT: bl .__atomic_compare_exchange_8[PR]
+; AIX32-NEXT: nop
+; AIX32-NEXT: stw 3, 0(28)
+; AIX32-NEXT: lwz 31, 140(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 30, 136(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 29, 132(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 28, 128(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 27, 124(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 26, 120(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 25, 116(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 24, 112(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 23, 108(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 22, 104(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 21, 100(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 20, 96(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 19, 92(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 18, 88(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 17, 84(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 16, 80(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 15, 76(1) # 4-byte Folded Reload
+; AIX32-NEXT: lwz 14, 72(1) # 4-byte Folded Reload
+; AIX32-NEXT: addi 1, 1, 144
+; AIX32-NEXT: lwz 0, 8(1)
+; AIX32-NEXT: mtlr 0
; AIX32-NEXT: blr
entry:
%0 = load i8, ptr @uc, align 1
@@ -5597,21 +5852,20 @@ entry:
define dso_local i64 @cmpswplp(ptr noundef %ptr, ptr nocapture noundef readnone %oldval, i64 noundef %newval) local_unnamed_addr #0 {
; CHECK-LABEL: cmpswplp:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi 4, 5, 1
-; CHECK-NEXT: .LBB6_1: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: ldarx 6, 0, 3
-; CHECK-NEXT: cmpd 1, 6, 5
-; CHECK-NEXT: bne 1, .LBB6_3
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stdcx. 4, 0, 3
-; CHECK-NEXT: bne 0, .LBB6_1
-; CHECK-NEXT: .LBB6_3: # %entry
-; CHECK-NEXT: li 3, 66
-; CHECK-NEXT: li 4, 55
-; CHECK-NEXT: isel 3, 4, 3, 6
-; CHECK-NEXT: blr
+; CHECK-NEXT: ldarx 4, 0, 3
+; CHECK-NEXT: cmpld 4, 5
+; CHECK-NEXT: bne 0, .LBB6_2
+; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; CHECK-NEXT: addi 4, 5, 1
+; CHECK-NEXT: stdcx. 4, 0, 3
+; CHECK-NEXT: beq 0, .LBB6_4
+; CHECK-NEXT: .LBB6_2: # %cmpxchg.failure
+; CHECK-NEXT: crxor 20, 20, 20
+; CHECK-NEXT: .LBB6_3: # %cmpxchg.end
+; CHECK-NEXT: li 3, 66
+; CHECK-NEXT: li 4, 55
+; CHECK-NEXT: isel 3, 4, 3, 20
+; CHECK-NEXT: blr
;
; AIX32-LABEL: cmpswplp:
; AIX32: # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/PowerPC/atomic-2.ll b/llvm/test/CodeGen/PowerPC/atomic-2.ll
index 10476541870f9..8fa0d767b329d 100644
--- a/llvm/test/CodeGen/PowerPC/atomic-2.ll
+++ b/llvm/test/CodeGen/PowerPC/atomic-2.ll
@@ -42,8 +42,8 @@ define i64 @exchange_and_cmp(ptr %mem) nounwind {
define i8 @exchange_and_cmp8(ptr %mem) nounwind {
; CHECK-LABEL: exchange_and_cmp8:
-; CHECK-BE: xori
-; CHECK-LE-NOT: xori
+; CHECK-BE: or r{{.*}} r{{.*}} r{{.*}}
+; CHECK-LE-NOT: or r{{.*}} r{{.*}} r{{.*}}
; CHECK-P8U: lbarx
%tmppair = cmpxchg ptr %mem, i8 0, i8 1 monotonic monotonic
%tmp = extractvalue { i8, i1 } %tmppair, 0
diff --git a/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll b/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
index 399645f671f7b..65a12a6222f24 100644
--- a/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
+++ b/llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll
@@ -12,62 +12,60 @@
define i32 @foo(ptr noundef %cp, ptr noundef %old, i32 noundef %c) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lwz r7, 0(r4)
; CHECK-NEXT: stw r3, -4(r1)
; CHECK-NEXT: stw r4, -8(r1)
+; CHECK-NEXT: lwz r7, 0(r4)
; CHECK-NEXT: stw r5, -12(r1)
; CHECK-NEXT: stw r5, -16(r1)
-; CHECK-NEXT: L..BB0_1: # %entry
-; CHECK-NEXT: #
; CHECK-NEXT: lwarx r6, 0, r3
-; CHECK-NEXT: cmpw cr1, r6, r7
-; CHECK-NEXT: bne cr1, L..BB0_3
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stwcx. r5, 0, r3
-; CHECK-NEXT: bne cr0, L..BB0_1
-; CHECK-NEXT: L..BB0_3: # %entry
; CHECK-NEXT: cmplw r6, r7
+; CHECK-NEXT: bne cr0, L..BB0_2
+; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; CHECK-NEXT: stwcx. r5, 0, r3
; CHECK-NEXT: beq cr0, L..BB0_5
-; CHECK-NEXT: # %bb.4: # %cmpxchg.store_expected
+; CHECK-NEXT: L..BB0_2: # %cmpxchg.failure
+; CHECK-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
+; CHECK-NEXT: # %bb.3: # %cmpxchg.store_expected
; CHECK-NEXT: stw r6, 0(r4)
-; CHECK-NEXT: L..BB0_5: # %cmpxchg.continue
+; CHECK-NEXT: L..BB0_4: # %cmpxchg.continue
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: li r4, 1
-; CHECK-NEXT: isel r3, r4, r3, 4*cr1+eq
+; CHECK-NEXT: isel r3, r4, r3, 4*cr5+lt
; CHECK-NEXT: stb r3, -17(r1)
; CHECK-NEXT: blr
+; CHECK-NEXT: L..BB0_5:
+; CHECK-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
+; CHECK-NEXT: b L..BB0_4
;
; CHECK64-LABEL: foo:
; CHECK64: # %bb.0: # %entry
-; CHECK64-NEXT: lwz r7, 0(r4)
; CHECK64-NEXT: std r3, -8(r1)
; CHECK64-NEXT: std r4, -16(r1)
+; CHECK64-NEXT: lwz r7, 0(r4)
; CHECK64-NEXT: stw r5, -20(r1)
; CHECK64-NEXT: stw r5, -24(r1)
-; CHECK64-NEXT: L..BB0_1: # %entry
-; CHECK64-NEXT: #
; CHECK64-NEXT: lwarx r6, 0, r3
-; CHECK64-NEXT: cmpw cr1, r6, r7
-; CHECK64-NEXT: bne cr1, L..BB0_3
-; CHECK64-NEXT: # %bb.2: # %entry
-; CHECK64-NEXT: #
-; CHECK64-NEXT: stwcx. r5, 0, r3
-; CHECK64-NEXT: bne cr0, L..BB0_1
-; CHECK64-NEXT: L..BB0_3: # %entry
; CHECK64-NEXT: cmplw r6, r7
+; CHECK64-NEXT: bne cr0, L..BB0_2
+; CHECK64-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; CHECK64-NEXT: stwcx. r5, 0, r3
; CHECK64-NEXT: beq cr0, L..BB0_5
-; CHECK64-NEXT: # %bb.4: # %cmpxchg.store_expected
+; CHECK64-NEXT: L..BB0_2: # %cmpxchg.failure
+; CHECK64-NEXT: crxor 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
+; CHECK64-NEXT: # %bb.3: # %cmpxchg.store_expected
; CHECK64-NEXT: stw r6, 0(r4)
-; CHECK64-NEXT: L..BB0_5: # %cmpxchg.continue
+; CHECK64-NEXT: L..BB0_4: # %cmpxchg.continue
; CHECK64-NEXT: li r3, 0
; CHECK64-NEXT: li r4, 1
-; CHECK64-NEXT: isel r3, r4, r3, 4*cr1+eq
+; CHECK64-NEXT: isel r3, r4, r3, 4*cr5+lt
; CHECK64-NEXT: li r4, 1
; CHECK64-NEXT: stb r3, -25(r1)
; CHECK64-NEXT: li r3, 0
-; CHECK64-NEXT: isel r3, r4, r3, 4*cr1+eq
+; CHECK64-NEXT: isel r3, r4, r3, 4*cr5+lt
; CHECK64-NEXT: blr
+; CHECK64-NEXT: L..BB0_5:
+; CHECK64-NEXT: creqv 4*cr5+lt, 4*cr5+lt, 4*cr5+lt
+; CHECK64-NEXT: b L..BB0_4
entry:
%cp.addr = alloca ptr, align 4
%old.addr = alloca ptr, align 4
diff --git a/llvm/test/CodeGen/PowerPC/atomic-float.ll b/llvm/test/CodeGen/PowerPC/atomic-float.ll
index e2a46130ab797..600d28936c162 100644
--- a/llvm/test/CodeGen/PowerPC/atomic-float.ll
+++ b/llvm/test/CodeGen/PowerPC/atomic-float.ll
@@ -9,33 +9,37 @@ define float @test_add(ptr %ptr, float %incr) {
; CHECK-64: # %bb.0: # %entry
; CHECK-64-NEXT: sync
; CHECK-64-NEXT: lfs 0, 0(3)
-; CHECK-64-NEXT: b .LBB0_2
-; CHECK-64-NEXT: .LBB0_1: # %atomicrmw.start
-; CHECK-64-NEXT: #
-; CHECK-64-NEXT: stw 6, -4(1)
-; CHECK-64-NEXT: cmplw 6, 4
-; CHECK-64-NEXT: lfs 0, -4(1)
-; CHECK-64-NEXT: beq 0, .LBB0_5
-; CHECK-64-NEXT: .LBB0_2: # %atomicrmw.start
-; CHECK-64-NEXT: # =>This Loop Header: Depth=1
-; CHECK-64-NEXT: # Child Loop BB0_3 Depth 2
+; CHECK-64-NEXT: b .LBB0_3
+; CHECK-64-NEXT: .LBB0_1: # %cmpxchg.nostore
+; CHECK-64-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-64-NEXT: crxor 20, 20, 20
+; CHECK-64-NEXT: .LBB0_2: # %cmpxchg.end
+; CHECK-64-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-64-NEXT: stw 4, -12(1)
+; CHECK-64-NEXT: lfs 0, -12(1)
+; CHECK-64-NEXT: bc 12, 20, .LBB0_7
+; CHECK-64-NEXT: .LBB0_3: # %atomicrmw.start
+; CHECK-64-NEXT: # =>This Loop Header: Depth=1
+; CHECK-64-NEXT: # Child Loop BB0_4 Depth 2
; CHECK-64-NEXT: fadds 2, 0, 1
-; CHECK-64-NEXT: stfs 2, -8(1)
-; CHECK-64-NEXT: stfs 0, -12(1)
-; CHECK-64-NEXT: lwz 5, -8(1)
-; CHECK-64-NEXT: lwz 4, -12(1)
-; CHECK-64-NEXT: .LBB0_3: # %atomicrmw.start
-; CHECK-64-NEXT: # Parent Loop BB0_2 Depth=1
-; CHECK-64-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-64-NEXT: lwarx 6, 0, 3
-; CHECK-64-NEXT: cmpw 6, 4
-; CHECK-64-NEXT: bne 0, .LBB0_1
-; CHECK-64-NEXT: # %bb.4: # %atomicrmw.start
-; CHECK-64-NEXT: #
+; CHECK-64-NEXT: stfs 2, -4(1)
+; CHECK-64-NEXT: stfs 0, -8(1)
+; CHECK-64-NEXT: lwz 5, -4(1)
+; CHECK-64-NEXT: lwz 6, -8(1)
+; CHECK-64-NEXT: .LBB0_4: # %cmpxchg.start
+; CHECK-64-NEXT: # Parent Loop BB0_3 Depth=1
+; CHECK-64-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-64-NEXT: lwarx 4, 0, 3
+; CHECK-64-NEXT: cmplw 4, 6
+; CHECK-64-NEXT: bne 0, .LBB0_1
+; CHECK-64-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-64-NEXT: # in Loop: Header=BB0_4 Depth=2
; CHECK-64-NEXT: stwcx. 5, 0, 3
-; CHECK-64-NEXT: bne 0, .LBB0_3
-; CHECK-64-NEXT: b .LBB0_1
-; CHECK-64-NEXT: .LBB0_5: # %atomicrmw.end
+; CHECK-64-NEXT: bne 0, .LBB0_4
+; CHECK-64-NEXT: # %bb.6: # in Loop: Header=BB0_3 Depth=1
+; CHECK-64-NEXT: creqv 20, 20, 20
+; CHECK-64-NEXT: b .LBB0_2
+; CHECK-64-NEXT: .LBB0_7: # %atomicrmw.end
; CHECK-64-NEXT: fmr 1, 0
; CHECK-64-NEXT: lwsync
; CHECK-64-NEXT: blr
@@ -46,33 +50,37 @@ define float @test_add(ptr %ptr, float %incr) {
; CHECK-32-NEXT: .cfi_def_cfa_offset 32
; CHECK-32-NEXT: sync
; CHECK-32-NEXT: lfs 0, 0(3)
-; CHECK-32-NEXT: b .LBB0_2
-; CHECK-32-NEXT: .LBB0_1: # %atomicrmw.start
-; CHECK-32-NEXT: #
-; CHECK-32-NEXT: stw 6, 28(1)
-; CHECK-32-NEXT: cmplw 6, 4
-; CHECK-32-NEXT: lfs 0, 28(1)
-; CHECK-32-NEXT: beq 0, .LBB0_5
-; CHECK-32-NEXT: .LBB0_2: # %atomicrmw.start
-; CHECK-32-NEXT: # =>This Loop Header: Depth=1
-; CHECK-32-NEXT: # Child Loop BB0_3 Depth 2
+; CHECK-32-NEXT: b .LBB0_3
+; CHECK-32-NEXT: .LBB0_1: # %cmpxchg.nostore
+; CHECK-32-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-32-NEXT: crxor 20, 20, 20
+; CHECK-32-NEXT: .LBB0_2: # %cmpxchg.end
+; CHECK-32-NEXT: # in Loop: Header=BB0_3 Depth=1
+; CHECK-32-NEXT: stw 4, 20(1)
+; CHECK-32-NEXT: lfs 0, 20(1)
+; CHECK-32-NEXT: bc 12, 20, .LBB0_7
+; CHECK-32-NEXT: .LBB0_3: # %atomicrmw.start
+; CHECK-32-NEXT: # =>This Loop Header: Depth=1
+; CHECK-32-NEXT: # Child Loop BB0_4 Depth 2
; CHECK-32-NEXT: fadds 2, 0, 1
-; CHECK-32-NEXT: stfs 2, 24(1)
-; CHECK-32-NEXT: stfs 0, 20(1)
-; CHECK-32-NEXT: lwz 5, 24(1)
-; CHECK-32-NEXT: lwz 4, 20(1)
-; CHECK-32-NEXT: .LBB0_3: # %atomicrmw.start
-; CHECK-32-NEXT: # Parent Loop BB0_2 Depth=1
-; CHECK-32-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-32-NEXT: lwarx 6, 0, 3
-; CHECK-32-NEXT: cmpw 6, 4
-; CHECK-32-NEXT: bne 0, .LBB0_1
-; CHECK-32-NEXT: # %bb.4: # %atomicrmw.start
-; CHECK-32-NEXT: #
+; CHECK-32-NEXT: stfs 2, 28(1)
+; CHECK-32-NEXT: stfs 0, 24(1)
+; CHECK-32-NEXT: lwz 5, 28(1)
+; CHECK-32-NEXT: lwz 6, 24(1)
+; CHECK-32-NEXT: .LBB0_4: # %cmpxchg.start
+; CHECK-32-NEXT: # Parent Loop BB0_3 Depth=1
+; CHECK-32-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-32-NEXT: lwarx 4, 0, 3
+; CHECK-32-NEXT: cmplw 4, 6
+; CHECK-32-NEXT: bne 0, .LBB0_1
+; CHECK-32-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-32-NEXT: # in Loop: Header=BB0_4 Depth=2
; CHECK-32-NEXT: stwcx. 5, 0, 3
-; CHECK-32-NEXT: bne 0, .LBB0_3
-; CHECK-32-NEXT: b .LBB0_1
-; CHECK-32-NEXT: .LBB0_5: # %atomicrmw.end
+; CHECK-32-NEXT: bne 0, .LBB0_4
+; CHECK-32-NEXT: # %bb.6: # in Loop: Header=BB0_3 Depth=1
+; CHECK-32-NEXT: creqv 20, 20, 20
+; CHECK-32-NEXT: b .LBB0_2
+; CHECK-32-NEXT: .LBB0_7: # %atomicrmw.end
; CHECK-32-NEXT: fmr 1, 0
; CHECK-32-NEXT: lwsync
; CHECK-32-NEXT: addi 1, 1, 32
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
index 4f00cff83942a..27a26aaca8b26 100644
--- a/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll
@@ -5,49 +5,47 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_usub_cond_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: mr 5, 3
-; CHECK-NEXT: rlwinm 7, 5, 3, 27, 28
-; CHECK-NEXT: lbz 3, 0(3)
-; CHECK-NEXT: xori 7, 7, 24
-; CHECK-NEXT: li 8, 255
-; CHECK-NEXT: clrlwi 6, 4, 24
-; CHECK-NEXT: rldicr 5, 5, 0, 61
-; CHECK-NEXT: slw 8, 8, 7
+; CHECK-NEXT: rldicr 5, 3, 0, 61
+; CHECK-NEXT: not 3, 3
+; CHECK-NEXT: li 6, 255
+; CHECK-NEXT: lwz 8, 0(5)
+; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
+; CHECK-NEXT: slw 6, 6, 3
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 7, 4, 24
; CHECK-NEXT: b .LBB0_2
-; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: srw 3, 11, 7
-; CHECK-NEXT: cmplw 3, 9
-; CHECK-NEXT: beq 0, .LBB0_7
-; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB0_5 Depth 2
-; CHECK-NEXT: clrlwi 9, 3, 24
-; CHECK-NEXT: cmplw 9, 6
-; CHECK-NEXT: blt 0, .LBB0_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: sub 3, 3, 4
-; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: slw 3, 3, 7
-; CHECK-NEXT: slw 10, 9, 7
-; CHECK-NEXT: and 3, 3, 8
-; CHECK-NEXT: and 10, 10, 8
-; CHECK-NEXT: .LBB0_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lwarx 12, 0, 5
-; CHECK-NEXT: and 11, 12, 8
-; CHECK-NEXT: cmpw 11, 10
-; CHECK-NEXT: bne 0, .LBB0_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: andc 12, 12, 8
-; CHECK-NEXT: or 12, 12, 3
-; CHECK-NEXT: stwcx. 12, 0, 5
-; CHECK-NEXT: bne 0, .LBB0_5
-; CHECK-NEXT: b .LBB0_1
-; CHECK-NEXT: .LBB0_7: # %atomicrmw.end
+; CHECK-NEXT: .LBB0_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB0_5 Depth 2
+; CHECK-NEXT: srw 9, 8, 3
+; CHECK-NEXT: clrlwi 10, 9, 24
+; CHECK-NEXT: cmplw 10, 7
+; CHECK-NEXT: blt 0, .LBB0_4
+; CHECK-NEXT: # %bb.3: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: sub 9, 9, 4
+; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: clrlwi 9, 9, 24
+; CHECK-NEXT: slw 9, 9, 3
+; CHECK-NEXT: and 10, 8, 6
+; CHECK-NEXT: or 10, 10, 9
+; CHECK-NEXT: .LBB0_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 9, 0, 5
+; CHECK-NEXT: cmplw 9, 8
+; CHECK-NEXT: bne 0, .LBB0_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=2
+; CHECK-NEXT: stwcx. 10, 0, 5
+; CHECK-NEXT: bne 0, .LBB0_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst
@@ -58,50 +56,49 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_usub_cond_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: mr 5, 3
-; CHECK-NEXT: li 8, 0
-; CHECK-NEXT: lhz 3, 0(3)
-; CHECK-NEXT: rlwinm 7, 5, 3, 27, 27
-; CHECK-NEXT: xori 7, 7, 16
-; CHECK-NEXT: ori 8, 8, 65535
-; CHECK-NEXT: clrlwi 6, 4, 16
-; CHECK-NEXT: rldicr 5, 5, 0, 61
-; CHECK-NEXT: slw 8, 8, 7
+; CHECK-NEXT: rldicr 5, 3, 0, 61
+; CHECK-NEXT: clrlwi 3, 3, 30
+; CHECK-NEXT: lis 6, 0
+; CHECK-NEXT: xori 3, 3, 2
+; CHECK-NEXT: lwz 8, 0(5)
+; CHECK-NEXT: ori 6, 6, 65535
+; CHECK-NEXT: slwi 3, 3, 3
+; CHECK-NEXT: slw 6, 6, 3
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 7, 4, 16
; CHECK-NEXT: b .LBB1_2
-; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: srw 3, 11, 7
-; CHECK-NEXT: cmplw 3, 9
-; CHECK-NEXT: beq 0, .LBB1_7
-; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB1_5 Depth 2
-; CHECK-NEXT: clrlwi 9, 3, 16
-; CHECK-NEXT: cmplw 9, 6
-; CHECK-NEXT: blt 0, .LBB1_4
-; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: sub 3, 3, 4
-; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: slw 3, 3, 7
-; CHECK-NEXT: slw 10, 9, 7
-; CHECK-NEXT: and 3, 3, 8
-; CHECK-NEXT: and 10, 10, 8
-; CHECK-NEXT: .LBB1_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lwarx 12, 0, 5
-; CHECK-NEXT: and 11, 12, 8
-; CHECK-NEXT: cmpw 11, 10
-; CHECK-NEXT: bne 0, .LBB1_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: andc 12, 12, 8
-; CHECK-NEXT: or 12, 12, 3
-; CHECK-NEXT: stwcx. 12, 0, 5
-; CHECK-NEXT: bne 0, .LBB1_5
-; CHECK-NEXT: b .LBB1_1
-; CHECK-NEXT: .LBB1_7: # %atomicrmw.end
+; CHECK-NEXT: .LBB1_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB1_5 Depth 2
+; CHECK-NEXT: srw 9, 8, 3
+; CHECK-NEXT: clrlwi 10, 9, 16
+; CHECK-NEXT: cmplw 10, 7
+; CHECK-NEXT: blt 0, .LBB1_4
+; CHECK-NEXT: # %bb.3: # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: sub 9, 9, 4
+; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: clrlwi 9, 9, 16
+; CHECK-NEXT: slw 9, 9, 3
+; CHECK-NEXT: and 10, 8, 6
+; CHECK-NEXT: or 10, 10, 9
+; CHECK-NEXT: .LBB1_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 9, 0, 5
+; CHECK-NEXT: cmplw 9, 8
+; CHECK-NEXT: bne 0, .LBB1_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB1_5 Depth=2
+; CHECK-NEXT: stwcx. 10, 0, 5
+; CHECK-NEXT: bne 0, .LBB1_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst
@@ -114,34 +111,33 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
; CHECK-NEXT: b .LBB2_2
-; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: cmplw 5, 6
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: beq 0, .LBB2_7
-; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB2_5 Depth 2
-; CHECK-NEXT: cmplw 6, 4
+; CHECK-NEXT: .LBB2_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB2_5 Depth 2
+; CHECK-NEXT: cmplw 6, 4
; CHECK-NEXT: bge 0, .LBB2_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: mr 7, 6
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT: mr 7, 6
; CHECK-NEXT: b .LBB2_5
-; CHECK-NEXT: .LBB2_4:
-; CHECK-NEXT: sub 7, 6, 4
-; CHECK-NEXT: .LBB2_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB2_4: # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT: sub 7, 6, 4
+; CHECK-NEXT: .LBB2_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
-; CHECK-NEXT: cmpw 5, 6
-; CHECK-NEXT: bne 0, .LBB2_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: bne 0, .LBB2_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB2_5 Depth=2
; CHECK-NEXT: stwcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB2_5
-; CHECK-NEXT: b .LBB2_1
-; CHECK-NEXT: .LBB2_7: # %atomicrmw.end
+; CHECK-NEXT: bne 0, .LBB2_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -155,34 +151,33 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
; CHECK-NEXT: b .LBB3_2
-; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: cmpld 5, 6
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: beq 0, .LBB3_7
-; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB3_5 Depth 2
-; CHECK-NEXT: cmpld 6, 4
+; CHECK-NEXT: .LBB3_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB3_5 Depth 2
+; CHECK-NEXT: cmpld 6, 4
; CHECK-NEXT: bge 0, .LBB3_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: mr 7, 6
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT: mr 7, 6
; CHECK-NEXT: b .LBB3_5
-; CHECK-NEXT: .LBB3_4:
-; CHECK-NEXT: sub 7, 6, 4
-; CHECK-NEXT: .LBB3_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB3_4: # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT: sub 7, 6, 4
+; CHECK-NEXT: .LBB3_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
-; CHECK-NEXT: cmpd 5, 6
-; CHECK-NEXT: bne 0, .LBB3_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: bne 0, .LBB3_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB3_5 Depth=2
; CHECK-NEXT: stdcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB3_5
-; CHECK-NEXT: b .LBB3_1
-; CHECK-NEXT: .LBB3_7: # %atomicrmw.end
+; CHECK-NEXT: bne 0, .LBB3_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -194,52 +189,49 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_usub_sat_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: mr 5, 3
-; CHECK-NEXT: rlwinm 6, 5, 3, 27, 28
-; CHECK-NEXT: lbz 3, 0(3)
-; CHECK-NEXT: xori 6, 6, 24
-; CHECK-NEXT: li 7, 255
-; CHECK-NEXT: clrlwi 4, 4, 24
-; CHECK-NEXT: rldicr 5, 5, 0, 61
-; CHECK-NEXT: slw 7, 7, 6
+; CHECK-NEXT: rldicr 5, 3, 0, 61
+; CHECK-NEXT: not 3, 3
+; CHECK-NEXT: li 6, 255
+; CHECK-NEXT: lwz 7, 0(5)
+; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
+; CHECK-NEXT: slw 6, 6, 3
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 4, 4, 24
; CHECK-NEXT: b .LBB4_2
-; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: srw 3, 10, 6
-; CHECK-NEXT: cmplw 3, 8
-; CHECK-NEXT: beq 0, .LBB4_7
-; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB4_5 Depth 2
-; CHECK-NEXT: clrlwi 8, 3, 24
-; CHECK-NEXT: sub 3, 8, 4
-; CHECK-NEXT: cmplw 3, 8
+; CHECK-NEXT: .LBB4_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB4_5 Depth 2
+; CHECK-NEXT: srw 8, 7, 3
+; CHECK-NEXT: clrlwi 9, 8, 24
+; CHECK-NEXT: sub 8, 9, 4
+; CHECK-NEXT: cmplw 8, 9
; CHECK-NEXT: li 9, 0
-; CHECK-NEXT: bgt 0, .LBB4_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: mr 9, 3
-; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: slw 3, 9, 6
-; CHECK-NEXT: slw 9, 8, 6
-; CHECK-NEXT: and 3, 3, 7
-; CHECK-NEXT: and 9, 9, 7
-; CHECK-NEXT: .LBB4_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB4_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lwarx 11, 0, 5
-; CHECK-NEXT: and 10, 11, 7
-; CHECK-NEXT: cmpw 10, 9
-; CHECK-NEXT: bne 0, .LBB4_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: andc 11, 11, 7
-; CHECK-NEXT: or 11, 11, 3
-; CHECK-NEXT: stwcx. 11, 0, 5
-; CHECK-NEXT: bne 0, .LBB4_5
-; CHECK-NEXT: b .LBB4_1
-; CHECK-NEXT: .LBB4_7: # %atomicrmw.end
+; CHECK-NEXT: bgt 0, .LBB4_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT: mr 9, 8
+; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT: slw 8, 9, 3
+; CHECK-NEXT: and 9, 7, 6
+; CHECK-NEXT: or 9, 9, 8
+; CHECK-NEXT: .LBB4_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB4_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 8, 0, 5
+; CHECK-NEXT: cmplw 8, 7
+; CHECK-NEXT: bne 0, .LBB4_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB4_5 Depth=2
+; CHECK-NEXT: stwcx. 9, 0, 5
+; CHECK-NEXT: bne 0, .LBB4_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
@@ -250,53 +242,51 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_usub_sat_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: mr 5, 3
-; CHECK-NEXT: li 7, 0
-; CHECK-NEXT: lhz 3, 0(3)
-; CHECK-NEXT: rlwinm 6, 5, 3, 27, 27
-; CHECK-NEXT: xori 6, 6, 16
-; CHECK-NEXT: ori 7, 7, 65535
-; CHECK-NEXT: clrlwi 4, 4, 16
-; CHECK-NEXT: rldicr 5, 5, 0, 61
-; CHECK-NEXT: slw 7, 7, 6
+; CHECK-NEXT: rldicr 5, 3, 0, 61
+; CHECK-NEXT: clrlwi 3, 3, 30
+; CHECK-NEXT: lis 6, 0
+; CHECK-NEXT: xori 3, 3, 2
+; CHECK-NEXT: lwz 7, 0(5)
+; CHECK-NEXT: ori 6, 6, 65535
+; CHECK-NEXT: slwi 3, 3, 3
+; CHECK-NEXT: slw 6, 6, 3
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 4, 4, 16
; CHECK-NEXT: b .LBB5_2
-; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: srw 3, 10, 6
-; CHECK-NEXT: cmplw 3, 8
-; CHECK-NEXT: beq 0, .LBB5_7
-; CHECK-NEXT: .LBB5_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB5_5 Depth 2
-; CHECK-NEXT: clrlwi 8, 3, 16
-; CHECK-NEXT: sub 3, 8, 4
-; CHECK-NEXT: cmplw 3, 8
+; CHECK-NEXT: .LBB5_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: .LBB5_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB5_5 Depth 2
+; CHECK-NEXT: srw 8, 7, 3
+; CHECK-NEXT: clrlwi 9, 8, 16
+; CHECK-NEXT: sub 8, 9, 4
+; CHECK-NEXT: cmplw 8, 9
; CHECK-NEXT: li 9, 0
-; CHECK-NEXT: bgt 0, .LBB5_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: mr 9, 3
-; CHECK-NEXT: .LBB5_4: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: slw 3, 9, 6
-; CHECK-NEXT: slw 9, 8, 6
-; CHECK-NEXT: and 3, 3, 7
-; CHECK-NEXT: and 9, 9, 7
-; CHECK-NEXT: .LBB5_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB5_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lwarx 11, 0, 5
-; CHECK-NEXT: and 10, 11, 7
-; CHECK-NEXT: cmpw 10, 9
-; CHECK-NEXT: bne 0, .LBB5_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: andc 11, 11, 7
-; CHECK-NEXT: or 11, 11, 3
-; CHECK-NEXT: stwcx. 11, 0, 5
-; CHECK-NEXT: bne 0, .LBB5_5
-; CHECK-NEXT: b .LBB5_1
-; CHECK-NEXT: .LBB5_7: # %atomicrmw.end
+; CHECK-NEXT: bgt 0, .LBB5_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT: mr 9, 8
+; CHECK-NEXT: .LBB5_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT: slw 8, 9, 3
+; CHECK-NEXT: and 9, 7, 6
+; CHECK-NEXT: or 9, 9, 8
+; CHECK-NEXT: .LBB5_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB5_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 8, 0, 5
+; CHECK-NEXT: cmplw 8, 7
+; CHECK-NEXT: bne 0, .LBB5_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB5_5 Depth=2
+; CHECK-NEXT: stwcx. 9, 0, 5
+; CHECK-NEXT: bne 0, .LBB5_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
@@ -309,33 +299,32 @@ define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
; CHECK-NEXT: b .LBB6_2
-; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: cmplw 5, 6
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: beq 0, .LBB6_6
-; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB6_4 Depth 2
-; CHECK-NEXT: sub 5, 6, 4
-; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: .LBB6_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB6_4 Depth 2
+; CHECK-NEXT: sub 5, 6, 4
+; CHECK-NEXT: cmplw 5, 6
; CHECK-NEXT: li 7, 0
-; CHECK-NEXT: bgt 0, .LBB6_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: mr 7, 5
-; CHECK-NEXT: .LBB6_4: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB6_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: bgt 0, .LBB6_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT: mr 7, 5
+; CHECK-NEXT: .LBB6_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB6_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
-; CHECK-NEXT: cmpw 5, 6
-; CHECK-NEXT: bne 0, .LBB6_1
-; CHECK-NEXT: # %bb.5: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: bne 0, .LBB6_1
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB6_4 Depth=2
; CHECK-NEXT: stwcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB6_4
-; CHECK-NEXT: b .LBB6_1
-; CHECK-NEXT: .LBB6_6: # %atomicrmw.end
+; CHECK-NEXT: bne 0, .LBB6_4
+; CHECK-NEXT: # %bb.6:
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: # %bb.7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -349,33 +338,32 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
; CHECK-NEXT: b .LBB7_2
-; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: cmpld 5, 6
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: beq 0, .LBB7_6
-; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB7_4 Depth 2
-; CHECK-NEXT: subc 5, 6, 4
+; CHECK-NEXT: .LBB7_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB7_4 Depth 2
+; CHECK-NEXT: subc 5, 6, 4
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: addze. 8, 7
-; CHECK-NEXT: beq 0, .LBB7_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: mr 7, 5
-; CHECK-NEXT: .LBB7_4: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB7_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: beq 0, .LBB7_4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
+; CHECK-NEXT: mr 7, 5
+; CHECK-NEXT: .LBB7_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB7_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
-; CHECK-NEXT: cmpd 5, 6
-; CHECK-NEXT: bne 0, .LBB7_1
-; CHECK-NEXT: # %bb.5: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: bne 0, .LBB7_1
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB7_4 Depth=2
; CHECK-NEXT: stdcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB7_4
-; CHECK-NEXT: b .LBB7_1
-; CHECK-NEXT: .LBB7_6: # %atomicrmw.end
+; CHECK-NEXT: bne 0, .LBB7_4
+; CHECK-NEXT: # %bb.6:
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: # %bb.7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
index 2882dc420b608..6ced47bd6bcba 100644
--- a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
@@ -5,51 +5,49 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: mr 5, 3
-; CHECK-NEXT: rlwinm 6, 5, 3, 27, 28
-; CHECK-NEXT: lbz 3, 0(3)
-; CHECK-NEXT: xori 6, 6, 24
-; CHECK-NEXT: li 7, 255
-; CHECK-NEXT: clrlwi 4, 4, 24
-; CHECK-NEXT: rldicr 5, 5, 0, 61
-; CHECK-NEXT: slw 7, 7, 6
+; CHECK-NEXT: rldicr 5, 3, 0, 61
+; CHECK-NEXT: not 3, 3
+; CHECK-NEXT: li 6, 255
+; CHECK-NEXT: lwz 7, 0(5)
+; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
+; CHECK-NEXT: slw 6, 6, 3
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 4, 4, 24
; CHECK-NEXT: b .LBB0_2
-; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: srw 3, 10, 6
-; CHECK-NEXT: cmplw 3, 8
-; CHECK-NEXT: beq 0, .LBB0_7
-; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB0_5 Depth 2
-; CHECK-NEXT: clrlwi 8, 3, 24
-; CHECK-NEXT: cmplw 8, 4
+; CHECK-NEXT: .LBB0_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB0_5 Depth 2
+; CHECK-NEXT: srw 8, 7, 3
+; CHECK-NEXT: clrlwi 9, 8, 24
+; CHECK-NEXT: cmplw 9, 4
; CHECK-NEXT: li 9, 0
; CHECK-NEXT: bge 0, .LBB0_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: addi 9, 3, 1
-; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: slw 3, 9, 6
-; CHECK-NEXT: slw 9, 8, 6
-; CHECK-NEXT: and 3, 3, 7
-; CHECK-NEXT: and 9, 9, 7
-; CHECK-NEXT: .LBB0_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lwarx 11, 0, 5
-; CHECK-NEXT: and 10, 11, 7
-; CHECK-NEXT: cmpw 10, 9
-; CHECK-NEXT: bne 0, .LBB0_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: andc 11, 11, 7
-; CHECK-NEXT: or 11, 11, 3
-; CHECK-NEXT: stwcx. 11, 0, 5
-; CHECK-NEXT: bne 0, .LBB0_5
-; CHECK-NEXT: b .LBB0_1
-; CHECK-NEXT: .LBB0_7: # %atomicrmw.end
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: addi 9, 8, 1
+; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: clrlwi 8, 9, 24
+; CHECK-NEXT: slw 8, 8, 3
+; CHECK-NEXT: and 9, 7, 6
+; CHECK-NEXT: or 9, 9, 8
+; CHECK-NEXT: .LBB0_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 8, 0, 5
+; CHECK-NEXT: cmplw 8, 7
+; CHECK-NEXT: bne 0, .LBB0_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=2
+; CHECK-NEXT: stwcx. 9, 0, 5
+; CHECK-NEXT: bne 0, .LBB0_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
@@ -60,52 +58,51 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: mr 5, 3
-; CHECK-NEXT: li 7, 0
-; CHECK-NEXT: lhz 3, 0(3)
-; CHECK-NEXT: rlwinm 6, 5, 3, 27, 27
-; CHECK-NEXT: xori 6, 6, 16
-; CHECK-NEXT: ori 7, 7, 65535
-; CHECK-NEXT: clrlwi 4, 4, 16
-; CHECK-NEXT: rldicr 5, 5, 0, 61
-; CHECK-NEXT: slw 7, 7, 6
+; CHECK-NEXT: rldicr 5, 3, 0, 61
+; CHECK-NEXT: clrlwi 3, 3, 30
+; CHECK-NEXT: lis 6, 0
+; CHECK-NEXT: xori 3, 3, 2
+; CHECK-NEXT: lwz 7, 0(5)
+; CHECK-NEXT: ori 6, 6, 65535
+; CHECK-NEXT: slwi 3, 3, 3
+; CHECK-NEXT: slw 6, 6, 3
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 4, 4, 16
; CHECK-NEXT: b .LBB1_2
-; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: srw 3, 10, 6
-; CHECK-NEXT: cmplw 3, 8
-; CHECK-NEXT: beq 0, .LBB1_7
-; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB1_5 Depth 2
-; CHECK-NEXT: clrlwi 8, 3, 16
-; CHECK-NEXT: cmplw 8, 4
+; CHECK-NEXT: .LBB1_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB1_5 Depth 2
+; CHECK-NEXT: srw 8, 7, 3
+; CHECK-NEXT: clrlwi 9, 8, 16
+; CHECK-NEXT: cmplw 9, 4
; CHECK-NEXT: li 9, 0
; CHECK-NEXT: bge 0, .LBB1_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: addi 9, 3, 1
-; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: slw 3, 9, 6
-; CHECK-NEXT: slw 9, 8, 6
-; CHECK-NEXT: and 3, 3, 7
-; CHECK-NEXT: and 9, 9, 7
-; CHECK-NEXT: .LBB1_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lwarx 11, 0, 5
-; CHECK-NEXT: and 10, 11, 7
-; CHECK-NEXT: cmpw 10, 9
-; CHECK-NEXT: bne 0, .LBB1_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: andc 11, 11, 7
-; CHECK-NEXT: or 11, 11, 3
-; CHECK-NEXT: stwcx. 11, 0, 5
-; CHECK-NEXT: bne 0, .LBB1_5
-; CHECK-NEXT: b .LBB1_1
-; CHECK-NEXT: .LBB1_7: # %atomicrmw.end
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: addi 9, 8, 1
+; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: clrlwi 8, 9, 16
+; CHECK-NEXT: slw 8, 8, 3
+; CHECK-NEXT: and 9, 7, 6
+; CHECK-NEXT: or 9, 9, 8
+; CHECK-NEXT: .LBB1_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 8, 0, 5
+; CHECK-NEXT: cmplw 8, 7
+; CHECK-NEXT: bne 0, .LBB1_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB1_5 Depth=2
+; CHECK-NEXT: stwcx. 9, 0, 5
+; CHECK-NEXT: bne 0, .LBB1_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 7, 8
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: srw 3, 8, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
@@ -118,32 +115,31 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
; CHECK-NEXT: b .LBB2_2
-; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: cmplw 5, 6
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: beq 0, .LBB2_6
-; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB2_4 Depth 2
-; CHECK-NEXT: cmplw 6, 4
+; CHECK-NEXT: .LBB2_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB2_4 Depth 2
+; CHECK-NEXT: cmplw 6, 4
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: bge 0, .LBB2_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: addi 7, 6, 1
-; CHECK-NEXT: .LBB2_4: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB2_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
-; CHECK-NEXT: cmpw 5, 6
-; CHECK-NEXT: bne 0, .LBB2_1
-; CHECK-NEXT: # %bb.5: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: bne 0, .LBB2_1
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB2_4 Depth=2
; CHECK-NEXT: stwcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB2_4
-; CHECK-NEXT: b .LBB2_1
-; CHECK-NEXT: .LBB2_6: # %atomicrmw.end
+; CHECK-NEXT: bne 0, .LBB2_4
+; CHECK-NEXT: # %bb.6:
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: # %bb.7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -157,32 +153,31 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
; CHECK-NEXT: b .LBB3_2
-; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: cmpld 5, 6
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: beq 0, .LBB3_6
-; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB3_4 Depth 2
-; CHECK-NEXT: cmpld 6, 4
+; CHECK-NEXT: .LBB3_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB3_4 Depth 2
+; CHECK-NEXT: cmpld 6, 4
; CHECK-NEXT: li 7, 0
; CHECK-NEXT: bge 0, .LBB3_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: addi 7, 6, 1
-; CHECK-NEXT: .LBB3_4: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB3_4: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
-; CHECK-NEXT: cmpd 5, 6
-; CHECK-NEXT: bne 0, .LBB3_1
-; CHECK-NEXT: # %bb.5: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: bne 0, .LBB3_1
+; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB3_4 Depth=2
; CHECK-NEXT: stdcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB3_4
-; CHECK-NEXT: b .LBB3_1
-; CHECK-NEXT: .LBB3_6: # %atomicrmw.end
+; CHECK-NEXT: bne 0, .LBB3_4
+; CHECK-NEXT: # %bb.6:
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: # %bb.7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -194,52 +189,50 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: mr 5, 3
-; CHECK-NEXT: rlwinm 7, 5, 3, 27, 28
-; CHECK-NEXT: lbz 3, 0(3)
-; CHECK-NEXT: xori 7, 7, 24
-; CHECK-NEXT: li 8, 255
-; CHECK-NEXT: clrlwi 6, 4, 24
-; CHECK-NEXT: rldicr 5, 5, 0, 61
-; CHECK-NEXT: slw 8, 8, 7
+; CHECK-NEXT: rldicr 5, 3, 0, 61
+; CHECK-NEXT: not 3, 3
+; CHECK-NEXT: li 6, 255
+; CHECK-NEXT: lwz 8, 0(5)
+; CHECK-NEXT: rlwinm 3, 3, 3, 27, 28
+; CHECK-NEXT: slw 6, 6, 3
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 7, 4, 24
; CHECK-NEXT: b .LBB4_2
-; CHECK-NEXT: .LBB4_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: srw 3, 11, 7
-; CHECK-NEXT: cmplw 3, 9
-; CHECK-NEXT: beq 0, .LBB4_7
-; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB4_5 Depth 2
-; CHECK-NEXT: andi. 9, 3, 255
-; CHECK-NEXT: cmplw 1, 9, 6
+; CHECK-NEXT: .LBB4_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB4_5 Depth 2
+; CHECK-NEXT: srw 9, 8, 3
+; CHECK-NEXT: andi. 10, 9, 255
+; CHECK-NEXT: cmplw 1, 10, 7
; CHECK-NEXT: cror 20, 2, 5
-; CHECK-NEXT: mr 10, 4
+; CHECK-NEXT: mr 10, 4
; CHECK-NEXT: bc 12, 20, .LBB4_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: addi 10, 3, -1
-; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: slw 3, 10, 7
-; CHECK-NEXT: slw 10, 9, 7
-; CHECK-NEXT: and 3, 3, 8
-; CHECK-NEXT: and 10, 10, 8
-; CHECK-NEXT: .LBB4_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB4_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lwarx 12, 0, 5
-; CHECK-NEXT: and 11, 12, 8
-; CHECK-NEXT: cmpw 11, 10
-; CHECK-NEXT: bne 0, .LBB4_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: andc 12, 12, 8
-; CHECK-NEXT: or 12, 12, 3
-; CHECK-NEXT: stwcx. 12, 0, 5
-; CHECK-NEXT: bne 0, .LBB4_5
-; CHECK-NEXT: b .LBB4_1
-; CHECK-NEXT: .LBB4_7: # %atomicrmw.end
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT: addi 10, 9, -1
+; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT: clrlwi 9, 10, 24
+; CHECK-NEXT: slw 9, 9, 3
+; CHECK-NEXT: and 10, 8, 6
+; CHECK-NEXT: or 10, 10, 9
+; CHECK-NEXT: .LBB4_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB4_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 9, 0, 5
+; CHECK-NEXT: cmplw 9, 8
+; CHECK-NEXT: bne 0, .LBB4_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB4_5 Depth=2
+; CHECK-NEXT: stwcx. 10, 0, 5
+; CHECK-NEXT: bne 0, .LBB4_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
@@ -250,53 +243,52 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: mr 5, 3
-; CHECK-NEXT: li 8, 0
-; CHECK-NEXT: lhz 3, 0(3)
-; CHECK-NEXT: rlwinm 7, 5, 3, 27, 27
-; CHECK-NEXT: xori 7, 7, 16
-; CHECK-NEXT: ori 8, 8, 65535
-; CHECK-NEXT: clrlwi 6, 4, 16
-; CHECK-NEXT: rldicr 5, 5, 0, 61
-; CHECK-NEXT: slw 8, 8, 7
+; CHECK-NEXT: rldicr 5, 3, 0, 61
+; CHECK-NEXT: clrlwi 3, 3, 30
+; CHECK-NEXT: lis 6, 0
+; CHECK-NEXT: xori 3, 3, 2
+; CHECK-NEXT: lwz 8, 0(5)
+; CHECK-NEXT: ori 6, 6, 65535
+; CHECK-NEXT: slwi 3, 3, 3
+; CHECK-NEXT: slw 6, 6, 3
+; CHECK-NEXT: not 6, 6
+; CHECK-NEXT: clrlwi 7, 4, 16
; CHECK-NEXT: b .LBB5_2
-; CHECK-NEXT: .LBB5_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: srw 3, 11, 7
-; CHECK-NEXT: cmplw 3, 9
-; CHECK-NEXT: beq 0, .LBB5_7
-; CHECK-NEXT: .LBB5_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB5_5 Depth 2
-; CHECK-NEXT: andi. 9, 3, 65535
-; CHECK-NEXT: cmplw 1, 9, 6
+; CHECK-NEXT: .LBB5_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: .LBB5_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB5_5 Depth 2
+; CHECK-NEXT: srw 9, 8, 3
+; CHECK-NEXT: andi. 10, 9, 65535
+; CHECK-NEXT: cmplw 1, 10, 7
; CHECK-NEXT: cror 20, 2, 5
-; CHECK-NEXT: mr 10, 4
+; CHECK-NEXT: mr 10, 4
; CHECK-NEXT: bc 12, 20, .LBB5_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: addi 10, 3, -1
-; CHECK-NEXT: .LBB5_4: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: slw 3, 10, 7
-; CHECK-NEXT: slw 10, 9, 7
-; CHECK-NEXT: and 3, 3, 8
-; CHECK-NEXT: and 10, 10, 8
-; CHECK-NEXT: .LBB5_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB5_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lwarx 12, 0, 5
-; CHECK-NEXT: and 11, 12, 8
-; CHECK-NEXT: cmpw 11, 10
-; CHECK-NEXT: bne 0, .LBB5_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: andc 12, 12, 8
-; CHECK-NEXT: or 12, 12, 3
-; CHECK-NEXT: stwcx. 12, 0, 5
-; CHECK-NEXT: bne 0, .LBB5_5
-; CHECK-NEXT: b .LBB5_1
-; CHECK-NEXT: .LBB5_7: # %atomicrmw.end
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT: addi 10, 9, -1
+; CHECK-NEXT: .LBB5_4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT: clrlwi 9, 10, 16
+; CHECK-NEXT: slw 9, 9, 3
+; CHECK-NEXT: and 10, 8, 6
+; CHECK-NEXT: or 10, 10, 9
+; CHECK-NEXT: .LBB5_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB5_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: lwarx 9, 0, 5
+; CHECK-NEXT: cmplw 9, 8
+; CHECK-NEXT: bne 0, .LBB5_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB5_5 Depth=2
+; CHECK-NEXT: stwcx. 10, 0, 5
+; CHECK-NEXT: bne 0, .LBB5_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 8, 9
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: srw 3, 9, 3
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
@@ -309,37 +301,36 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: lwz 6, 0(3)
; CHECK-NEXT: b .LBB6_2
-; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: cmplw 5, 6
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: beq 0, .LBB6_7
-; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB6_5 Depth 2
-; CHECK-NEXT: cmpwi 6, 0
-; CHECK-NEXT: mr 7, 4
+; CHECK-NEXT: .LBB6_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: .LBB6_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB6_5 Depth 2
+; CHECK-NEXT: cmpwi 6, 0
+; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: bc 12, 2, .LBB6_5
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: cmplw 6, 4
-; CHECK-NEXT: mr 7, 4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT: cmplw 6, 4
+; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: bc 12, 1, .LBB6_5
-; CHECK-NEXT: # %bb.4: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: # %bb.4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB6_2 Depth=1
; CHECK-NEXT: addi 7, 6, -1
-; CHECK-NEXT: .LBB6_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB6_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB6_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB6_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
-; CHECK-NEXT: cmpw 5, 6
-; CHECK-NEXT: bne 0, .LBB6_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: bne 0, .LBB6_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB6_5 Depth=2
; CHECK-NEXT: stwcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB6_5
-; CHECK-NEXT: b .LBB6_1
-; CHECK-NEXT: .LBB6_7: # %atomicrmw.end
+; CHECK-NEXT: bne 0, .LBB6_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -353,38 +344,37 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: sync
; CHECK-NEXT: ld 6, 0(3)
; CHECK-NEXT: b .LBB7_2
-; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: cmpld 5, 6
-; CHECK-NEXT: mr 6, 5
-; CHECK-NEXT: beq 0, .LBB7_7
-; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB7_5 Depth 2
-; CHECK-NEXT: cmpdi 6, 0
-; CHECK-NEXT: mr 7, 4
+; CHECK-NEXT: .LBB7_1: # %cmpxchg.nostore
+; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: .LBB7_2: # %atomicrmw.start
+; CHECK-NEXT: # =>This Loop Header: Depth=1
+; CHECK-NEXT: # Child Loop BB7_5 Depth 2
+; CHECK-NEXT: cmpdi 6, 0
+; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: bc 12, 2, .LBB7_5
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: cmpld 6, 4
-; CHECK-NEXT: mr 7, 4
+; CHECK-NEXT: # %bb.3: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
+; CHECK-NEXT: cmpld 6, 4
+; CHECK-NEXT: mr 7, 4
; CHECK-NEXT: bc 12, 1, .LBB7_5
-; CHECK-NEXT: # %bb.4: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: # %bb.4: # %atomicrmw.start
+; CHECK-NEXT: # in Loop: Header=BB7_2 Depth=1
; CHECK-NEXT: addi 7, 6, -1
-; CHECK-NEXT: .LBB7_5: # %atomicrmw.start
-; CHECK-NEXT: # Parent Loop BB7_2 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
+; CHECK-NEXT: .LBB7_5: # %cmpxchg.start
+; CHECK-NEXT: # Parent Loop BB7_2 Depth=1
+; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
-; CHECK-NEXT: cmpd 5, 6
-; CHECK-NEXT: bne 0, .LBB7_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
-; CHECK-NEXT: #
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: bne 0, .LBB7_1
+; CHECK-NEXT: # %bb.6: # %cmpxchg.fencedstore
+; CHECK-NEXT: # in Loop: Header=BB7_5 Depth=2
; CHECK-NEXT: stdcx. 7, 0, 3
-; CHECK-NEXT: bne 0, .LBB7_5
-; CHECK-NEXT: b .LBB7_1
-; CHECK-NEXT: .LBB7_7: # %atomicrmw.end
-; CHECK-NEXT: mr 3, 5
+; CHECK-NEXT: bne 0, .LBB7_5
+; CHECK-NEXT: # %bb.7:
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: # %bb.8: # %atomicrmw.end
+; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw udec_wrap ptr %ptr, i64 %val seq_cst
diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
index b31be701454da..280c4299c30b7 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll
@@ -400,15 +400,20 @@ define void @test39() {
define void @test40(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test40:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB40_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB40_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB40_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val monotonic monotonic
ret void
@@ -417,15 +422,20 @@ define void @test40(ptr %ptr, i8 %cmp, i8 %val) {
define void @test41(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test41:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB41_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB41_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB41_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB41_1
-; PPC64LE-NEXT: .LBB41_3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire monotonic
@@ -435,15 +445,20 @@ define void @test41(ptr %ptr, i8 %cmp, i8 %val) {
define void @test42(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test42:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB42_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB42_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bne 0, .LBB42_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB42_1
-; PPC64LE-NEXT: .LBB42_3:
+; PPC64LE-NEXT: .LBB42_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
@@ -452,17 +467,26 @@ define void @test42(ptr %ptr, i8 %cmp, i8 %val) {
define void @test43(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test43:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB43_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB43_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB43_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB43_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release monotonic
ret void
@@ -470,17 +494,27 @@ define void @test43(ptr %ptr, i8 %cmp, i8 %val) {
define void @test44(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test44:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB44_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB44_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB44_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB44_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB44_1
-; PPC64LE-NEXT: .LBB44_3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB44_2
+; PPC64LE-NEXT: .LBB44_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val release acquire
@@ -489,17 +523,29 @@ define void @test44(ptr %ptr, i8 %cmp, i8 %val) {
define void @test45(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test45:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB45_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB45_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB45_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB45_1
-; PPC64LE-NEXT: .LBB45_3:
+; PPC64LE-NEXT: beq 0, .LBB45_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB45_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB45_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel monotonic
@@ -508,17 +554,27 @@ define void @test45(ptr %ptr, i8 %cmp, i8 %val) {
define void @test46(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test46:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB46_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB46_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB46_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB46_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB46_1
-; PPC64LE-NEXT: .LBB46_3:
+; PPC64LE-NEXT: beq 0, .LBB46_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB46_2
+; PPC64LE-NEXT: .LBB46_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acq_rel acquire
@@ -527,17 +583,29 @@ define void @test46(ptr %ptr, i8 %cmp, i8 %val) {
define void @test47(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test47:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB47_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB47_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB47_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB47_1
-; PPC64LE-NEXT: .LBB47_3:
+; PPC64LE-NEXT: beq 0, .LBB47_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB47_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB47_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst monotonic
@@ -546,17 +614,27 @@ define void @test47(ptr %ptr, i8 %cmp, i8 %val) {
define void @test48(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test48:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB48_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB48_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB48_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB48_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB48_1
-; PPC64LE-NEXT: .LBB48_3:
+; PPC64LE-NEXT: beq 0, .LBB48_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB48_2
+; PPC64LE-NEXT: .LBB48_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst acquire
@@ -565,17 +643,27 @@ define void @test48(ptr %ptr, i8 %cmp, i8 %val) {
define void @test49(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test49:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB49_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB49_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB49_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB49_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB49_1
-; PPC64LE-NEXT: .LBB49_3:
+; PPC64LE-NEXT: beq 0, .LBB49_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB49_2
+; PPC64LE-NEXT: .LBB49_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst seq_cst
@@ -585,15 +673,20 @@ define void @test49(ptr %ptr, i8 %cmp, i8 %val) {
define void @test50(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test50:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB50_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB50_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB50_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val monotonic monotonic
ret void
@@ -602,15 +695,20 @@ define void @test50(ptr %ptr, i16 %cmp, i16 %val) {
define void @test51(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test51:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB51_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB51_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB51_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB51_1
-; PPC64LE-NEXT: .LBB51_3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire monotonic
@@ -620,15 +718,20 @@ define void @test51(ptr %ptr, i16 %cmp, i16 %val) {
define void @test52(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test52:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB52_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB52_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bne 0, .LBB52_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB52_1
-; PPC64LE-NEXT: .LBB52_3:
+; PPC64LE-NEXT: .LBB52_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
@@ -637,17 +740,26 @@ define void @test52(ptr %ptr, i16 %cmp, i16 %val) {
define void @test53(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test53:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB53_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB53_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB53_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB53_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release monotonic
ret void
@@ -655,17 +767,27 @@ define void @test53(ptr %ptr, i16 %cmp, i16 %val) {
define void @test54(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test54:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB54_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB54_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB54_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB54_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB54_1
-; PPC64LE-NEXT: .LBB54_3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB54_2
+; PPC64LE-NEXT: .LBB54_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val release acquire
@@ -674,17 +796,29 @@ define void @test54(ptr %ptr, i16 %cmp, i16 %val) {
define void @test55(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test55:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB55_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB55_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB55_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB55_1
-; PPC64LE-NEXT: .LBB55_3:
+; PPC64LE-NEXT: beq 0, .LBB55_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB55_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB55_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel monotonic
@@ -693,17 +827,27 @@ define void @test55(ptr %ptr, i16 %cmp, i16 %val) {
define void @test56(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test56:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB56_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB56_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB56_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB56_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB56_1
-; PPC64LE-NEXT: .LBB56_3:
+; PPC64LE-NEXT: beq 0, .LBB56_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB56_2
+; PPC64LE-NEXT: .LBB56_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acq_rel acquire
@@ -712,17 +856,29 @@ define void @test56(ptr %ptr, i16 %cmp, i16 %val) {
define void @test57(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test57:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB57_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB57_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB57_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB57_1
-; PPC64LE-NEXT: .LBB57_3:
+; PPC64LE-NEXT: beq 0, .LBB57_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB57_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB57_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst monotonic
@@ -731,17 +887,27 @@ define void @test57(ptr %ptr, i16 %cmp, i16 %val) {
define void @test58(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test58:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB58_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB58_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB58_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB58_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB58_1
-; PPC64LE-NEXT: .LBB58_3:
+; PPC64LE-NEXT: beq 0, .LBB58_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB58_2
+; PPC64LE-NEXT: .LBB58_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst acquire
@@ -750,17 +916,27 @@ define void @test58(ptr %ptr, i16 %cmp, i16 %val) {
define void @test59(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test59:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB59_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB59_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB59_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB59_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB59_1
-; PPC64LE-NEXT: .LBB59_3:
+; PPC64LE-NEXT: beq 0, .LBB59_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB59_2
+; PPC64LE-NEXT: .LBB59_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst seq_cst
@@ -770,14 +946,17 @@ define void @test59(ptr %ptr, i16 %cmp, i16 %val) {
define void @test60(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test60:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB60_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB60_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB60_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val monotonic monotonic
ret void
@@ -786,14 +965,17 @@ define void @test60(ptr %ptr, i32 %cmp, i32 %val) {
define void @test61(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test61:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB61_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB61_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB61_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB61_1
-; PPC64LE-NEXT: .LBB61_3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire monotonic
@@ -803,14 +985,17 @@ define void @test61(ptr %ptr, i32 %cmp, i32 %val) {
define void @test62(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test62:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB62_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB62_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bne 0, .LBB62_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB62_1
-; PPC64LE-NEXT: .LBB62_3:
+; PPC64LE-NEXT: .LBB62_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
@@ -819,16 +1004,22 @@ define void @test62(ptr %ptr, i32 %cmp, i32 %val) {
define void @test63(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test63:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB63_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB63_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB63_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB63_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release monotonic
ret void
@@ -836,16 +1027,23 @@ define void @test63(ptr %ptr, i32 %cmp, i32 %val) {
define void @test64(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test64:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB64_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB64_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB64_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB64_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB64_1
-; PPC64LE-NEXT: .LBB64_3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB64_2
+; PPC64LE-NEXT: .LBB64_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val release acquire
@@ -854,16 +1052,25 @@ define void @test64(ptr %ptr, i32 %cmp, i32 %val) {
define void @test65(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test65:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB65_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB65_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB65_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB65_1
-; PPC64LE-NEXT: .LBB65_3:
+; PPC64LE-NEXT: beq 0, .LBB65_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB65_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB65_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel monotonic
@@ -872,16 +1079,23 @@ define void @test65(ptr %ptr, i32 %cmp, i32 %val) {
define void @test66(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test66:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB66_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB66_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB66_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB66_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB66_1
-; PPC64LE-NEXT: .LBB66_3:
+; PPC64LE-NEXT: beq 0, .LBB66_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB66_2
+; PPC64LE-NEXT: .LBB66_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acq_rel acquire
@@ -890,16 +1104,25 @@ define void @test66(ptr %ptr, i32 %cmp, i32 %val) {
define void @test67(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test67:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB67_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB67_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB67_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB67_1
-; PPC64LE-NEXT: .LBB67_3:
+; PPC64LE-NEXT: beq 0, .LBB67_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB67_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB67_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst monotonic
@@ -908,16 +1131,23 @@ define void @test67(ptr %ptr, i32 %cmp, i32 %val) {
define void @test68(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test68:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB68_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB68_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB68_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB68_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB68_1
-; PPC64LE-NEXT: .LBB68_3:
+; PPC64LE-NEXT: beq 0, .LBB68_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB68_2
+; PPC64LE-NEXT: .LBB68_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst acquire
@@ -926,16 +1156,23 @@ define void @test68(ptr %ptr, i32 %cmp, i32 %val) {
define void @test69(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test69:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB69_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB69_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB69_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB69_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB69_1
-; PPC64LE-NEXT: .LBB69_3:
+; PPC64LE-NEXT: beq 0, .LBB69_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB69_2
+; PPC64LE-NEXT: .LBB69_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst
@@ -945,14 +1182,17 @@ define void @test69(ptr %ptr, i32 %cmp, i32 %val) {
define void @test70(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test70:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB70_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB70_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
+; PPC64LE-NEXT: cmpld 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB70_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val monotonic monotonic
ret void
@@ -961,14 +1201,17 @@ define void @test70(ptr %ptr, i64 %cmp, i64 %val) {
define void @test71(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test71:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB71_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB71_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB71_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB71_1
-; PPC64LE-NEXT: .LBB71_3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire monotonic
@@ -978,14 +1221,17 @@ define void @test71(ptr %ptr, i64 %cmp, i64 %val) {
define void @test72(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test72:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB72_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB72_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
+; PPC64LE-NEXT: cmpld 6, 4
; PPC64LE-NEXT: bne 0, .LBB72_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB72_1
-; PPC64LE-NEXT: .LBB72_3:
+; PPC64LE-NEXT: .LBB72_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
@@ -994,16 +1240,22 @@ define void @test72(ptr %ptr, i64 %cmp, i64 %val) {
define void @test73(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test73:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB73_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
+; PPC64LE-NEXT: cmpld 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB73_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB73_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB73_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release monotonic
ret void
@@ -1011,16 +1263,23 @@ define void @test73(ptr %ptr, i64 %cmp, i64 %val) {
define void @test74(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test74:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB74_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB74_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bne 0, .LBB74_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB74_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB74_1
-; PPC64LE-NEXT: .LBB74_3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB74_2
+; PPC64LE-NEXT: .LBB74_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val release acquire
@@ -1029,16 +1288,25 @@ define void @test74(ptr %ptr, i64 %cmp, i64 %val) {
define void @test75(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test75:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB75_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB75_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB75_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB75_1
-; PPC64LE-NEXT: .LBB75_3:
+; PPC64LE-NEXT: beq 0, .LBB75_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB75_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB75_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel monotonic
@@ -1047,16 +1315,23 @@ define void @test75(ptr %ptr, i64 %cmp, i64 %val) {
define void @test76(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test76:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB76_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB76_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bne 0, .LBB76_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB76_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB76_1
-; PPC64LE-NEXT: .LBB76_3:
+; PPC64LE-NEXT: beq 0, .LBB76_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB76_2
+; PPC64LE-NEXT: .LBB76_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acq_rel acquire
@@ -1065,16 +1340,25 @@ define void @test76(ptr %ptr, i64 %cmp, i64 %val) {
define void @test77(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test77:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB77_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB77_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB77_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB77_1
-; PPC64LE-NEXT: .LBB77_3:
+; PPC64LE-NEXT: beq 0, .LBB77_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB77_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB77_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst monotonic
@@ -1083,16 +1367,23 @@ define void @test77(ptr %ptr, i64 %cmp, i64 %val) {
define void @test78(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test78:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB78_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB78_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bne 0, .LBB78_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB78_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB78_1
-; PPC64LE-NEXT: .LBB78_3:
+; PPC64LE-NEXT: beq 0, .LBB78_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB78_2
+; PPC64LE-NEXT: .LBB78_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst acquire
@@ -1101,16 +1392,23 @@ define void @test78(ptr %ptr, i64 %cmp, i64 %val) {
define void @test79(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test79:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB79_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB79_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bne 0, .LBB79_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB79_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB79_1
-; PPC64LE-NEXT: .LBB79_3:
+; PPC64LE-NEXT: beq 0, .LBB79_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB79_2
+; PPC64LE-NEXT: .LBB79_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst seq_cst
@@ -1120,15 +1418,20 @@ define void @test79(ptr %ptr, i64 %cmp, i64 %val) {
define void @test80(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test80:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB80_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB80_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB80_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") monotonic monotonic
ret void
@@ -1137,15 +1440,20 @@ define void @test80(ptr %ptr, i8 %cmp, i8 %val) {
define void @test81(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test81:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB81_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB81_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB81_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB81_1
-; PPC64LE-NEXT: .LBB81_3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") acquire monotonic
@@ -1155,15 +1463,20 @@ define void @test81(ptr %ptr, i8 %cmp, i8 %val) {
define void @test82(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test82:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB82_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB82_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bne 0, .LBB82_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB82_1
-; PPC64LE-NEXT: .LBB82_3:
+; PPC64LE-NEXT: .LBB82_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") acquire acquire
@@ -1172,17 +1485,26 @@ define void @test82(ptr %ptr, i8 %cmp, i8 %val) {
define void @test83(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test83:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB83_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB83_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB83_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB83_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") release monotonic
ret void
@@ -1190,17 +1512,27 @@ define void @test83(ptr %ptr, i8 %cmp, i8 %val) {
define void @test84(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test84:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB84_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB84_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB84_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB84_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB84_1
-; PPC64LE-NEXT: .LBB84_3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB84_2
+; PPC64LE-NEXT: .LBB84_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") release acquire
@@ -1209,17 +1541,29 @@ define void @test84(ptr %ptr, i8 %cmp, i8 %val) {
define void @test85(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test85:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB85_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB85_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB85_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB85_1
-; PPC64LE-NEXT: .LBB85_3:
+; PPC64LE-NEXT: beq 0, .LBB85_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB85_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB85_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") acq_rel monotonic
@@ -1228,17 +1572,27 @@ define void @test85(ptr %ptr, i8 %cmp, i8 %val) {
define void @test86(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test86:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB86_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB86_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB86_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB86_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB86_1
-; PPC64LE-NEXT: .LBB86_3:
+; PPC64LE-NEXT: beq 0, .LBB86_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB86_2
+; PPC64LE-NEXT: .LBB86_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") acq_rel acquire
@@ -1247,17 +1601,29 @@ define void @test86(ptr %ptr, i8 %cmp, i8 %val) {
define void @test87(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test87:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB87_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB87_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB87_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB87_1
-; PPC64LE-NEXT: .LBB87_3:
+; PPC64LE-NEXT: beq 0, .LBB87_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB87_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB87_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") seq_cst monotonic
@@ -1266,17 +1632,27 @@ define void @test87(ptr %ptr, i8 %cmp, i8 %val) {
define void @test88(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test88:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB88_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB88_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB88_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB88_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB88_1
-; PPC64LE-NEXT: .LBB88_3:
+; PPC64LE-NEXT: beq 0, .LBB88_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB88_2
+; PPC64LE-NEXT: .LBB88_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") seq_cst acquire
@@ -1285,17 +1661,27 @@ define void @test88(ptr %ptr, i8 %cmp, i8 %val) {
define void @test89(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test89:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB89_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB89_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 24
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB89_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 24
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB89_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stbcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB89_1
-; PPC64LE-NEXT: .LBB89_3:
+; PPC64LE-NEXT: beq 0, .LBB89_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lbarx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB89_2
+; PPC64LE-NEXT: .LBB89_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val syncscope("singlethread") seq_cst seq_cst
@@ -1305,15 +1691,20 @@ define void @test89(ptr %ptr, i8 %cmp, i8 %val) {
define void @test90(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test90:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB90_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB90_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB90_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") monotonic monotonic
ret void
@@ -1322,15 +1713,20 @@ define void @test90(ptr %ptr, i16 %cmp, i16 %val) {
define void @test91(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test91:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB91_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB91_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB91_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB91_1
-; PPC64LE-NEXT: .LBB91_3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") acquire monotonic
@@ -1340,15 +1736,20 @@ define void @test91(ptr %ptr, i16 %cmp, i16 %val) {
define void @test92(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test92:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 16
; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB92_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB92_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bne 0, .LBB92_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB92_1
-; PPC64LE-NEXT: .LBB92_3:
+; PPC64LE-NEXT: .LBB92_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") acquire acquire
@@ -1357,17 +1758,26 @@ define void @test92(ptr %ptr, i16 %cmp, i16 %val) {
define void @test93(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test93:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB93_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB93_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB93_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB93_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") release monotonic
ret void
@@ -1375,17 +1785,27 @@ define void @test93(ptr %ptr, i16 %cmp, i16 %val) {
define void @test94(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test94:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB94_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB94_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB94_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB94_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB94_1
-; PPC64LE-NEXT: .LBB94_3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB94_2
+; PPC64LE-NEXT: .LBB94_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") release acquire
@@ -1394,17 +1814,29 @@ define void @test94(ptr %ptr, i16 %cmp, i16 %val) {
define void @test95(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test95:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB95_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB95_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB95_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB95_1
-; PPC64LE-NEXT: .LBB95_3:
+; PPC64LE-NEXT: beq 0, .LBB95_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB95_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB95_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") acq_rel monotonic
@@ -1413,17 +1845,27 @@ define void @test95(ptr %ptr, i16 %cmp, i16 %val) {
define void @test96(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test96:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB96_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB96_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB96_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB96_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB96_1
-; PPC64LE-NEXT: .LBB96_3:
+; PPC64LE-NEXT: beq 0, .LBB96_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB96_2
+; PPC64LE-NEXT: .LBB96_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") acq_rel acquire
@@ -1432,17 +1874,29 @@ define void @test96(ptr %ptr, i16 %cmp, i16 %val) {
define void @test97(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test97:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB97_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB97_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB97_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB97_1
-; PPC64LE-NEXT: .LBB97_3:
+; PPC64LE-NEXT: beq 0, .LBB97_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB97_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB97_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") seq_cst monotonic
@@ -1451,17 +1905,27 @@ define void @test97(ptr %ptr, i16 %cmp, i16 %val) {
define void @test98(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test98:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB98_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB98_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB98_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB98_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB98_1
-; PPC64LE-NEXT: .LBB98_3:
+; PPC64LE-NEXT: beq 0, .LBB98_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB98_2
+; PPC64LE-NEXT: .LBB98_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") seq_cst acquire
@@ -1470,17 +1934,27 @@ define void @test98(ptr %ptr, i16 %cmp, i16 %val) {
define void @test99(ptr %ptr, i16 %cmp, i16 %val) {
; PPC64LE-LABEL: test99:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: clrlwi 4, 4, 16
-; PPC64LE-NEXT: .LBB99_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lharx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB99_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: clrlwi 4, 4, 16
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB99_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: clrlwi 5, 5, 16
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB99_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: sthcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB99_1
-; PPC64LE-NEXT: .LBB99_3:
+; PPC64LE-NEXT: beq 0, .LBB99_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lharx 6, 0, 3
+; PPC64LE-NEXT: clrlwi 6, 6, 16
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB99_2
+; PPC64LE-NEXT: .LBB99_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val syncscope("singlethread") seq_cst seq_cst
@@ -1490,14 +1964,17 @@ define void @test99(ptr %ptr, i16 %cmp, i16 %val) {
define void @test100(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test100:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB100_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB100_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB100_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") monotonic monotonic
ret void
@@ -1506,14 +1983,17 @@ define void @test100(ptr %ptr, i32 %cmp, i32 %val) {
define void @test101(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test101:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB101_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB101_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB101_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB101_1
-; PPC64LE-NEXT: .LBB101_3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") acquire monotonic
@@ -1523,14 +2003,17 @@ define void @test101(ptr %ptr, i32 %cmp, i32 %val) {
define void @test102(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test102:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB102_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB102_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bne 0, .LBB102_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB102_1
-; PPC64LE-NEXT: .LBB102_3:
+; PPC64LE-NEXT: .LBB102_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") acquire acquire
@@ -1539,16 +2022,22 @@ define void @test102(ptr %ptr, i32 %cmp, i32 %val) {
define void @test103(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test103:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB103_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB103_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB103_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB103_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") release monotonic
ret void
@@ -1556,16 +2045,23 @@ define void @test103(ptr %ptr, i32 %cmp, i32 %val) {
define void @test104(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test104:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB104_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB104_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB104_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB104_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB104_1
-; PPC64LE-NEXT: .LBB104_3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB104_2
+; PPC64LE-NEXT: .LBB104_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") release acquire
@@ -1574,16 +2070,25 @@ define void @test104(ptr %ptr, i32 %cmp, i32 %val) {
define void @test105(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test105:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB105_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB105_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB105_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB105_1
-; PPC64LE-NEXT: .LBB105_3:
+; PPC64LE-NEXT: beq 0, .LBB105_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB105_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB105_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") acq_rel monotonic
@@ -1592,16 +2097,23 @@ define void @test105(ptr %ptr, i32 %cmp, i32 %val) {
define void @test106(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test106:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB106_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB106_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB106_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB106_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB106_1
-; PPC64LE-NEXT: .LBB106_3:
+; PPC64LE-NEXT: beq 0, .LBB106_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB106_2
+; PPC64LE-NEXT: .LBB106_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") acq_rel acquire
@@ -1610,16 +2122,25 @@ define void @test106(ptr %ptr, i32 %cmp, i32 %val) {
define void @test107(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test107:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB107_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB107_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB107_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB107_1
-; PPC64LE-NEXT: .LBB107_3:
+; PPC64LE-NEXT: beq 0, .LBB107_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB107_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB107_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") seq_cst monotonic
@@ -1628,16 +2149,23 @@ define void @test107(ptr %ptr, i32 %cmp, i32 %val) {
define void @test108(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test108:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB108_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB108_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB108_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB108_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB108_1
-; PPC64LE-NEXT: .LBB108_3:
+; PPC64LE-NEXT: beq 0, .LBB108_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB108_2
+; PPC64LE-NEXT: .LBB108_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") seq_cst acquire
@@ -1646,16 +2174,23 @@ define void @test108(ptr %ptr, i32 %cmp, i32 %val) {
define void @test109(ptr %ptr, i32 %cmp, i32 %val) {
; PPC64LE-LABEL: test109:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB109_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: lwarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
-; PPC64LE-NEXT: bne 0, .LBB109_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: bne 0, .LBB109_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB109_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stwcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB109_1
-; PPC64LE-NEXT: .LBB109_3:
+; PPC64LE-NEXT: beq 0, .LBB109_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: lwarx 6, 0, 3
+; PPC64LE-NEXT: cmplw 6, 4
+; PPC64LE-NEXT: beq 0, .LBB109_2
+; PPC64LE-NEXT: .LBB109_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val syncscope("singlethread") seq_cst seq_cst
@@ -1665,14 +2200,17 @@ define void @test109(ptr %ptr, i32 %cmp, i32 %val) {
define void @test110(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test110:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB110_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB110_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
+; PPC64LE-NEXT: cmpld 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB110_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.end
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") monotonic monotonic
ret void
@@ -1681,14 +2219,17 @@ define void @test110(ptr %ptr, i64 %cmp, i64 %val) {
define void @test111(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test111:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB111_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB111_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB111_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB111_1
-; PPC64LE-NEXT: .LBB111_3:
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") acquire monotonic
@@ -1698,14 +2239,17 @@ define void @test111(ptr %ptr, i64 %cmp, i64 %val) {
define void @test112(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test112:
; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: .LBB112_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB112_1: # %cmpxchg.start
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
+; PPC64LE-NEXT: cmpld 6, 4
; PPC64LE-NEXT: bne 0, .LBB112_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.2: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB112_1
-; PPC64LE-NEXT: .LBB112_3:
+; PPC64LE-NEXT: .LBB112_3: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") acquire acquire
@@ -1714,16 +2258,22 @@ define void @test112(ptr %ptr, i64 %cmp, i64 %val) {
define void @test113(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test113:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB113_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
+; PPC64LE-NEXT: cmpld 6, 4
; PPC64LE-NEXT: bnelr 0
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB113_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB113_1
-; PPC64LE-NEXT: # %bb.3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB113_2
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") release monotonic
ret void
@@ -1731,16 +2281,23 @@ define void @test113(ptr %ptr, i64 %cmp, i64 %val) {
define void @test114(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test114:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB114_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB114_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bne 0, .LBB114_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB114_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB114_1
-; PPC64LE-NEXT: .LBB114_3:
+; PPC64LE-NEXT: beqlr 0
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB114_2
+; PPC64LE-NEXT: .LBB114_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") release acquire
@@ -1749,16 +2306,25 @@ define void @test114(ptr %ptr, i64 %cmp, i64 %val) {
define void @test115(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test115:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB115_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB115_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB115_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB115_1
-; PPC64LE-NEXT: .LBB115_3:
+; PPC64LE-NEXT: beq 0, .LBB115_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB115_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB115_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") acq_rel monotonic
@@ -1767,16 +2333,23 @@ define void @test115(ptr %ptr, i64 %cmp, i64 %val) {
define void @test116(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test116:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: lwsync
-; PPC64LE-NEXT: .LBB116_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB116_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bne 0, .LBB116_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB116_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB116_1
-; PPC64LE-NEXT: .LBB116_3:
+; PPC64LE-NEXT: beq 0, .LBB116_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB116_2
+; PPC64LE-NEXT: .LBB116_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") acq_rel acquire
@@ -1785,16 +2358,25 @@ define void @test116(ptr %ptr, i64 %cmp, i64 %val) {
define void @test117(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test117:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB117_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB117_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bnelr 0
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB117_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB117_1
-; PPC64LE-NEXT: .LBB117_3:
+; PPC64LE-NEXT: beq 0, .LBB117_5
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB117_2
+; PPC64LE-NEXT: # %bb.4: # %cmpxchg.end
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .LBB117_5: # %cmpxchg.success
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") seq_cst monotonic
@@ -1803,16 +2385,23 @@ define void @test117(ptr %ptr, i64 %cmp, i64 %val) {
define void @test118(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test118:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB118_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB118_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bne 0, .LBB118_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB118_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB118_1
-; PPC64LE-NEXT: .LBB118_3:
+; PPC64LE-NEXT: beq 0, .LBB118_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB118_2
+; PPC64LE-NEXT: .LBB118_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") seq_cst acquire
@@ -1821,16 +2410,23 @@ define void @test118(ptr %ptr, i64 %cmp, i64 %val) {
define void @test119(ptr %ptr, i64 %cmp, i64 %val) {
; PPC64LE-LABEL: test119:
-; PPC64LE: # %bb.0:
-; PPC64LE-NEXT: sync
-; PPC64LE-NEXT: .LBB119_1:
+; PPC64LE: # %bb.0: # %cmpxchg.start
; PPC64LE-NEXT: ldarx 6, 0, 3
-; PPC64LE-NEXT: cmpd 6, 4
-; PPC64LE-NEXT: bne 0, .LBB119_3
-; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: bne 0, .LBB119_4
+; PPC64LE-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64LE-NEXT: sync
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB119_2: # %cmpxchg.trystore
+; PPC64LE-NEXT: #
; PPC64LE-NEXT: stdcx. 5, 0, 3
-; PPC64LE-NEXT: bne 0, .LBB119_1
-; PPC64LE-NEXT: .LBB119_3:
+; PPC64LE-NEXT: beq 0, .LBB119_4
+; PPC64LE-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64LE-NEXT: #
+; PPC64LE-NEXT: ldarx 6, 0, 3
+; PPC64LE-NEXT: cmpld 6, 4
+; PPC64LE-NEXT: beq 0, .LBB119_2
+; PPC64LE-NEXT: .LBB119_4: # %cmpxchg.nostore
; PPC64LE-NEXT: lwsync
; PPC64LE-NEXT: blr
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val syncscope("singlethread") seq_cst seq_cst
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 24e71c87414e8..40786057ead5f 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -139,59 +139,67 @@ define void @store_i64_seq_cst(ptr %mem) {
define i8 @cas_strong_i8_sc_sc(ptr %mem) {
; PPC32-LABEL: cas_strong_i8_sc_sc:
; PPC32: # %bb.0:
-; PPC32-NEXT: rlwinm r8, r3, 3, 27, 28
-; PPC32-NEXT: li r5, 1
-; PPC32-NEXT: li r6, 0
-; PPC32-NEXT: li r7, 255
-; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29
-; PPC32-NEXT: xori r3, r8, 24
-; PPC32-NEXT: slw r8, r5, r3
-; PPC32-NEXT: slw r9, r6, r3
-; PPC32-NEXT: slw r5, r7, r3
-; PPC32-NEXT: and r6, r8, r5
-; PPC32-NEXT: and r7, r9, r5
+; PPC32-NEXT: rlwinm r5, r3, 0, 0, 29
+; PPC32-NEXT: lwarx r4, 0, r5
+; PPC32-NEXT: not r3, r3
+; PPC32-NEXT: rlwinm r3, r3, 3, 27, 28
+; PPC32-NEXT: srw r6, r4, r3
+; PPC32-NEXT: andi. r6, r6, 255
+; PPC32-NEXT: bne cr0, .LBB8_4
+; PPC32-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC32-NEXT: li r6, 255
+; PPC32-NEXT: li r7, 1
+; PPC32-NEXT: slw r6, r6, r3
+; PPC32-NEXT: not r6, r6
+; PPC32-NEXT: slw r7, r7, r3
; PPC32-NEXT: sync
-; PPC32-NEXT: .LBB8_1:
-; PPC32-NEXT: lwarx r9, 0, r4
-; PPC32-NEXT: and r8, r9, r5
-; PPC32-NEXT: cmpw r8, r7
-; PPC32-NEXT: bne cr0, .LBB8_3
-; PPC32-NEXT: # %bb.2:
-; PPC32-NEXT: andc r9, r9, r5
-; PPC32-NEXT: or r9, r9, r6
-; PPC32-NEXT: stwcx. r9, 0, r4
-; PPC32-NEXT: bne cr0, .LBB8_1
-; PPC32-NEXT: .LBB8_3:
-; PPC32-NEXT: srw r3, r8, r3
+; PPC32-NEXT: .LBB8_2: # %cmpxchg.trystore
+; PPC32-NEXT: # =>This Inner Loop Header: Depth=1
+; PPC32-NEXT: and r8, r4, r6
+; PPC32-NEXT: or r8, r8, r7
+; PPC32-NEXT: stwcx. r8, 0, r5
+; PPC32-NEXT: beq cr0, .LBB8_4
+; PPC32-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC32-NEXT: # in Loop: Header=BB8_2 Depth=1
+; PPC32-NEXT: lwarx r4, 0, r5
+; PPC32-NEXT: srw r8, r4, r3
+; PPC32-NEXT: andi. r8, r8, 255
+; PPC32-NEXT: beq cr0, .LBB8_2
+; PPC32-NEXT: .LBB8_4: # %cmpxchg.nostore
+; PPC32-NEXT: srw r3, r4, r3
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
;
; PPC64-LABEL: cas_strong_i8_sc_sc:
; PPC64: # %bb.0:
-; PPC64-NEXT: rlwinm r8, r3, 3, 27, 28
-; PPC64-NEXT: li r5, 1
-; PPC64-NEXT: li r6, 0
-; PPC64-NEXT: li r7, 255
-; PPC64-NEXT: rldicr r4, r3, 0, 61
-; PPC64-NEXT: xori r3, r8, 24
-; PPC64-NEXT: slw r8, r5, r3
-; PPC64-NEXT: slw r9, r6, r3
-; PPC64-NEXT: slw r5, r7, r3
-; PPC64-NEXT: and r6, r8, r5
-; PPC64-NEXT: and r7, r9, r5
+; PPC64-NEXT: rldicr r5, r3, 0, 61
+; PPC64-NEXT: not r3, r3
+; PPC64-NEXT: lwarx r4, 0, r5
+; PPC64-NEXT: rlwinm r3, r3, 3, 27, 28
+; PPC64-NEXT: srw r6, r4, r3
+; PPC64-NEXT: andi. r6, r6, 255
+; PPC64-NEXT: bne cr0, .LBB8_4
+; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; PPC64-NEXT: li r6, 255
+; PPC64-NEXT: li r7, 1
+; PPC64-NEXT: slw r6, r6, r3
+; PPC64-NEXT: not r6, r6
+; PPC64-NEXT: slw r7, r7, r3
; PPC64-NEXT: sync
-; PPC64-NEXT: .LBB8_1:
-; PPC64-NEXT: lwarx r9, 0, r4
-; PPC64-NEXT: and r8, r9, r5
-; PPC64-NEXT: cmpw r8, r7
-; PPC64-NEXT: bne cr0, .LBB8_3
-; PPC64-NEXT: # %bb.2:
-; PPC64-NEXT: andc r9, r9, r5
-; PPC64-NEXT: or r9, r9, r6
-; PPC64-NEXT: stwcx. r9, 0, r4
-; PPC64-NEXT: bne cr0, .LBB8_1
-; PPC64-NEXT: .LBB8_3:
-; PPC64-NEXT: srw r3, r8, r3
+; PPC64-NEXT: .LBB8_2: # %cmpxchg.trystore
+; PPC64-NEXT: # =>This Inner Loop Header: Depth=1
+; PPC64-NEXT: and r8, r4, r6
+; PPC64-NEXT: or r8, r8, r7
+; PPC64-NEXT: stwcx. r8, 0, r5
+; PPC64-NEXT: beq cr0, .LBB8_4
+; PPC64-NEXT: # %bb.3: # %cmpxchg.releasedload
+; PPC64-NEXT: # in Loop: Header=BB8_2 Depth=1
+; PPC64-NEXT: lwarx r4, 0, r5
+; PPC64-NEXT: srw r8, r4, r3
+; PPC64-NEXT: andi. r8, r8, 255
+; PPC64-NEXT: beq cr0, .LBB8_2
+; PPC64-NEXT: .LBB8_4: # %cmpxchg.nostore
+; PPC64-NEXT: srw r3, r4, r3
; PPC64-NEXT: lwsync
; PPC64-NEXT: blr
%val = cmpxchg ptr %mem, i8 0, i8 1 seq_cst seq_cst
@@ -201,57 +209,53 @@ define i8 @cas_strong_i8_sc_sc(ptr %mem) {
define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
; PPC32-LABEL: cas_weak_i16_acquire_acquire:
; PPC32: # %bb.0:
-; PPC32-NEXT: li r6, 0
-; PPC32-NEXT: rlwinm r4, r3, 3, 27, 27
-; PPC32-NEXT: li r5, 1
-; PPC32-NEXT: ori r7, r6, 65535
-; PPC32-NEXT: xori r4, r4, 16
-; PPC32-NEXT: slw r8, r5, r4
-; PPC32-NEXT: slw r9, r6, r4
-; PPC32-NEXT: slw r5, r7, r4
-; PPC32-NEXT: rlwinm r3, r3, 0, 0, 29
-; PPC32-NEXT: and r6, r8, r5
-; PPC32-NEXT: and r7, r9, r5
-; PPC32-NEXT: .LBB9_1:
-; PPC32-NEXT: lwarx r9, 0, r3
-; PPC32-NEXT: and r8, r9, r5
-; PPC32-NEXT: cmpw r8, r7
-; PPC32-NEXT: bne cr0, .LBB9_3
-; PPC32-NEXT: # %bb.2:
-; PPC32-NEXT: andc r9, r9, r5
-; PPC32-NEXT: or r9, r9, r6
-; PPC32-NEXT: stwcx. r9, 0, r3
-; PPC32-NEXT: bne cr0, .LBB9_1
-; PPC32-NEXT: .LBB9_3:
-; PPC32-NEXT: srw r3, r8, r4
+; PPC32-NEXT: rlwinm r4, r3, 0, 0, 29
+; PPC32-NEXT: lwarx r5, 0, r4
+; PPC32-NEXT: clrlwi r3, r3, 30
+; PPC32-NEXT: xori r3, r3, 2
+; PPC32-NEXT: slwi r6, r3, 3
+; PPC32-NEXT: srw r3, r5, r6
+; PPC32-NEXT: andi. r7, r3, 65535
+; PPC32-NEXT: beq cr0, .LBB9_2
+; PPC32-NEXT: # %bb.1: # %cmpxchg.failure
+; PPC32-NEXT: lwsync
+; PPC32-NEXT: blr
+; PPC32-NEXT: .LBB9_2: # %cmpxchg.fencedstore
+; PPC32-NEXT: lis r7, 0
+; PPC32-NEXT: ori r7, r7, 65535
+; PPC32-NEXT: slw r7, r7, r6
+; PPC32-NEXT: li r8, 1
+; PPC32-NEXT: not r7, r7
+; PPC32-NEXT: slw r6, r8, r6
+; PPC32-NEXT: and r5, r5, r7
+; PPC32-NEXT: or r5, r5, r6
+; PPC32-NEXT: stwcx. r5, 0, r4
; PPC32-NEXT: lwsync
; PPC32-NEXT: blr
;
; PPC64-LABEL: cas_weak_i16_acquire_acquire:
; PPC64: # %bb.0:
-; PPC64-NEXT: li r6, 0
-; PPC64-NEXT: rlwinm r4, r3, 3, 27, 27
-; PPC64-NEXT: li r5, 1
-; PPC64-NEXT: ori r7, r6, 65535
-; PPC64-NEXT: xori r4, r4, 16
-; PPC64-NEXT: slw r8, r5, r4
-; PPC64-NEXT: slw r9, r6, r4
-; PPC64-NEXT: slw r5, r7, r4
-; PPC64-NEXT: rldicr r3, r3, 0, 61
-; PPC64-NEXT: and r6, r8, r5
-; PPC64-NEXT: and r7, r9, r5
-; PPC64-NEXT: .LBB9_1:
-; PPC64-NEXT: lwarx r9, 0, r3
-; PPC64-NEXT: and r8, r9, r5
-; PPC64-NEXT: cmpw r8, r7
-; PPC64-NEXT: bne cr0, .LBB9_3
-; PPC64-NEXT: # %bb.2:
-; PPC64-NEXT: andc r9, r9, r5
-; PPC64-NEXT: or r9, r9, r6
-; PPC64-NEXT: stwcx. r9, 0, r3
-; PPC64-NEXT: bne cr0, .LBB9_1
-; PPC64-NEXT: .LBB9_3:
-; PPC64-NEXT: srw r3, r8, r4
+; PPC64-NEXT: rldicr r4, r3, 0, 61
+; PPC64-NEXT: clrlwi r3, r3, 30
+; PPC64-NEXT: lwarx r5, 0, r4
+; PPC64-NEXT: xori r3, r3, 2
+; PPC64-NEXT: slwi r6, r3, 3
+; PPC64-NEXT: srw r3, r5, r6
+; PPC64-NEXT: andi. r7, r3, 65535
+; PPC64-NEXT: beq cr0, .LBB9_2
+; PPC64-NEXT: # %bb.1: # %cmpxchg.failure
+; PPC64-NEXT: lwsync
+; PPC64-NEXT: blr
+; PPC64-NEXT: .LBB9_2: # %cmpxchg.fencedstore
+; PPC64-NEXT: lis r7, 0
+; PPC64-NEXT: ori r7, r7, 65535
+; PPC64-NEXT: slw r7, r7, r6
+; PPC64-NEXT: li r8, 1
+; PPC64-NEXT: not r7, r7
+; PPC64-NEXT: slw r6, r8, r6
+; PPC64-NEXT: and r5, r5, r7
+; PPC64-NEXT: or r5, r5, r6
+; PPC64-NEXT: stwcx. r5, 0, r4
; PPC64-NEXT: lwsync
; PPC64-NEXT: blr
%val = cmpxchg weak ptr %mem, i16 0, i16 1 acquire acquire
@@ -261,17 +265,23 @@ define i16 @cas_weak_i16_acquire_acquire(ptr %mem) {
define i32 @cas_strong_i32_acqrel_acquire(ptr %mem) {
; CHECK-LABEL: cas_strong_i32_acqrel_acquire:
; CHECK: # %bb.0:
+; CHECK-NEXT: mr r4, r3
+; CHECK-NEXT: lwarx r3, 0, r3
+; CHECK-NEXT: cmplwi r3, 0
+; CHECK-NEXT: bne cr0, .LBB10_4
+; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: li r5, 1
; CHECK-NEXT: lwsync
-; CHECK-NEXT: .LBB10_1:
-; CHECK-NEXT: lwarx r4, 0, r3
-; CHECK-NEXT: cmpwi r4, 0
-; CHECK-NEXT: bne cr0, .LBB10_3
-; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: stwcx. r5, 0, r3
-; CHECK-NEXT: bne cr0, .LBB10_1
-; CHECK-NEXT: .LBB10_3:
-; CHECK-NEXT: mr r3, r4
+; CHECK-NEXT: .LBB10_2: # %cmpxchg.trystore
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stwcx. r5, 0, r4
+; CHECK-NEXT: beq cr0, .LBB10_4
+; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
+; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=1
+; CHECK-NEXT: lwarx r3, 0, r4
+; CHECK-NEXT: cmplwi r3, 0
+; CHECK-NEXT: beq cr0, .LBB10_2
+; CHECK-NEXT: .LBB10_4: # %cmpxchg.nostore
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%val = cmpxchg ptr %mem, i32 0, i32 1 acq_rel acquire
@@ -304,17 +314,14 @@ define i64 @cas_weak_i64_release_monotonic(ptr %mem) {
;
; PPC64-LABEL: cas_weak_i64_release_monotonic:
; PPC64: # %bb.0:
+; PPC64-NEXT: mr r4, r3
+; PPC64-NEXT: ldarx r3, 0, r3
+; PPC64-NEXT: cmpldi r3, 0
+; PPC64-NEXT: bnelr cr0
+; PPC64-NEXT: # %bb.1: # %cmpxchg.fencedstore
; PPC64-NEXT: li r5, 1
; PPC64-NEXT: lwsync
-; PPC64-NEXT: .LBB11_1:
-; PPC64-NEXT: ldarx r4, 0, r3
-; PPC64-NEXT: cmpdi r4, 0
-; PPC64-NEXT: bne cr0, .LBB11_3
-; PPC64-NEXT: # %bb.2:
-; PPC64-NEXT: stdcx. r5, 0, r3
-; PPC64-NEXT: bne cr0, .LBB11_1
-; PPC64-NEXT: .LBB11_3:
-; PPC64-NEXT: mr r3, r4
+; PPC64-NEXT: stdcx. r5, 0, r4
; PPC64-NEXT: blr
%val = cmpxchg weak ptr %mem, i64 0, i64 1 release monotonic
%loaded = extractvalue { i64, i1} %val, 0
diff --git a/llvm/test/CodeGen/PowerPC/loop-comment.ll b/llvm/test/CodeGen/PowerPC/loop-comment.ll
index 14f6791fc7792..1fa9dda51ef9e 100644
--- a/llvm/test/CodeGen/PowerPC/loop-comment.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-comment.ll
@@ -4,12 +4,17 @@
define void @test(ptr %ptr, i8 %cmp, i8 %val) {
; PPC64LE-LABEL: test:
; PPC64LE: # %bb.0:
+; PPC64LE-NEXT: clrlwi 5, 5, 24
; PPC64LE-NEXT: clrlwi 4, 4, 24
-; PPC64LE-NEXT: .LBB0_1:
+; PPC64LE-NEXT: .p2align 5
+; PPC64LE-NEXT: .LBB0_1: # %cmpxchg.start
+; PPC64LE-NEXT: # =>This Inner Loop Header: Depth=1
; PPC64LE-NEXT: lbarx 6, 0, 3
-; PPC64LE-NEXT: cmpw 6, 4
+; PPC64LE-NEXT: clrlwi 6, 6, 24
+; PPC64LE-NEXT: cmplw 6, 4
; PPC64LE-NEXT: bnelr 0
; PPC64LE-NEXT: # %bb.2:
+; PPC64LE-NEXT: # in Loop: Header=BB0_1 Depth=1
; PPC64LE-NEXT: stbcx. 5, 0, 3
; PPC64LE-NEXT: bne 0, .LBB0_1
; PPC64LE-NEXT: # %bb.3:
diff --git a/llvm/test/Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll b/llvm/test/Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll
index f787aa7f6a42b..840e2d3eee553 100644
--- a/llvm/test/Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll
+++ b/llvm/test/Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll
@@ -7,19 +7,51 @@ define float @test_atomicrmw_fadd_f32(ptr %ptr, float %value) {
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fadd float [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] monotonic monotonic, align 4
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
-; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
-; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
+; CHECK-NEXT: %loaded = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], %cmpxchg.end ]
+; CHECK-NEXT: %new = fadd float %loaded, %value
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast float %new to i32
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast float %loaded to i32
+; CHECK-NEXT: br label %cmpxchg.start
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.start: ; preds = %cmpxchg.trystore, %atomicrmw.start
+; CHECK-NEXT: %larx = call i32 @llvm.ppc.lwarx(ptr %ptr)
+; CHECK-NEXT: %should_store = icmp eq i32 %larx, [[TMP3]]
+; CHECK-NEXT: br i1 %should_store, label %cmpxchg.fencedstore, label %cmpxchg.nostore
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.fencedstore: ; preds = %cmpxchg.start
+; CHECK-NEXT: br label %cmpxchg.trystore
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.trystore: ; preds = %cmpxchg.fencedstore
+; CHECK-NEXT: %loaded.trystore = phi i32 [ %larx, %cmpxchg.fencedstore ]
+; CHECK-NEXT: %stcx = call i32 @llvm.ppc.stwcx(ptr %ptr, i32 [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = xor i32 %stcx, 1
+; CHECK-NEXT: %success1 = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 %success1, label %cmpxchg.success, label %cmpxchg.start
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.releasedload: ; No predecessors!
+; CHECK-NEXT: unreachable
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.success: ; preds = %cmpxchg.trystore
+; CHECK-NEXT: br label %cmpxchg.end
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.nostore: ; preds = %cmpxchg.start
+; CHECK-NEXT: %loaded.nostore = phi i32 [ %larx, %cmpxchg.start ]
+; CHECK-NEXT: br label %cmpxchg.failure
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.failure: ; preds = %cmpxchg.nostore
+; CHECK-NEXT: %loaded.failure = phi i32 [ %loaded.nostore, %cmpxchg.nostore ]
+; CHECK-NEXT: br label %cmpxchg.end
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.end: ; preds = %cmpxchg.failure, %cmpxchg.success
+; CHECK-NEXT: %loaded.exit = phi i32 [ %loaded.trystore, %cmpxchg.success ], [ %loaded.failure, %cmpxchg.failure ]
+; CHECK-NEXT: %success2 = phi i1 [ true, %cmpxchg.success ], [ false, %cmpxchg.failure ]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 %loaded.exit to float
+; CHECK-NEXT: br i1 %success2, label %atomicrmw.end, label %atomicrmw.start
+; CHECK-EMPTY:
+; CHECK-NEXT: atomicrmw.end: ; preds = %cmpxchg.end
; CHECK-NEXT: call void @llvm.ppc.lwsync()
; CHECK-NEXT: ret float [[TMP5]]
-;
+; CHECK-NEXT: }
%res = atomicrmw fadd ptr %ptr, float %value seq_cst
ret float %res
}
@@ -28,22 +60,56 @@ define float @test_atomicrmw_fsub_f32(ptr %ptr, float %value) {
; CHECK-LABEL: @test_atomicrmw_fsub_f32(
; CHECK-NEXT: call void @llvm.ppc.sync()
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT: br label [[ATOMICRMW_START:%.*]]
-; CHECK: atomicrmw.start:
-; CHECK-NEXT: [[LOADED:%.*]] = phi float [ [[TMP1]], [[TMP0:%.*]] ], [ [[TMP5:%.*]], [[ATOMICRMW_START]] ]
-; CHECK-NEXT: [[NEW:%.*]] = fsub float [[LOADED]], [[VALUE:%.*]]
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[NEW]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[LOADED]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[TMP3]], i32 [[TMP2]] monotonic monotonic, align 4
-; CHECK-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
-; CHECK-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
-; CHECK-NEXT: [[TMP5]] = bitcast i32 [[NEWLOADED]] to float
-; CHECK-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
-; CHECK: atomicrmw.end:
+; CHECK-NEXT: br label %atomicrmw.start
+; CHECK-EMPTY:
+; CHECK-NEXT: atomicrmw.start:
+; CHECK-NEXT: %loaded = phi float [ [[TMP1]], %0 ], [ [[TMP5:%.*]], %cmpxchg.end ]
+; CHECK-NEXT: %new = fsub float %loaded, %value
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast float %new to i32
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast float %loaded to i32
+; CHECK-NEXT: br label %cmpxchg.start
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.start:
+; CHECK-NEXT: %larx = call i32 @llvm.ppc.lwarx(ptr %ptr)
+; CHECK-NEXT: %should_store = icmp eq i32 %larx, [[TMP3]]
+; CHECK-NEXT: br i1 %should_store, label %cmpxchg.fencedstore, label %cmpxchg.nostore
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.fencedstore: ; preds = %cmpxchg.start
+; CHECK-NEXT: br label %cmpxchg.trystore
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.trystore: ; preds = %cmpxchg.fencedstore
+; CHECK-NEXT: %loaded.trystore = phi i32 [ %larx, %cmpxchg.fencedstore ]
+; CHECK-NEXT: %stcx = call i32 @llvm.ppc.stwcx(ptr %ptr, i32 %2)
+; CHECK-NEXT: [[TMP4:%.*]] = xor i32 %stcx, 1
+; CHECK-NEXT: %success1 = icmp eq i32 [[TMP4]], 0
+; CHECK-NEXT: br i1 %success1, label %cmpxchg.success, label %cmpxchg.start
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.releasedload: ; No predecessors!
+; CHECK-NEXT: unreachable
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.success: ; preds = %cmpxchg.trystore
+; CHECK-NEXT: br label %cmpxchg.end
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.nostore: ; preds = %cmpxchg.start
+; CHECK-NEXT: %loaded.nostore = phi i32 [ %larx, %cmpxchg.start ]
+; CHECK-NEXT: br label %cmpxchg.failure
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.failure: ; preds = %cmpxchg.nostore
+; CHECK-NEXT: %loaded.failure = phi i32 [ %loaded.nostore, %cmpxchg.nostore ]
+; CHECK-NEXT: br label %cmpxchg.end
+; CHECK-EMPTY:
+; CHECK-NEXT: cmpxchg.end: ; preds = %cmpxchg.failure, %cmpxchg.success
+; CHECK-NEXT: %loaded.exit = phi i32 [ %loaded.trystore, %cmpxchg.success ], [ %loaded.failure, %cmpxchg.failure ]
+; CHECK-NEXT: %success2 = phi i1 [ true, %cmpxchg.success ], [ false, %cmpxchg.failure ]
+; CHECK-NEXT: [[TMP5]] = bitcast i32 %loaded.exit to float
+; CHECK-NEXT: br i1 %success2, label %atomicrmw.end, label %atomicrmw.start
+; CHECK-EMPTY:
+; CHECK-NEXT: atomicrmw.end: ; preds = %cmpxchg.end
; CHECK-NEXT: call void @llvm.ppc.lwsync()
; CHECK-NEXT: ret float [[TMP5]]
-;
- %res = atomicrmw fsub ptr %ptr, float %value seq_cst
+; CHECK-NEXT: }
+
+%res = atomicrmw fsub ptr %ptr, float %value seq_cst
ret float %res
}
More information about the llvm-commits
mailing list