[llvm] [PowerPC] enable AtomicExpandImpl::expandAtomicCmpXchg for powerpc (PR #142395)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 2 06:57:46 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: zhijian lin (diggerlin)
<details>
<summary>Changes</summary>
In PowerPC, the AtomicCmpXchgInst is lowered to ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS. However, this node does not handle the weak attribute of AtomicCmpXchgInst. As a result, when compiling C++ atomic_compare_exchange_weak_explicit, the generated assembly includes a "reservation lost" loop — i.e., it branches back and retries if the stwcx. (store-conditional) fails. This differs from GCC’s codegen, which does not include that loop for weak compare-exchange.
Since PowerPC uses LL/SC-style atomic instructions, the patch enables AtomicExpandImpl::expandAtomicCmpXchg for PowerPC. With this, the weak attribute is properly respected, and the "reservation lost" loop is removed for weak operations.
---
Patch is 250.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142395.diff
17 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+1-1)
- (modified) llvm/include/llvm/IR/IntrinsicsPowerPC.td (+14-1)
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+77-1)
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+6)
- (modified) llvm/lib/Target/PowerPC/PPCInstr64Bit.td (+2)
- (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.td (+7-1)
- (modified) llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll (+77-65)
- (modified) llvm/test/CodeGen/PowerPC/all-atomics.ll (+963-709)
- (modified) llvm/test/CodeGen/PowerPC/atomic-2.ll (+2-2)
- (modified) llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll (+25-27)
- (modified) llvm/test/CodeGen/PowerPC/atomic-float.ll (+58-50)
- (modified) llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll (+257-269)
- (modified) llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll (+257-267)
- (modified) llvm/test/CodeGen/PowerPC/atomics-regression.ll (+1168-572)
- (modified) llvm/test/CodeGen/PowerPC/atomics.ll (+121-114)
- (modified) llvm/test/CodeGen/PowerPC/loop-comment.ll (+7-2)
- (modified) llvm/test/Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll (+91-25)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 5105bcc5cce3a..50c1a7f7c30f6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -257,7 +257,7 @@ class TargetLoweringBase {
CastToInteger, // Cast the atomic instruction to another type, e.g. from
// floating-point to integer type.
LLSC, // Expand the instruction into loadlinked/storeconditional; used
- // by ARM/AArch64.
+ // by ARM/AArch64/PowerPC.
LLOnly, // Expand the (load) instruction into just a load-linked, which has
// greater atomic guarantees than a normal load.
CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 751628cee58c0..68d8b5ae0c38d 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1835,6 +1835,19 @@ let TargetPrefix = "ppc" in {
Intrinsic<[],[],[]>;
def int_ppc_iospace_eieio : ClangBuiltin<"__builtin_ppc_iospace_eieio">,
Intrinsic<[],[],[]>;
+ def int_ppc_lbarx :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly, IntrNoDuplicate]>;
+ def int_ppc_lharx :
+ Intrinsic<[llvm_i32_ty],[llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly, IntrNoDuplicate]>;
+ def int_ppc_lwarx :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly, IntrNoDuplicate]>;
+ def int_ppc_ldarx :
+ Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly, IntrNoDuplicate]>;
+
def int_ppc_stdcx :
ClangBuiltin<"__builtin_ppc_stdcx">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i64_ty],
@@ -1844,7 +1857,7 @@ let TargetPrefix = "ppc" in {
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_sthcx :
- Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty ],
+ Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty],
[IntrWriteMem, IntrArgMemOnly, IntrNoDuplicate]>;
def int_ppc_stbcx :
ClangBuiltin<"__builtin_ppc_stbcx">,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0c2a506005604..287145e3de7a4 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1475,6 +1475,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
setMinFunctionAlignment(Align(4));
+ if(Subtarget.hasPartwordAtomics())
+ setMinCmpXchgSizeInBits(8);
+ else
+ setMinCmpXchgSizeInBits(32);
switch (Subtarget.getCPUDirective()) {
default: break;
@@ -12672,6 +12676,77 @@ static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
return Builder.CreateIntrinsic(Id, {});
}
+Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
+ Value *Addr,
+ AtomicOrdering Ord) const {
+ unsigned SZ = ValueTy->getPrimitiveSizeInBits();
+
+ assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
+ +"Only 8/16/32/64-bit atomic loads supported");
+ Intrinsic::ID IntID;
+ switch (SZ) {
+ default:
+ llvm_unreachable("Unexpected PrimitiveSize");
+ case 8:
+ IntID = Intrinsic::ppc_lbarx;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case 16:
+ IntID = Intrinsic::ppc_lharx;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case 32:
+ IntID = Intrinsic::ppc_lwarx;
+ break;
+ case 64:
+ IntID = Intrinsic::ppc_ldarx;
+ break;
+ }
+ Value *Call =
+ Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
+
+ return Builder.CreateTruncOrBitCast(Call, ValueTy);
+}
+
+// Perform a store-conditional operation to Addr. Return the status of the
+// store. This should be 0 if the store succeeded, non-zero otherwise.
+Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
+ Value *Val, Value *Addr,
+ AtomicOrdering Ord) const {
+ Type *Ty = Val->getType();
+ unsigned SZ = Ty->getPrimitiveSizeInBits();
+
+ assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
+ "Only 8/16/32/64-bit atomic loads supported");
+ Intrinsic::ID IntID;
+ switch (SZ) {
+ default:
+ llvm_unreachable("Unexpected PrimitiveSize");
+ case 8:
+ IntID = Intrinsic::ppc_stbcx;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case 16:
+ IntID = Intrinsic::ppc_sthcx;
+ assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+ break;
+ case 32:
+ IntID = Intrinsic::ppc_stwcx;
+ break;
+ case 64:
+ IntID = Intrinsic::ppc_stdcx;
+ break;
+ }
+
+ if(SZ ==8 || SZ==16)
+ Val = Builder.CreateZExt(Val, Builder.getIntNTy(32));;
+
+ Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
+ /*FMFSource=*/nullptr, "stcx");
+ Value *Not = Builder.CreateXor(Call,Builder.getInt32(1));
+ return Not;
+}
+
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
@@ -19633,7 +19708,8 @@ PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
if (shouldInlineQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
- return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
+ return AtomicExpansionKind::LLSC;
+ //return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
}
static Intrinsic::ID
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 2c55b5427297a..4c88bd372b106 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -927,6 +927,12 @@ namespace llvm {
return true;
}
+ Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
+ AtomicOrdering Ord) const override;
+
+ Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
+ AtomicOrdering Ord) const override;
+
Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 659c1a9079c33..fd2084398c857 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -2023,6 +2023,8 @@ def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>;
} // IsISA3_0
+def : Pat<(int_ppc_ldarx ForceXForm:$ptr),
+ (LDARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stdcx ForceXForm:$dst, g8rc:$A),
(RLWINM (STDCX g8rc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, g8rc:$A, 8),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index b70290df07b1c..99ef89a7fdc0c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -5143,7 +5143,6 @@ def : Pat<(int_ppc_store2r gprc:$a, ForceXForm:$ptr),
def : Pat<(int_ppc_store4r gprc:$a, ForceXForm:$ptr),
(STWBRX gprc:$a, ForceXForm:$ptr)>;
-
// Fast 32-bit reverse bits algorithm:
// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA);
@@ -5324,10 +5323,14 @@ def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>;
def : Pat<(i64 (bitreverse i64:$A)),
(OR8 (RLDICR DWBytes7654.DWord, 32, 31), DWBytes3210.DWord)>;
+def : Pat<(int_ppc_lwarx ForceXForm:$ptr),
+ (LWARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stwcx ForceXForm:$dst, gprc:$A),
(RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 4),
(RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
+def : Pat<(int_ppc_lbarx ForceXForm:$ptr),
+ (LBARX ForceXForm:$ptr)>;
def : Pat<(int_ppc_stbcx ForceXForm:$dst, gprc:$A),
(RLWINM (STBCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 1),
@@ -5360,6 +5363,9 @@ def : Pat<(int_ppc_mtmsr gprc:$RS),
(MTMSR $RS, 0)>;
let Predicates = [IsISA2_07] in {
+ def : Pat<(int_ppc_lharx ForceXForm:$ptr),
+ (LHARX ForceXForm:$ptr)>;
+
def : Pat<(int_ppc_sthcx ForceXForm:$dst, gprc:$A),
(RLWINM (STHCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 2),
diff --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
index 8517783e3ebd7..1a8dabc5ad719 100644
--- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
+++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -15,50 +15,57 @@ define signext i32 @main() nounwind {
; CHECK-NEXT: stdu 1, -48(1)
; CHECK-NEXT: li 3, -32477
; CHECK-NEXT: std 0, 64(1)
-; CHECK-NEXT: li 4, 234
-; CHECK-NEXT: addi 6, 1, 46
; CHECK-NEXT: sth 3, 46(1)
-; CHECK-NEXT: lis 3, 0
+; CHECK-NEXT: addi 3, 1, 46
+; CHECK-NEXT: lharx 4, 0, 3
+; CHECK-NEXT: clrlwi 4, 4, 16
+; CHECK-NEXT: cmplwi 4, 33059
+; CHECK-NEXT: bne 0, .LBB0_4
+; CHECK-NEXT: # %bb.1: # %cmpxchg.fencedstore
; CHECK-NEXT: sync
-; CHECK-NEXT: ori 3, 3, 33059
-; CHECK-NEXT: .LBB0_1: # %L.entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lharx 5, 0, 6
-; CHECK-NEXT: cmpw 5, 3
-; CHECK-NEXT: bne 0, .LBB0_3
-; CHECK-NEXT: # %bb.2: # %L.entry
-; CHECK-NEXT: #
-; CHECK-NEXT: sthcx. 4, 0, 6
-; CHECK-NEXT: bne 0, .LBB0_1
-; CHECK-NEXT: .LBB0_3: # %L.entry
-; CHECK-NEXT: cmplwi 5, 33059
+; CHECK-NEXT: li 4, 234
+; CHECK-NEXT: .p2align 5
+; CHECK-NEXT: .LBB0_2: # %cmpxchg.trystore
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sthcx. 4, 0, 3
+; CHECK-NEXT: beq 0, .LBB0_7
+; CHECK-NEXT: # %bb.3: # %cmpxchg.releasedload
+; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: lharx 5, 0, 3
+; CHECK-NEXT: clrlwi 5, 5, 16
+; CHECK-NEXT: cmplwi 5, 33059
+; CHECK-NEXT: beq 0, .LBB0_2
+; CHECK-NEXT: .LBB0_4: # %cmpxchg.nostore
; CHECK-NEXT: lwsync
-; CHECK-NEXT: bne 0, .LBB0_6
-; CHECK-NEXT: # %bb.4: # %L.B0000
+; CHECK-NEXT: b .LBB0_8
+; CHECK-NEXT: .LBB0_5: # %L.B0000
; CHECK-NEXT: lhz 3, 46(1)
-; CHECK-NEXT: cmplwi 3, 234
-; CHECK-NEXT: bne 0, .LBB0_7
-; CHECK-NEXT: # %bb.5: # %L.B0001
+; CHECK-NEXT: cmplwi 3, 234
+; CHECK-NEXT: bne 0, .LBB0_9
+; CHECK-NEXT: # %bb.6: # %L.B0001
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-NEXT: bl puts
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 0
-; CHECK-NEXT: b .LBB0_9
-; CHECK-NEXT: .LBB0_6: # %L.B0003
+; CHECK-NEXT: b .LBB0_11
+; CHECK-NEXT: .LBB0_7: # %cmpxchg.success
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: b .LBB0_5
+; CHECK-NEXT: .LBB0_8: # %L.B0003
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-NEXT: addi 3, 3, 16
-; CHECK-NEXT: b .LBB0_8
-; CHECK-NEXT: .LBB0_7: # %L.B0005
+; CHECK-NEXT: b .LBB0_10
+; CHECK-NEXT: .LBB0_9: # %L.B0005
; CHECK-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-NEXT: addi 3, 3, 64
-; CHECK-NEXT: .LBB0_8: # %L.B0003
+; CHECK-NEXT: .LBB0_10: # %L.B0003
; CHECK-NEXT: bl puts
; CHECK-NEXT: nop
; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: .LBB0_9: # %L.B0003
+; CHECK-NEXT: .LBB0_11: # %L.B0003
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: ld 0, 16(1)
; CHECK-NEXT: mtlr 0
@@ -69,64 +76,69 @@ define signext i32 @main() nounwind {
; CHECK-P7-NEXT: mflr 0
; CHECK-P7-NEXT: stdu 1, -48(1)
; CHECK-P7-NEXT: li 3, -32477
-; CHECK-P7-NEXT: std 0, 64(1)
; CHECK-P7-NEXT: addi 4, 1, 46
-; CHECK-P7-NEXT: li 6, 234
+; CHECK-P7-NEXT: std 0, 64(1)
; CHECK-P7-NEXT: sth 3, 46(1)
-; CHECK-P7-NEXT: lis 3, 0
+; CHECK-P7-NEXT: rldicr 3, 4, 0, 61
+; CHECK-P7-NEXT: rlwinm 4, 4, 3, 27, 27
+; CHECK-P7-NEXT: lwarx 5, 0, 3
+; CHECK-P7-NEXT: srw 6, 5, 4
+; CHECK-P7-NEXT: clrlwi 6, 6, 16
+; CHECK-P7-NEXT: cmplwi 6, 33059
+; CHECK-P7-NEXT: bne 0, .LBB0_4
+; CHECK-P7-NEXT: # %bb.1: # %cmpxchg.fencedstore
+; CHECK-P7-NEXT: lis 6, 0
+; CHECK-P7-NEXT: li 7, 234
; CHECK-P7-NEXT: sync
-; CHECK-P7-NEXT: ori 5, 3, 33059
-; CHECK-P7-NEXT: rlwinm 3, 4, 3, 27, 27
-; CHECK-P7-NEXT: rldicr 4, 4, 0, 61
-; CHECK-P7-NEXT: slw 7, 5, 3
-; CHECK-P7-NEXT: li 5, 0
-; CHECK-P7-NEXT: slw 6, 6, 3
-; CHECK-P7-NEXT: ori 5, 5, 65535
-; CHECK-P7-NEXT: slw 5, 5, 3
-; CHECK-P7-NEXT: and 6, 6, 5
-; CHECK-P7-NEXT: and 7, 7, 5
-; CHECK-P7-NEXT: .LBB0_1: # %L.entry
-; CHECK-P7-NEXT: #
-; CHECK-P7-NEXT: lwarx 9, 0, 4
-; CHECK-P7-NEXT: and 8, 9, 5
-; CHECK-P7-NEXT: cmpw 8, 7
-; CHECK-P7-NEXT: bne 0, .LBB0_3
-; CHECK-P7-NEXT: # %bb.2: # %L.entry
-; CHECK-P7-NEXT: #
-; CHECK-P7-NEXT: andc 9, 9, 5
-; CHECK-P7-NEXT: or 9, 9, 6
-; CHECK-P7-NEXT: stwcx. 9, 0, 4
-; CHECK-P7-NEXT: bne 0, .LBB0_1
-; CHECK-P7-NEXT: .LBB0_3: # %L.entry
-; CHECK-P7-NEXT: srw 3, 8, 3
+; CHECK-P7-NEXT: ori 6, 6, 65535
+; CHECK-P7-NEXT: slw 7, 7, 4
+; CHECK-P7-NEXT: slw 6, 6, 4
+; CHECK-P7-NEXT: not 6, 6
+; CHECK-P7-NEXT: .p2align 4
+; CHECK-P7-NEXT: .LBB0_2: # %cmpxchg.trystore
+; CHECK-P7-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-P7-NEXT: and 5, 5, 6
+; CHECK-P7-NEXT: or 5, 5, 7
+; CHECK-P7-NEXT: stwcx. 5, 0, 3
+; CHECK-P7-NEXT: beq 0, .LBB0_7
+; CHECK-P7-NEXT: # %bb.3: # %cmpxchg.releasedload
+; CHECK-P7-NEXT: # in Loop: Header=BB0_2 Depth=1
+; CHECK-P7-NEXT: lwarx 5, 0, 3
+; CHECK-P7-NEXT: srw 8, 5, 4
+; CHECK-P7-NEXT: clrlwi 8, 8, 16
+; CHECK-P7-NEXT: cmplwi 8, 33059
+; CHECK-P7-NEXT: beq 0, .LBB0_2
+; CHECK-P7-NEXT: .LBB0_4: # %cmpxchg.nostore
; CHECK-P7-NEXT: lwsync
-; CHECK-P7-NEXT: cmplwi 3, 33059
-; CHECK-P7-NEXT: bne 0, .LBB0_6
-; CHECK-P7-NEXT: # %bb.4: # %L.B0000
+; CHECK-P7-NEXT: b .LBB0_8
+; CHECK-P7-NEXT: .LBB0_5: # %L.B0000
; CHECK-P7-NEXT: lhz 3, 46(1)
-; CHECK-P7-NEXT: cmplwi 3, 234
-; CHECK-P7-NEXT: bne 0, .LBB0_7
-; CHECK-P7-NEXT: # %bb.5: # %L.B0001
+; CHECK-P7-NEXT: cmplwi 3, 234
+; CHECK-P7-NEXT: bne 0, .LBB0_9
+; CHECK-P7-NEXT: # %bb.6: # %L.B0001
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-P7-NEXT: bl puts
; CHECK-P7-NEXT: nop
; CHECK-P7-NEXT: li 3, 0
-; CHECK-P7-NEXT: b .LBB0_9
-; CHECK-P7-NEXT: .LBB0_6: # %L.B0003
+; CHECK-P7-NEXT: b .LBB0_11
+; CHECK-P7-NEXT: .LBB0_7: # %cmpxchg.success
+; CHECK-P7-NEXT: lwsync
+; CHECK-P7-NEXT: b .LBB0_5
+; CHECK-P7-NEXT: .LBB0_8: # %L.B0003
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-P7-NEXT: addi 3, 3, 16
-; CHECK-P7-NEXT: b .LBB0_8
-; CHECK-P7-NEXT: .LBB0_7: # %L.B0005
+; CHECK-P7-NEXT: b .LBB0_10
+; CHECK-P7-NEXT: .LBB0_9: # %L.B0005
; CHECK-P7-NEXT: addis 3, 2, .L_MergedGlobals@toc@ha
; CHECK-P7-NEXT: addi 3, 3, .L_MergedGlobals@toc@l
; CHECK-P7-NEXT: addi 3, 3, 64
-; CHECK-P7-NEXT: .LBB0_8: # %L.B0003
+; CHECK-P7-NEXT: .LBB0_10: # %L.B0003
; CHECK-P7-NEXT: bl puts
; CHECK-P7-NEXT: nop
; CHECK-P7-NEXT: li 3, 1
-; CHECK-P7-NEXT: .LBB0_9: # %L.B0003
+; CHECK-P7-NEXT: .LBB0_11: # %L.B0003
; CHECK-P7-NEXT: addi 1, 1, 48
; CHECK-P7-NEXT: ld 0, 16(1)
; CHECK-P7-NEXT: mtlr 0
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 531e559ea7309..67cee358882ff 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -4336,704 +4336,959 @@ entry:
define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
; CHECK-LABEL: test_compare_and_swap:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addis 3, 2, uc@toc@ha
-; CHECK-NEXT: addis 4, 2, sc@toc@ha
-; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT: lbz 5, uc@toc@l(3)
-; CHECK-NEXT: lbz 8, sc@toc@l(4)
-; CHECK-NEXT: addi 6, 3, uc@toc@l
-; CHECK-NEXT: addi 0, 4, sc@toc@l
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_1: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lbarx 7, 0, 0
-; CHECK-NEXT: cmpw 7, 5
-; CHECK-NEXT: bne 0, .LBB3_3
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stbcx. 8, 0, 0
-; CHECK-NEXT: bne 0, .LBB3_1
-; CHECK-NEXT: .LBB3_3: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: stb 7, sc@toc@l(4)
-; CHECK-NEXT: lbz 8, uc@toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: .LBB3_4: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lbarx 5, 0, 6
-; CHECK-NEXT: cmpw 5, 8
-; CHECK-NEXT: bne 0, .LBB3_6
-; CHECK-NEXT: # %bb.5: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: stbcx. 7, 0, 6
-; CHECK-NEXT: bne 0, .LBB3_4
-; CHECK-NEXT: .LBB3_6: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: stb 5, uc@toc@l(3)
-; CHECK-NEXT: lbz 7, sc@toc@l(4)
-; CHECK-NEXT: sync
-; CHECK-NEXT: extsb 8, 7
-; CHECK-NEXT: addis 7, 2, ss@toc@ha
-; CHECK-NEXT: addi 12, 7, ss@toc@l
-; CHECK-NEXT: .LBB3_7: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lharx 9, 0, 12
-; CHECK-NEXT: cmpw 9, 5
-; CHECK-NEXT: bne 0, .LBB3_9
-; CHECK-NEXT: # %bb.8: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: sthcx. 8, 0, 12
-; CHECK-NEXT: bne 0, .LBB3_7
-; CHECK-NEXT: .LBB3_9: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: sth 9, ss@toc@l(7)
-; CHECK-NEXT: lbz 7, sc@toc@l(4)
-; CHECK-NEXT: lbz 5, uc@toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: extsb 8, 7
-; CHECK-NEXT: addis 7, 2, us@toc@ha
-; CHECK-NEXT: addi 11, 7, us@toc@l
-; CHECK-NEXT: .LBB3_10: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lharx 9, 0, 11
-; CHECK-NEXT: cmpw 9, 5
-; CHECK-NEXT: bne 0, .LBB3_12
-; CHECK-NEXT: # %bb.11: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: sthcx. 8, 0, 11
-; CHECK-NEXT: bne 0, .LBB3_10
-; CHECK-NEXT: .LBB3_12: # %entry
-; CHECK-NEXT: lwsync
-; CHECK-NEXT: sth 9, us@toc@l(7)
-; CHECK-NEXT: lbz 7, sc@toc@l(4)
-; CHECK-NEXT: lbz 5, uc@toc@l(3)
-; CHECK-NEXT: sync
-; CHECK-NEXT: extsb 8, 7
-; CHECK-NEXT: addis 7, 2, si@toc@ha
-; CHECK-NEXT: addi 10, 7, si@toc@l
-; CHECK-NEXT: .LBB3_13: # %entry
-; CHECK-NEXT: #
-; CHECK-NEXT: lwarx 9, 0, 10
-; CHECK-NEXT: cmpw 9, 5
-...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/142395
More information about the llvm-commits
mailing list