[llvm] [PowerPC] enable AtomicExpandImpl::expandAtomicCmpXchg for powerpc (PR #142395)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 2 06:57:46 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: zhijian lin (diggerlin)

<details>
<summary>Changes</summary>

On PowerPC, AtomicCmpXchgInst is lowered to ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS. However, this node does not honor the weak attribute of AtomicCmpXchgInst. As a result, when compiling the C++ function atomic_compare_exchange_weak_explicit, the generated assembly includes a "reservation lost" loop — i.e., it branches back and retries if the stwcx. (store-conditional) instruction fails. This differs from GCC’s codegen, which does not include that loop for weak compare-exchange.

Since PowerPC uses LL/SC-style atomic instructions, the patch enables AtomicExpandImpl::expandAtomicCmpXchg for PowerPC. With this, the weak attribute is properly respected, and the "reservation lost" loop is removed for weak operations.


---

Patch is 250.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142395.diff


17 Files Affected:

- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+1-1) 
- (modified) llvm/include/llvm/IR/IntrinsicsPowerPC.td (+14-1) 
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+77-1) 
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+6) 
- (modified) llvm/lib/Target/PowerPC/PPCInstr64Bit.td (+2) 
- (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.td (+7-1) 
- (modified) llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll (+77-65) 
- (modified) llvm/test/CodeGen/PowerPC/all-atomics.ll (+963-709) 
- (modified) llvm/test/CodeGen/PowerPC/atomic-2.ll (+2-2) 
- (modified) llvm/test/CodeGen/PowerPC/atomic-compare-exchange-weak.ll (+25-27) 
- (modified) llvm/test/CodeGen/PowerPC/atomic-float.ll (+58-50) 
- (modified) llvm/test/CodeGen/PowerPC/atomicrmw-cond-sub-clamp.ll (+257-269) 
- (modified) llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll (+257-267) 
- (modified) llvm/test/CodeGen/PowerPC/atomics-regression.ll (+1168-572) 
- (modified) llvm/test/CodeGen/PowerPC/atomics.ll (+121-114) 
- (modified) llvm/test/CodeGen/PowerPC/loop-comment.ll (+7-2) 
- (modified) llvm/test/Transforms/AtomicExpand/PowerPC/atomicrmw-fp.ll (+91-25) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 5105bcc5cce3a..50c1a7f7c30f6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -257,7 +257,7 @@ class TargetLoweringBase {
     CastToInteger,    // Cast the atomic instruction to another type, e.g. from
                       // floating-point to integer type.
     LLSC,    // Expand the instruction into loadlinked/storeconditional; used
-             // by ARM/AArch64.
+             // by ARM/AArch64/PowerPC.
     LLOnly,  // Expand the (load) instruction into just a load-linked, which has
              // greater atomic guarantees than a normal load.
     CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 751628cee58c0..68d8b5ae0c38d 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1835,6 +1835,19 @@ let TargetPrefix = "ppc" in {
                       Intrinsic<[],[],[]>;
   def int_ppc_iospace_eieio : ClangBuiltin<"__builtin_ppc_iospace_eieio">,
                               Intrinsic<[],[],[]>;
+  def int_ppc_lbarx :
+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+              [IntrReadMem, IntrArgMemOnly, IntrNoDuplicate]>;
+  def int_ppc_lharx :
+    Intrinsic<[llvm_i32_ty],[llvm_ptr_ty],
+              [IntrReadMem, IntrArgMemOnly, IntrNoDuplicate]>;
+  def int_ppc_lwarx :
+    Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+              [IntrReadMem, IntrArgMemOnly, IntrNoDuplicate]>;
+  def int_ppc_ldarx :
+    Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],
+              [IntrReadMem, IntrArgMemOnly, IntrNoDuplicate]>;
+
   def int_ppc_stdcx :
     ClangBuiltin<"__builtin_ppc_stdcx">,
     Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i64_ty],
@@ -1844,7 +1857,7 @@ let TargetPrefix = "ppc" in {
     Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
               [IntrWriteMem, IntrArgMemOnly]>;
   def int_ppc_sthcx :
-    Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty ],
+    Intrinsic<[llvm_i32_ty], [ llvm_ptr_ty, llvm_i32_ty],
               [IntrWriteMem, IntrArgMemOnly, IntrNoDuplicate]>;
   def int_ppc_stbcx :
     ClangBuiltin<"__builtin_ppc_stbcx">,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0c2a506005604..287145e3de7a4 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1475,6 +1475,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setMinimumJumpTableEntries(PPCMinimumJumpTableEntries);
 
   setMinFunctionAlignment(Align(4));
+  if(Subtarget.hasPartwordAtomics())
+    setMinCmpXchgSizeInBits(8);
+  else
+    setMinCmpXchgSizeInBits(32);
 
   switch (Subtarget.getCPUDirective()) {
   default: break;
@@ -12672,6 +12676,77 @@ static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
   return Builder.CreateIntrinsic(Id, {});
 }
 
+Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
+                                         Value *Addr,
+                                         AtomicOrdering Ord) const {
+  unsigned SZ = ValueTy->getPrimitiveSizeInBits();
+
+  assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
+         +"Only 8/16/32/64-bit atomic loads supported");
+  Intrinsic::ID IntID;
+  switch (SZ) {
+  default:
+    llvm_unreachable("Unexpected PrimitiveSize");
+  case 8:
+    IntID = Intrinsic::ppc_lbarx;
+    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+    break;
+  case 16:
+    IntID = Intrinsic::ppc_lharx;
+    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+    break;
+  case 32:
+    IntID = Intrinsic::ppc_lwarx;
+    break;
+  case 64:
+    IntID = Intrinsic::ppc_ldarx;
+    break;
+  }
+  Value *Call =
+      Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
+
+  return Builder.CreateTruncOrBitCast(Call, ValueTy);
+}
+
+// Perform a store-conditional operation to Addr. Return the status of the
+// store. This should be 0 if the store succeeded, non-zero otherwise.
+Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
+                                               Value *Val, Value *Addr,
+                                               AtomicOrdering Ord) const {
+  Type *Ty = Val->getType();
+  unsigned SZ = Ty->getPrimitiveSizeInBits();
+
+  assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
+         "Only 8/16/32/64-bit atomic loads supported");
+  Intrinsic::ID IntID;
+  switch (SZ) {
+  default:
+    llvm_unreachable("Unexpected PrimitiveSize");
+  case 8:
+    IntID = Intrinsic::ppc_stbcx;
+    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+    break;
+  case 16:
+    IntID = Intrinsic::ppc_sthcx;
+    assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
+    break;
+  case 32:
+    IntID = Intrinsic::ppc_stwcx;
+    break;
+  case 64:
+    IntID = Intrinsic::ppc_stdcx;
+    break;
+  }
+
+  if(SZ ==8 || SZ==16)
+    Val = Builder.CreateZExt(Val, Builder.getIntNTy(32));;
+
+  Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
+                                        /*FMFSource=*/nullptr, "stcx");
+  Value *Not = Builder.CreateXor(Call,Builder.getInt32(1));
+  return Not;
+}
+
 // The mappings for emitLeading/TrailingFence is taken from
 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
@@ -19633,7 +19708,8 @@ PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
   unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
   if (shouldInlineQuadwordAtomics() && Size == 128)
     return AtomicExpansionKind::MaskedIntrinsic;
-  return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
+  return AtomicExpansionKind::LLSC;
+  //return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
 }
 
 static Intrinsic::ID
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 2c55b5427297a..4c88bd372b106 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -927,6 +927,12 @@ namespace llvm {
       return true;
     }
 
+    Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
+                          AtomicOrdering Ord) const override;
+
+    Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
+                                AtomicOrdering Ord) const override;
+
     Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                   AtomicOrdering Ord) const override;
     Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 659c1a9079c33..fd2084398c857 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -2023,6 +2023,8 @@ def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>;
 
 } // IsISA3_0
 
+def : Pat<(int_ppc_ldarx ForceXForm:$ptr),
+          (LDARX ForceXForm:$ptr)>;
 def : Pat<(int_ppc_stdcx ForceXForm:$dst, g8rc:$A),
           (RLWINM (STDCX g8rc:$A, ForceXForm:$dst), 31, 31, 31)>;
 def : Pat<(PPCStoreCond ForceXForm:$dst, g8rc:$A, 8),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index b70290df07b1c..99ef89a7fdc0c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -5143,7 +5143,6 @@ def : Pat<(int_ppc_store2r gprc:$a, ForceXForm:$ptr),
 def : Pat<(int_ppc_store4r gprc:$a, ForceXForm:$ptr),
           (STWBRX gprc:$a, ForceXForm:$ptr)>;
 
-
 // Fast 32-bit reverse bits algorithm:
 // Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
 // n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA);
@@ -5324,10 +5323,14 @@ def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>;
 def : Pat<(i64 (bitreverse i64:$A)),
   (OR8 (RLDICR DWBytes7654.DWord, 32, 31), DWBytes3210.DWord)>;
 
+def : Pat<(int_ppc_lwarx ForceXForm:$ptr),
+          (LWARX ForceXForm:$ptr)>;
 def : Pat<(int_ppc_stwcx ForceXForm:$dst, gprc:$A),
           (RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
 def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 4),
           (RLWINM (STWCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
+def : Pat<(int_ppc_lbarx ForceXForm:$ptr),
+          (LBARX ForceXForm:$ptr)>;
 def : Pat<(int_ppc_stbcx ForceXForm:$dst, gprc:$A),
           (RLWINM (STBCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
 def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 1),
@@ -5360,6 +5363,9 @@ def : Pat<(int_ppc_mtmsr gprc:$RS),
           (MTMSR $RS, 0)>;
 
 let Predicates = [IsISA2_07] in {
+  def : Pat<(int_ppc_lharx ForceXForm:$ptr),
+          (LHARX ForceXForm:$ptr)>;
+
   def : Pat<(int_ppc_sthcx ForceXForm:$dst, gprc:$A),
             (RLWINM (STHCX gprc:$A, ForceXForm:$dst), 31, 31, 31)>;
   def : Pat<(PPCStoreCond ForceXForm:$dst, gprc:$A, 2),
diff --git a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
index 8517783e3ebd7..1a8dabc5ad719 100644
--- a/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
+++ b/llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll
@@ -15,50 +15,57 @@ define signext i32 @main() nounwind {
 ; CHECK-NEXT:    stdu 1, -48(1)
 ; CHECK-NEXT:    li 3, -32477
 ; CHECK-NEXT:    std 0, 64(1)
-; CHECK-NEXT:    li 4, 234
-; CHECK-NEXT:    addi 6, 1, 46
 ; CHECK-NEXT:    sth 3, 46(1)
-; CHECK-NEXT:    lis 3, 0
+; CHECK-NEXT:    addi 3, 1, 46
+; CHECK-NEXT:    lharx 4, 0, 3
+; CHECK-NEXT:    clrlwi  4, 4, 16
+; CHECK-NEXT:    cmplwi  4, 33059
+; CHECK-NEXT:    bne     0, .LBB0_4
+; CHECK-NEXT:  # %bb.1:                                # %cmpxchg.fencedstore
 ; CHECK-NEXT:    sync
-; CHECK-NEXT:    ori 3, 3, 33059
-; CHECK-NEXT:  .LBB0_1: # %L.entry
-; CHECK-NEXT:    #
-; CHECK-NEXT:    lharx 5, 0, 6
-; CHECK-NEXT:    cmpw 5, 3
-; CHECK-NEXT:    bne 0, .LBB0_3
-; CHECK-NEXT:  # %bb.2: # %L.entry
-; CHECK-NEXT:    #
-; CHECK-NEXT:    sthcx. 4, 0, 6
-; CHECK-NEXT:    bne 0, .LBB0_1
-; CHECK-NEXT:  .LBB0_3: # %L.entry
-; CHECK-NEXT:    cmplwi 5, 33059
+; CHECK-NEXT:    li 4, 234
+; CHECK-NEXT:    .p2align        5
+; CHECK-NEXT:  .LBB0_2:                                # %cmpxchg.trystore
+; CHECK-NEXT:                                          # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    sthcx. 4, 0, 3
+; CHECK-NEXT:    beq     0, .LBB0_7
+; CHECK-NEXT:  # %bb.3:                                # %cmpxchg.releasedload
+; CHECK-NEXT:                                          #   in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    lharx 5, 0, 3
+; CHECK-NEXT:    clrlwi  5, 5, 16
+; CHECK-NEXT:    cmplwi  5, 33059
+; CHECK-NEXT:    beq     0, .LBB0_2
+; CHECK-NEXT:  .LBB0_4:                                # %cmpxchg.nostore
 ; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    bne 0, .LBB0_6
-; CHECK-NEXT:  # %bb.4: # %L.B0000
+; CHECK-NEXT:    b .LBB0_8
+; CHECK-NEXT:  .LBB0_5:                                # %L.B0000
 ; CHECK-NEXT:    lhz 3, 46(1)
-; CHECK-NEXT:    cmplwi 3, 234
-; CHECK-NEXT:    bne 0, .LBB0_7
-; CHECK-NEXT:  # %bb.5: # %L.B0001
+; CHECK-NEXT:    cmplwi  3, 234
+; CHECK-NEXT:    bne     0, .LBB0_9
+; CHECK-NEXT:  # %bb.6:                                # %L.B0001
 ; CHECK-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-NEXT:    bl puts
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    b .LBB0_9
-; CHECK-NEXT:  .LBB0_6: # %L.B0003
+; CHECK-NEXT:    b .LBB0_11
+; CHECK-NEXT:  .LBB0_7:                                # %cmpxchg.success
+; CHECK-NEXT:    lwsync
+; CHECK-NEXT:    b .LBB0_5
+; CHECK-NEXT:  .LBB0_8:                                # %L.B0003
 ; CHECK-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-NEXT:    addi 3, 3, 16
-; CHECK-NEXT:    b .LBB0_8
-; CHECK-NEXT:  .LBB0_7: # %L.B0005
+; CHECK-NEXT:    b .LBB0_10
+; CHECK-NEXT:  .LBB0_9:                                # %L.B0005
 ; CHECK-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-NEXT:    addi 3, 3, 64
-; CHECK-NEXT:  .LBB0_8: # %L.B0003
+; CHECK-NEXT:  .LBB0_10:                               # %L.B0003
 ; CHECK-NEXT:    bl puts
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:  .LBB0_9: # %L.B0003
+; CHECK-NEXT:  .LBB0_11:                               # %L.B0003
 ; CHECK-NEXT:    addi 1, 1, 48
 ; CHECK-NEXT:    ld 0, 16(1)
 ; CHECK-NEXT:    mtlr 0
@@ -69,64 +76,69 @@ define signext i32 @main() nounwind {
 ; CHECK-P7-NEXT:    mflr 0
 ; CHECK-P7-NEXT:    stdu 1, -48(1)
 ; CHECK-P7-NEXT:    li 3, -32477
-; CHECK-P7-NEXT:    std 0, 64(1)
 ; CHECK-P7-NEXT:    addi 4, 1, 46
-; CHECK-P7-NEXT:    li 6, 234
+; CHECK-P7-NEXT:    std 0, 64(1)
 ; CHECK-P7-NEXT:    sth 3, 46(1)
-; CHECK-P7-NEXT:    lis 3, 0
+; CHECK-P7-NEXT:    rldicr 3, 4, 0, 61
+; CHECK-P7-NEXT:    rlwinm 4, 4, 3, 27, 27
+; CHECK-P7-NEXT:    lwarx 5, 0, 3
+; CHECK-P7-NEXT:    srw 6, 5, 4
+; CHECK-P7-NEXT:    clrlwi  6, 6, 16
+; CHECK-P7-NEXT:    cmplwi  6, 33059
+; CHECK-P7-NEXT:    bne     0, .LBB0_4
+; CHECK-P7-NEXT:  # %bb.1:                                # %cmpxchg.fencedstore
+; CHECK-P7-NEXT:    lis 6, 0
+; CHECK-P7-NEXT:    li 7, 234
 ; CHECK-P7-NEXT:    sync
-; CHECK-P7-NEXT:    ori 5, 3, 33059
-; CHECK-P7-NEXT:    rlwinm 3, 4, 3, 27, 27
-; CHECK-P7-NEXT:    rldicr 4, 4, 0, 61
-; CHECK-P7-NEXT:    slw 7, 5, 3
-; CHECK-P7-NEXT:    li 5, 0
-; CHECK-P7-NEXT:    slw 6, 6, 3
-; CHECK-P7-NEXT:    ori 5, 5, 65535
-; CHECK-P7-NEXT:    slw 5, 5, 3
-; CHECK-P7-NEXT:    and 6, 6, 5
-; CHECK-P7-NEXT:    and 7, 7, 5
-; CHECK-P7-NEXT:  .LBB0_1: # %L.entry
-; CHECK-P7-NEXT:    #
-; CHECK-P7-NEXT:    lwarx 9, 0, 4
-; CHECK-P7-NEXT:    and 8, 9, 5
-; CHECK-P7-NEXT:    cmpw 8, 7
-; CHECK-P7-NEXT:    bne 0, .LBB0_3
-; CHECK-P7-NEXT:  # %bb.2: # %L.entry
-; CHECK-P7-NEXT:    #
-; CHECK-P7-NEXT:    andc 9, 9, 5
-; CHECK-P7-NEXT:    or 9, 9, 6
-; CHECK-P7-NEXT:    stwcx. 9, 0, 4
-; CHECK-P7-NEXT:    bne 0, .LBB0_1
-; CHECK-P7-NEXT:  .LBB0_3: # %L.entry
-; CHECK-P7-NEXT:    srw 3, 8, 3
+; CHECK-P7-NEXT:    ori 6, 6, 65535
+; CHECK-P7-NEXT:    slw 7, 7, 4
+; CHECK-P7-NEXT:    slw 6, 6, 4
+; CHECK-P7-NEXT:    not     6, 6
+; CHECK-P7-NEXT:    .p2align        4
+; CHECK-P7-NEXT:  .LBB0_2:                                # %cmpxchg.trystore
+; CHECK-P7-NEXT:                                        # =>This Inner Loop Header: Depth=1
+; CHECK-P7-NEXT:    and 5, 5, 6
+; CHECK-P7-NEXT:    or 5, 5, 7
+; CHECK-P7-NEXT:    stwcx. 5, 0, 3
+; CHECK-P7-NEXT:    beq     0, .LBB0_7
+; CHECK-P7-NEXT:  # %bb.3:                                # %cmpxchg.releasedload
+; CHECK-P7-NEXT:                                        #   in Loop: Header=BB0_2 Depth=1
+; CHECK-P7-NEXT:    lwarx 5, 0, 3
+; CHECK-P7-NEXT:    srw 8, 5, 4
+; CHECK-P7-NEXT:    clrlwi  8, 8, 16
+; CHECK-P7-NEXT:    cmplwi  8, 33059
+; CHECK-P7-NEXT:    beq     0, .LBB0_2
+; CHECK-P7-NEXT:  .LBB0_4:                                # %cmpxchg.nostore
 ; CHECK-P7-NEXT:    lwsync
-; CHECK-P7-NEXT:    cmplwi 3, 33059
-; CHECK-P7-NEXT:    bne 0, .LBB0_6
-; CHECK-P7-NEXT:  # %bb.4: # %L.B0000
+; CHECK-P7-NEXT:    b .LBB0_8
+; CHECK-P7-NEXT:  .LBB0_5:                                # %L.B0000
 ; CHECK-P7-NEXT:    lhz 3, 46(1)
-; CHECK-P7-NEXT:    cmplwi 3, 234
-; CHECK-P7-NEXT:    bne 0, .LBB0_7
-; CHECK-P7-NEXT:  # %bb.5: # %L.B0001
+; CHECK-P7-NEXT:    cmplwi  3, 234
+; CHECK-P7-NEXT:    bne     0, .LBB0_9
+; CHECK-P7-NEXT:  # %bb.6:                                # %L.B0001
 ; CHECK-P7-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-P7-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-P7-NEXT:    bl puts
 ; CHECK-P7-NEXT:    nop
 ; CHECK-P7-NEXT:    li 3, 0
-; CHECK-P7-NEXT:    b .LBB0_9
-; CHECK-P7-NEXT:  .LBB0_6: # %L.B0003
+; CHECK-P7-NEXT:    b .LBB0_11
+; CHECK-P7-NEXT:  .LBB0_7:                                # %cmpxchg.success
+; CHECK-P7-NEXT:    lwsync
+; CHECK-P7-NEXT:    b .LBB0_5
+; CHECK-P7-NEXT:  .LBB0_8:                                # %L.B0003
 ; CHECK-P7-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-P7-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-P7-NEXT:    addi 3, 3, 16
-; CHECK-P7-NEXT:    b .LBB0_8
-; CHECK-P7-NEXT:  .LBB0_7: # %L.B0005
+; CHECK-P7-NEXT:    b .LBB0_10
+; CHECK-P7-NEXT:  .LBB0_9:                                # %L.B0005
 ; CHECK-P7-NEXT:    addis 3, 2, .L_MergedGlobals at toc@ha
 ; CHECK-P7-NEXT:    addi 3, 3, .L_MergedGlobals at toc@l
 ; CHECK-P7-NEXT:    addi 3, 3, 64
-; CHECK-P7-NEXT:  .LBB0_8: # %L.B0003
+; CHECK-P7-NEXT:  .LBB0_10:                               # %L.B0003
 ; CHECK-P7-NEXT:    bl puts
 ; CHECK-P7-NEXT:    nop
 ; CHECK-P7-NEXT:    li 3, 1
-; CHECK-P7-NEXT:  .LBB0_9: # %L.B0003
+; CHECK-P7-NEXT:  .LBB0_11:                               # %L.B0003
 ; CHECK-P7-NEXT:    addi 1, 1, 48
 ; CHECK-P7-NEXT:    ld 0, 16(1)
 ; CHECK-P7-NEXT:    mtlr 0
diff --git a/llvm/test/CodeGen/PowerPC/all-atomics.ll b/llvm/test/CodeGen/PowerPC/all-atomics.ll
index 531e559ea7309..67cee358882ff 100644
--- a/llvm/test/CodeGen/PowerPC/all-atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/all-atomics.ll
@@ -4336,704 +4336,959 @@ entry:
 define dso_local void @test_compare_and_swap() local_unnamed_addr #0 {
 ; CHECK-LABEL: test_compare_and_swap:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addis 3, 2, uc at toc@ha
-; CHECK-NEXT:    addis 4, 2, sc at toc@ha
-; CHECK-NEXT:    std 27, -40(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 29, -24(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lbz 5, uc at toc@l(3)
-; CHECK-NEXT:    lbz 8, sc at toc@l(4)
-; CHECK-NEXT:    addi 6, 3, uc at toc@l
-; CHECK-NEXT:    addi 0, 4, sc at toc@l
-; CHECK-NEXT:    sync
-; CHECK-NEXT:  .LBB3_1: # %entry
-; CHECK-NEXT:    #
-; CHECK-NEXT:    lbarx 7, 0, 0
-; CHECK-NEXT:    cmpw 7, 5
-; CHECK-NEXT:    bne 0, .LBB3_3
-; CHECK-NEXT:  # %bb.2: # %entry
-; CHECK-NEXT:    #
-; CHECK-NEXT:    stbcx. 8, 0, 0
-; CHECK-NEXT:    bne 0, .LBB3_1
-; CHECK-NEXT:  .LBB3_3: # %entry
-; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    stb 7, sc at toc@l(4)
-; CHECK-NEXT:    lbz 8, uc at toc@l(3)
-; CHECK-NEXT:    sync
-; CHECK-NEXT:  .LBB3_4: # %entry
-; CHECK-NEXT:    #
-; CHECK-NEXT:    lbarx 5, 0, 6
-; CHECK-NEXT:    cmpw 5, 8
-; CHECK-NEXT:    bne 0, .LBB3_6
-; CHECK-NEXT:  # %bb.5: # %entry
-; CHECK-NEXT:    #
-; CHECK-NEXT:    stbcx. 7, 0, 6
-; CHECK-NEXT:    bne 0, .LBB3_4
-; CHECK-NEXT:  .LBB3_6: # %entry
-; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    stb 5, uc at toc@l(3)
-; CHECK-NEXT:    lbz 7, sc at toc@l(4)
-; CHECK-NEXT:    sync
-; CHECK-NEXT:    extsb 8, 7
-; CHECK-NEXT:    addis 7, 2, ss at toc@ha
-; CHECK-NEXT:    addi 12, 7, ss at toc@l
-; CHECK-NEXT:  .LBB3_7: # %entry
-; CHECK-NEXT:    #
-; CHECK-NEXT:    lharx 9, 0, 12
-; CHECK-NEXT:    cmpw 9, 5
-; CHECK-NEXT:    bne 0, .LBB3_9
-; CHECK-NEXT:  # %bb.8: # %entry
-; CHECK-NEXT:    #
-; CHECK-NEXT:    sthcx. 8, 0, 12
-; CHECK-NEXT:    bne 0, .LBB3_7
-; CHECK-NEXT:  .LBB3_9: # %entry
-; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    sth 9, ss at toc@l(7)
-; CHECK-NEXT:    lbz 7, sc at toc@l(4)
-; CHECK-NEXT:    lbz 5, uc at toc@l(3)
-; CHECK-NEXT:    sync
-; CHECK-NEXT:    extsb 8, 7
-; CHECK-NEXT:    addis 7, 2, us at toc@ha
-; CHECK-NEXT:    addi 11, 7, us at toc@l
-; CHECK-NEXT:  .LBB3_10: # %entry
-; CHECK-NEXT:    #
-; CHECK-NEXT:    lharx 9, 0, 11
-; CHECK-NEXT:    cmpw 9, 5
-; CHECK-NEXT:    bne 0, .LBB3_12
-; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    #
-; CHECK-NEXT:    sthcx. 8, 0, 11
-; CHECK-NEXT:    bne 0, .LBB3_10
-; CHECK-NEXT:  .LBB3_12: # %entry
-; CHECK-NEXT:    lwsync
-; CHECK-NEXT:    sth 9, us at toc@l(7)
-; CHECK-NEXT:    lbz 7, sc at toc@l(4)
-; CHECK-NEXT:    lbz 5, uc at toc@l(3)
-; CHECK-NEXT:    sync
-; CHECK-NEXT:    extsb 8, 7
-; CHECK-NEXT:    addis 7, 2, si at toc@ha
-; CHECK-NEXT:    addi 10, 7, si at toc@l
-; CHECK-NEXT:  .LBB3_13: # %entry
-; CHECK-NEXT:    #
-; CHECK-NEXT:    lwarx 9, 0, 10
-; CHECK-NEXT:    cmpw 9, 5
-...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/142395


More information about the llvm-commits mailing list