[llvm] 3104681 - [PowerPC][Atomics] Remove redundant block to clear reservation (#68430)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 12 19:59:32 PDT 2023
Author: Kai Luo
Date: 2023-10-13T10:59:27+08:00
New Revision: 3104681686b17ea3c611e84b30884a25b84f87b6
URL: https://github.com/llvm/llvm-project/commit/3104681686b17ea3c611e84b30884a25b84f87b6
DIFF: https://github.com/llvm/llvm-project/commit/3104681686b17ea3c611e84b30884a25b84f87b6.diff
LOG: [PowerPC][Atomics] Remove redundant block to clear reservation (#68430)
This PR follows what https://reviews.llvm.org/D134783 does for
quadword CAS.
Added:
Modified:
llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
llvm/test/CodeGen/PowerPC/atomics-i128.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
index a9794ddd05667b4..aee57a5075ff719 100644
--- a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
@@ -239,23 +239,18 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
// loop:
// old = lqarx ptr
// <compare old, cmp>
- // bne 0, fail
+ // bne 0, exit
// succ:
// stqcx new ptr
// bne 0, loop
- // b exit
- // fail:
- // stqcx old ptr
// exit:
// ....
MachineFunction::iterator MFI = ++MBB.getIterator();
MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *CmpFailMBB = MF->CreateMachineBasicBlock(BB);
MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
MF->insert(MFI, LoopCmpMBB);
MF->insert(MFI, CmpSuccMBB);
- MF->insert(MFI, CmpFailMBB);
MF->insert(MFI, ExitMBB);
ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
MBB.end());
@@ -276,9 +271,9 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE)
.addReg(PPC::CR0)
- .addMBB(CmpFailMBB);
+ .addMBB(ExitMBB);
CurrentMBB->addSuccessor(CmpSuccMBB);
- CurrentMBB->addSuccessor(CmpFailMBB);
+ CurrentMBB->addSuccessor(ExitMBB);
// Build succ.
CurrentMBB = CmpSuccMBB;
PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
@@ -288,16 +283,11 @@ bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
.addImm(PPC::PRED_NE)
.addReg(PPC::CR0)
.addMBB(LoopCmpMBB);
- BuildMI(CurrentMBB, DL, TII->get(PPC::B)).addMBB(ExitMBB);
CurrentMBB->addSuccessor(LoopCmpMBB);
CurrentMBB->addSuccessor(ExitMBB);
- CurrentMBB = CmpFailMBB;
- BuildMI(CurrentMBB, DL, SC).addReg(Old).addReg(RA).addReg(RB);
- CurrentMBB->addSuccessor(ExitMBB);
recomputeLiveIns(*LoopCmpMBB);
recomputeLiveIns(*CmpSuccMBB);
- recomputeLiveIns(*CmpFailMBB);
recomputeLiveIns(*ExitMBB);
NMBBI = MBB.end();
MI.eraseFromParent();
diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
index 66d727caed69fad..f5422a9b7b54280 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
@@ -986,10 +986,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB7_1
-; CHECK-NEXT: b .LBB7_4
; CHECK-NEXT: .LBB7_3: # %entry
-; CHECK-NEXT: stqcx. r8, 0, r3
-; CHECK-NEXT: .LBB7_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
@@ -1033,10 +1030,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB7_1
-; LE-PWR8-NEXT: b .LBB7_4
; LE-PWR8-NEXT: .LBB7_3: # %entry
-; LE-PWR8-NEXT: stqcx. r8, 0, r3
-; LE-PWR8-NEXT: .LBB7_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
@@ -1057,10 +1051,7 @@ define i128 @cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
; AIX64-PWR8-NEXT: mr r10, r6
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
; AIX64-PWR8-NEXT: bne cr0, L..BB7_1
-; AIX64-PWR8-NEXT: b L..BB7_4
; AIX64-PWR8-NEXT: L..BB7_3: # %entry
-; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
-; AIX64-PWR8-NEXT: L..BB7_4: # %entry
; AIX64-PWR8-NEXT: lwsync
; AIX64-PWR8-NEXT: mr r3, r8
; AIX64-PWR8-NEXT: mr r4, r9
@@ -1121,10 +1112,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB8_1
-; CHECK-NEXT: b .LBB8_4
; CHECK-NEXT: .LBB8_3: # %entry
-; CHECK-NEXT: stqcx. r8, 0, r3
-; CHECK-NEXT: .LBB8_4: # %entry
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
; CHECK-NEXT: blr
@@ -1168,10 +1156,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB8_1
-; LE-PWR8-NEXT: b .LBB8_4
; LE-PWR8-NEXT: .LBB8_3: # %entry
-; LE-PWR8-NEXT: stqcx. r8, 0, r3
-; LE-PWR8-NEXT: .LBB8_4: # %entry
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
; LE-PWR8-NEXT: blr
@@ -1192,10 +1177,7 @@ define i128 @cas_weak_release_monotonic(ptr %a, i128 %cmp, i128 %new) {
; AIX64-PWR8-NEXT: mr r10, r6
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
; AIX64-PWR8-NEXT: bne cr0, L..BB8_1
-; AIX64-PWR8-NEXT: b L..BB8_4
; AIX64-PWR8-NEXT: L..BB8_3: # %entry
-; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
-; AIX64-PWR8-NEXT: L..BB8_4: # %entry
; AIX64-PWR8-NEXT: mr r3, r8
; AIX64-PWR8-NEXT: mr r4, r9
; AIX64-PWR8-NEXT: blr
@@ -1255,10 +1237,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB9_1
-; CHECK-NEXT: b .LBB9_4
; CHECK-NEXT: .LBB9_3: # %entry
-; CHECK-NEXT: stqcx. r8, 0, r3
-; CHECK-NEXT: .LBB9_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
@@ -1303,10 +1282,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB9_1
-; LE-PWR8-NEXT: b .LBB9_4
; LE-PWR8-NEXT: .LBB9_3: # %entry
-; LE-PWR8-NEXT: stqcx. r8, 0, r3
-; LE-PWR8-NEXT: .LBB9_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
@@ -1328,10 +1304,7 @@ define i128 @cas_sc_sc(ptr %a, i128 %cmp, i128 %new) {
; AIX64-PWR8-NEXT: mr r10, r6
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
; AIX64-PWR8-NEXT: bne cr0, L..BB9_1
-; AIX64-PWR8-NEXT: b L..BB9_4
; AIX64-PWR8-NEXT: L..BB9_3: # %entry
-; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
-; AIX64-PWR8-NEXT: L..BB9_4: # %entry
; AIX64-PWR8-NEXT: lwsync
; AIX64-PWR8-NEXT: mr r3, r8
; AIX64-PWR8-NEXT: mr r4, r9
@@ -1392,10 +1365,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB10_1
-; CHECK-NEXT: b .LBB10_4
; CHECK-NEXT: .LBB10_3: # %entry
-; CHECK-NEXT: stqcx. r8, 0, r3
-; CHECK-NEXT: .LBB10_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: mr r3, r8
; CHECK-NEXT: mr r4, r9
@@ -1440,10 +1410,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB10_1
-; LE-PWR8-NEXT: b .LBB10_4
; LE-PWR8-NEXT: .LBB10_3: # %entry
-; LE-PWR8-NEXT: stqcx. r8, 0, r3
-; LE-PWR8-NEXT: .LBB10_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: mr r3, r9
; LE-PWR8-NEXT: mr r4, r8
@@ -1465,10 +1432,7 @@ define i128 @cas_acqrel_acquire(ptr %a, i128 %cmp, i128 %new) {
; AIX64-PWR8-NEXT: mr r10, r6
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
; AIX64-PWR8-NEXT: bne cr0, L..BB10_1
-; AIX64-PWR8-NEXT: b L..BB10_4
; AIX64-PWR8-NEXT: L..BB10_3: # %entry
-; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
-; AIX64-PWR8-NEXT: L..BB10_4: # %entry
; AIX64-PWR8-NEXT: lwsync
; AIX64-PWR8-NEXT: mr r3, r8
; AIX64-PWR8-NEXT: mr r4, r9
@@ -1529,10 +1493,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
; CHECK-NEXT: mr r10, r6
; CHECK-NEXT: stqcx. r10, 0, r3
; CHECK-NEXT: bne cr0, .LBB11_1
-; CHECK-NEXT: b .LBB11_4
; CHECK-NEXT: .LBB11_3: # %entry
-; CHECK-NEXT: stqcx. r8, 0, r3
-; CHECK-NEXT: .LBB11_4: # %entry
; CHECK-NEXT: lwsync
; CHECK-NEXT: xor r3, r4, r8
; CHECK-NEXT: xor r4, r5, r9
@@ -1578,10 +1539,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
; LE-PWR8-NEXT: mr r10, r7
; LE-PWR8-NEXT: stqcx. r10, 0, r3
; LE-PWR8-NEXT: bne cr0, .LBB11_1
-; LE-PWR8-NEXT: b .LBB11_4
; LE-PWR8-NEXT: .LBB11_3: # %entry
-; LE-PWR8-NEXT: stqcx. r8, 0, r3
-; LE-PWR8-NEXT: .LBB11_4: # %entry
; LE-PWR8-NEXT: lwsync
; LE-PWR8-NEXT: xor r3, r5, r8
; LE-PWR8-NEXT: xor r4, r4, r9
@@ -1606,10 +1564,7 @@ define i1 @cas_acqrel_acquire_check_succ(ptr %a, i128 %cmp, i128 %new) {
; AIX64-PWR8-NEXT: mr r10, r6
; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
; AIX64-PWR8-NEXT: bne cr0, L..BB11_1
-; AIX64-PWR8-NEXT: b L..BB11_4
; AIX64-PWR8-NEXT: L..BB11_3: # %entry
-; AIX64-PWR8-NEXT: stqcx. r8, 0, r3
-; AIX64-PWR8-NEXT: L..BB11_4: # %entry
; AIX64-PWR8-NEXT: lwsync
; AIX64-PWR8-NEXT: xor r3, r4, r8
; AIX64-PWR8-NEXT: xor r4, r5, r9
@@ -1651,3 +1606,132 @@ entry:
%1 = extractvalue { i128, i1 } %0, 1
ret i1 %1
}
+
+;; TODO: Optimize CAS at exit block when bool value is returned.
+define i1 @bool_cas_weak_acquire_acquire(ptr %a, i128 %cmp, i128 %new) {
+; CHECK-LABEL: bool_cas_weak_acquire_acquire:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: .LBB12_1: # %entry
+; CHECK-NEXT: #
+; CHECK-NEXT: lqarx r8, 0, r3
+; CHECK-NEXT: xor r11, r9, r5
+; CHECK-NEXT: xor r10, r8, r4
+; CHECK-NEXT: or. r11, r11, r10
+; CHECK-NEXT: bne cr0, .LBB12_3
+; CHECK-NEXT: # %bb.2: # %entry
+; CHECK-NEXT: #
+; CHECK-NEXT: mr r11, r7
+; CHECK-NEXT: mr r10, r6
+; CHECK-NEXT: stqcx. r10, 0, r3
+; CHECK-NEXT: bne cr0, .LBB12_1
+; CHECK-NEXT: .LBB12_3: # %entry
+; CHECK-NEXT: lwsync
+; CHECK-NEXT: xor r3, r4, r8
+; CHECK-NEXT: xor r4, r5, r9
+; CHECK-NEXT: or r3, r4, r3
+; CHECK-NEXT: cntlzd r3, r3
+; CHECK-NEXT: rldicl r3, r3, 58, 63
+; CHECK-NEXT: blr
+;
+; PWR7-LABEL: bool_cas_weak_acquire_acquire:
+; PWR7: # %bb.0: # %entry
+; PWR7-NEXT: mflr r0
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: std r0, 144(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
+; PWR7-NEXT: .cfi_offset lr, 16
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: mr r5, r6
+; PWR7-NEXT: mr r6, r7
+; PWR7-NEXT: li r7, 2
+; PWR7-NEXT: li r8, 2
+; PWR7-NEXT: bl __atomic_compare_exchange_16
+; PWR7-NEXT: nop
+; PWR7-NEXT: addi r1, r1, 128
+; PWR7-NEXT: ld r0, 16(r1)
+; PWR7-NEXT: mtlr r0
+; PWR7-NEXT: blr
+;
+; LE-PWR8-LABEL: bool_cas_weak_acquire_acquire:
+; LE-PWR8: # %bb.0: # %entry
+; LE-PWR8-NEXT: .LBB12_1: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: lqarx r8, 0, r3
+; LE-PWR8-NEXT: xor r11, r9, r4
+; LE-PWR8-NEXT: xor r10, r8, r5
+; LE-PWR8-NEXT: or. r11, r11, r10
+; LE-PWR8-NEXT: bne cr0, .LBB12_3
+; LE-PWR8-NEXT: # %bb.2: # %entry
+; LE-PWR8-NEXT: #
+; LE-PWR8-NEXT: mr r11, r6
+; LE-PWR8-NEXT: mr r10, r7
+; LE-PWR8-NEXT: stqcx. r10, 0, r3
+; LE-PWR8-NEXT: bne cr0, .LBB12_1
+; LE-PWR8-NEXT: .LBB12_3: # %entry
+; LE-PWR8-NEXT: lwsync
+; LE-PWR8-NEXT: xor r3, r5, r8
+; LE-PWR8-NEXT: xor r4, r4, r9
+; LE-PWR8-NEXT: or r3, r4, r3
+; LE-PWR8-NEXT: cntlzd r3, r3
+; LE-PWR8-NEXT: rldicl r3, r3, 58, 63
+; LE-PWR8-NEXT: blr
+;
+; AIX64-PWR8-LABEL: bool_cas_weak_acquire_acquire:
+; AIX64-PWR8: # %bb.0: # %entry
+; AIX64-PWR8-NEXT: L..BB12_1: # %entry
+; AIX64-PWR8-NEXT: #
+; AIX64-PWR8-NEXT: lqarx r8, 0, r3
+; AIX64-PWR8-NEXT: xor r11, r9, r5
+; AIX64-PWR8-NEXT: xor r10, r8, r4
+; AIX64-PWR8-NEXT: or. r11, r11, r10
+; AIX64-PWR8-NEXT: bne cr0, L..BB12_3
+; AIX64-PWR8-NEXT: # %bb.2: # %entry
+; AIX64-PWR8-NEXT: #
+; AIX64-PWR8-NEXT: mr r11, r7
+; AIX64-PWR8-NEXT: mr r10, r6
+; AIX64-PWR8-NEXT: stqcx. r10, 0, r3
+; AIX64-PWR8-NEXT: bne cr0, L..BB12_1
+; AIX64-PWR8-NEXT: L..BB12_3: # %entry
+; AIX64-PWR8-NEXT: lwsync
+; AIX64-PWR8-NEXT: xor r3, r4, r8
+; AIX64-PWR8-NEXT: xor r4, r5, r9
+; AIX64-PWR8-NEXT: or r3, r4, r3
+; AIX64-PWR8-NEXT: cntlzd r3, r3
+; AIX64-PWR8-NEXT: rldicl r3, r3, 58, 63
+; AIX64-PWR8-NEXT: blr
+;
+; PPC-PWR8-LABEL: bool_cas_weak_acquire_acquire:
+; PPC-PWR8: # %bb.0: # %entry
+; PPC-PWR8-NEXT: mflr r0
+; PPC-PWR8-NEXT: stwu r1, -48(r1)
+; PPC-PWR8-NEXT: stw r0, 52(r1)
+; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48
+; PPC-PWR8-NEXT: .cfi_offset lr, 4
+; PPC-PWR8-NEXT: mr r4, r3
+; PPC-PWR8-NEXT: lwz r3, 60(r1)
+; PPC-PWR8-NEXT: stw r8, 44(r1)
+; PPC-PWR8-NEXT: stw r7, 40(r1)
+; PPC-PWR8-NEXT: stw r6, 36(r1)
+; PPC-PWR8-NEXT: stw r5, 32(r1)
+; PPC-PWR8-NEXT: addi r5, r1, 32
+; PPC-PWR8-NEXT: addi r6, r1, 16
+; PPC-PWR8-NEXT: li r7, 2
+; PPC-PWR8-NEXT: li r8, 2
+; PPC-PWR8-NEXT: stw r10, 20(r1)
+; PPC-PWR8-NEXT: stw r9, 16(r1)
+; PPC-PWR8-NEXT: stw r3, 28(r1)
+; PPC-PWR8-NEXT: lwz r3, 56(r1)
+; PPC-PWR8-NEXT: stw r3, 24(r1)
+; PPC-PWR8-NEXT: li r3, 16
+; PPC-PWR8-NEXT: bl __atomic_compare_exchange
+; PPC-PWR8-NEXT: lwz r0, 52(r1)
+; PPC-PWR8-NEXT: addi r1, r1, 48
+; PPC-PWR8-NEXT: mtlr r0
+; PPC-PWR8-NEXT: blr
+entry:
+ %0 = cmpxchg weak ptr %a, i128 %cmp, i128 %new acquire acquire
+ %1 = extractvalue { i128, i1 } %0, 1
+ ret i1 %1
+}
More information about the llvm-commits
mailing list