[llvm] [PowerPC] Do not generate `isel` instruction if target doesn't have this instruction (PR #72845)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 27 19:55:48 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-powerpc
Author: Kai Luo (bzEq)
<details>
<summary>Changes</summary>
When expanding `select_cc` in finalize-isel, we should not generate `isel` for targets that do not support it.
---
Patch is 154.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72845.diff
27 Files Affected:
- (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+51-31)
- (modified) llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll (+31-33)
- (modified) llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll (+122-150)
- (modified) llvm/test/CodeGen/PowerPC/ctrloops-pseudo.ll (+32-8)
- (modified) llvm/test/CodeGen/PowerPC/expand-isel-to-branch.ll (+6-6)
- (modified) llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll (+146-114)
- (modified) llvm/test/CodeGen/PowerPC/fp-to-int-to-fp.ll (+50-53)
- (modified) llvm/test/CodeGen/PowerPC/fptoui-be-crash.ll (+38-40)
- (modified) llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll (+42-82)
- (modified) llvm/test/CodeGen/PowerPC/funnel-shift.ll (+313-292)
- (modified) llvm/test/CodeGen/PowerPC/i1-to-double.ll (+26-26)
- (modified) llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll (+7-8)
- (modified) llvm/test/CodeGen/PowerPC/pr43976.ll (+9-9)
- (modified) llvm/test/CodeGen/PowerPC/pr49509.ll (+13-22)
- (modified) llvm/test/CodeGen/PowerPC/save-crbp-ppc32svr4.ll (+1-1)
- (modified) llvm/test/CodeGen/PowerPC/select-cc-no-isel.ll (+16-4)
- (modified) llvm/test/CodeGen/PowerPC/select.ll (+51-48)
- (modified) llvm/test/CodeGen/PowerPC/select_const.ll (+96-129)
- (modified) llvm/test/CodeGen/PowerPC/smulfixsat.ll (+17-19)
- (modified) llvm/test/CodeGen/PowerPC/spe.ll (+83-93)
- (modified) llvm/test/CodeGen/PowerPC/srem-seteq-illegal-types.ll (+13-16)
- (modified) llvm/test/CodeGen/PowerPC/umulfixsat.ll (+10-10)
- (modified) llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll (+94-86)
- (modified) llvm/test/CodeGen/PowerPC/urem-seteq-illegal-types.ll (+32-42)
- (modified) llvm/test/CodeGen/PowerPC/varargs.ll (+18-21)
- (modified) llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll (+29-29)
- (modified) llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll (+29-29)
``````````diff
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 51becf1d5b8584..19cf1a8394b069 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12661,6 +12661,44 @@ PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
return TailMBB;
}
+static bool IsSelectCC(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case PPC::SELECT_CC_I4:
+ case PPC::SELECT_CC_I8:
+ case PPC::SELECT_CC_F4:
+ case PPC::SELECT_CC_F8:
+ case PPC::SELECT_CC_F16:
+ case PPC::SELECT_CC_VRRC:
+ case PPC::SELECT_CC_VSFRC:
+ case PPC::SELECT_CC_VSSRC:
+ case PPC::SELECT_CC_VSRC:
+ case PPC::SELECT_CC_SPE4:
+ case PPC::SELECT_CC_SPE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool IsSelect(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case PPC::SELECT_I4:
+ case PPC::SELECT_I8:
+ case PPC::SELECT_F4:
+ case PPC::SELECT_F8:
+ case PPC::SELECT_F16:
+ case PPC::SELECT_SPE:
+ case PPC::SELECT_SPE4:
+ case PPC::SELECT_VRRC:
+ case PPC::SELECT_VSFRC:
+ case PPC::SELECT_VSSRC:
+ case PPC::SELECT_VSRC:
+ return true;
+ default:
+ return false;
+ }
+}
+
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
@@ -12698,9 +12736,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction *F = BB->getParent();
MachineRegisterInfo &MRI = F->getRegInfo();
- if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
- MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
- MI.getOpcode() == PPC::SELECT_I8) {
+ if (Subtarget.hasISEL() &&
+ (MI.getOpcode() == PPC::SELECT_CC_I4 ||
+ MI.getOpcode() == PPC::SELECT_CC_I8 ||
+ MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
SmallVector<MachineOperand, 2> Cond;
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8)
@@ -12712,24 +12751,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
DebugLoc dl = MI.getDebugLoc();
TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
- } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
- MI.getOpcode() == PPC::SELECT_CC_F8 ||
- MI.getOpcode() == PPC::SELECT_CC_F16 ||
- MI.getOpcode() == PPC::SELECT_CC_VRRC ||
- MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
- MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
- MI.getOpcode() == PPC::SELECT_CC_VSRC ||
- MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
- MI.getOpcode() == PPC::SELECT_CC_SPE ||
- MI.getOpcode() == PPC::SELECT_F4 ||
- MI.getOpcode() == PPC::SELECT_F8 ||
- MI.getOpcode() == PPC::SELECT_F16 ||
- MI.getOpcode() == PPC::SELECT_SPE ||
- MI.getOpcode() == PPC::SELECT_SPE4 ||
- MI.getOpcode() == PPC::SELECT_VRRC ||
- MI.getOpcode() == PPC::SELECT_VSFRC ||
- MI.getOpcode() == PPC::SELECT_VSSRC ||
- MI.getOpcode() == PPC::SELECT_VSRC) {
+ } else if (IsSelectCC(MI) || IsSelect(MI)) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
@@ -12738,7 +12760,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// ...
// TrueVal = ...
// cmpTY ccX, r1, r2
- // bCC copy1MBB
+ // bCC sinkMBB
// fallthrough --> copy0MBB
MachineBasicBlock *thisMBB = BB;
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -12747,6 +12769,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
+ // Set the call frame size on entry to the new basic blocks.
+ // See https://reviews.llvm.org/D156113.
+ unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
+ copy0MBB->setCallFrameSize(CallFrameSize);
+ sinkMBB->setCallFrameSize(CallFrameSize);
+
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
@@ -12756,15 +12784,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
- if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
- MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
- MI.getOpcode() == PPC::SELECT_F16 ||
- MI.getOpcode() == PPC::SELECT_SPE4 ||
- MI.getOpcode() == PPC::SELECT_SPE ||
- MI.getOpcode() == PPC::SELECT_VRRC ||
- MI.getOpcode() == PPC::SELECT_VSFRC ||
- MI.getOpcode() == PPC::SELECT_VSSRC ||
- MI.getOpcode() == PPC::SELECT_VSRC) {
+ if (IsSelect(MI)) {
BuildMI(BB, dl, TII->get(PPC::BC))
.addReg(MI.getOperand(1).getReg())
.addMBB(sinkMBB);
diff --git a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
index 0405b25e7fb032..e20fc400f80f8f 100644
--- a/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll
@@ -36,7 +36,7 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: # %bb.1: # %bb5
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: li 4, 0
-; CHECK-NEXT: b .LBB0_17
+; CHECK-NEXT: b .LBB0_19
; CHECK-NEXT: .LBB0_2: # %bb1
; CHECK-NEXT: lfd 0, 400(1)
; CHECK-NEXT: lis 3, 15856
@@ -99,24 +99,22 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: fadd 1, 28, 29
; CHECK-NEXT: mtfsf 1, 0
; CHECK-NEXT: lfs 0, .LCPI0_1@l(3)
-; CHECK-NEXT: fctiwz 1, 1
-; CHECK-NEXT: stfd 1, 152(1)
; CHECK-NEXT: fcmpu 0, 28, 27
-; CHECK-NEXT: lwz 3, 164(1)
+; CHECK-NEXT: fctiwz 1, 1
; CHECK-NEXT: fcmpu 1, 29, 0
-; CHECK-NEXT: lwz 4, 156(1)
; CHECK-NEXT: crandc 20, 6, 0
; CHECK-NEXT: cror 20, 5, 20
-; CHECK-NEXT: addis 3, 3, -32768
+; CHECK-NEXT: stfd 1, 152(1)
; CHECK-NEXT: bc 12, 20, .LBB0_4
; CHECK-NEXT: # %bb.3: # %bb1
-; CHECK-NEXT: ori 30, 4, 0
+; CHECK-NEXT: lwz 30, 156(1)
; CHECK-NEXT: b .LBB0_5
-; CHECK-NEXT: .LBB0_4: # %bb1
-; CHECK-NEXT: addi 30, 3, 0
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: lwz 3, 164(1)
+; CHECK-NEXT: addis 30, 3, -32768
; CHECK-NEXT: .LBB0_5: # %bb1
-; CHECK-NEXT: li 4, 0
; CHECK-NEXT: mr 3, 30
+; CHECK-NEXT: li 4, 0
; CHECK-NEXT: bl __floatditf
; CHECK-NEXT: lis 3, 17392
; CHECK-NEXT: stfd 1, 208(1)
@@ -179,10 +177,10 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: lwz 3, 168(1)
; CHECK-NEXT: stw 3, 272(1)
; CHECK-NEXT: lfd 31, 272(1)
-; CHECK-NEXT: bc 12, 20, .LBB0_14
+; CHECK-NEXT: bc 12, 20, .LBB0_13
; CHECK-NEXT: # %bb.10: # %bb1
; CHECK-NEXT: cror 20, 1, 3
-; CHECK-NEXT: bc 12, 20, .LBB0_14
+; CHECK-NEXT: bc 12, 20, .LBB0_13
; CHECK-NEXT: # %bb.11: # %bb2
; CHECK-NEXT: fneg 29, 31
; CHECK-NEXT: stfd 29, 48(1)
@@ -223,24 +221,17 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: fadd 1, 28, 29
; CHECK-NEXT: mtfsf 1, 0
; CHECK-NEXT: lfs 0, .LCPI0_3@l(3)
-; CHECK-NEXT: fctiwz 1, 1
-; CHECK-NEXT: stfd 1, 24(1)
; CHECK-NEXT: fcmpu 0, 30, 2
-; CHECK-NEXT: lwz 3, 36(1)
+; CHECK-NEXT: fctiwz 1, 1
; CHECK-NEXT: fcmpu 1, 31, 0
-; CHECK-NEXT: lwz 4, 28(1)
; CHECK-NEXT: crandc 20, 6, 1
; CHECK-NEXT: cror 20, 4, 20
-; CHECK-NEXT: addis 3, 3, -32768
-; CHECK-NEXT: bc 12, 20, .LBB0_13
+; CHECK-NEXT: stfd 1, 24(1)
+; CHECK-NEXT: bc 12, 20, .LBB0_17
; CHECK-NEXT: # %bb.12: # %bb2
-; CHECK-NEXT: ori 3, 4, 0
-; CHECK-NEXT: b .LBB0_13
-; CHECK-NEXT: .LBB0_13: # %bb2
-; CHECK-NEXT: subfic 4, 3, 0
-; CHECK-NEXT: subfe 3, 29, 30
-; CHECK-NEXT: b .LBB0_17
-; CHECK-NEXT: .LBB0_14: # %bb3
+; CHECK-NEXT: lwz 3, 28(1)
+; CHECK-NEXT: b .LBB0_18
+; CHECK-NEXT: .LBB0_13: # %bb3
; CHECK-NEXT: stfd 31, 112(1)
; CHECK-NEXT: li 3, 0
; CHECK-NEXT: stw 3, 148(1)
@@ -278,22 +269,29 @@ define i64 @__fixunstfdi(ppc_fp128 %a) nounwind readnone {
; CHECK-NEXT: fadd 2, 30, 31
; CHECK-NEXT: mtfsf 1, 0
; CHECK-NEXT: lfs 0, .LCPI0_1@l(3)
-; CHECK-NEXT: fctiwz 2, 2
-; CHECK-NEXT: stfd 2, 88(1)
; CHECK-NEXT: fcmpu 0, 30, 1
-; CHECK-NEXT: lwz 3, 100(1)
+; CHECK-NEXT: fctiwz 1, 2
; CHECK-NEXT: fcmpu 1, 31, 0
-; CHECK-NEXT: lwz 4, 92(1)
; CHECK-NEXT: crandc 20, 6, 0
; CHECK-NEXT: cror 20, 5, 20
-; CHECK-NEXT: addis 3, 3, -32768
+; CHECK-NEXT: stfd 1, 88(1)
; CHECK-NEXT: bc 12, 20, .LBB0_15
+; CHECK-NEXT: # %bb.14: # %bb3
+; CHECK-NEXT: lwz 4, 92(1)
; CHECK-NEXT: b .LBB0_16
-; CHECK-NEXT: .LBB0_15: # %bb3
-; CHECK-NEXT: addi 4, 3, 0
+; CHECK-NEXT: .LBB0_15:
+; CHECK-NEXT: lwz 3, 100(1)
+; CHECK-NEXT: addis 4, 3, -32768
; CHECK-NEXT: .LBB0_16: # %bb3
; CHECK-NEXT: mr 3, 30
-; CHECK-NEXT: .LBB0_17: # %bb5
+; CHECK-NEXT: b .LBB0_19
+; CHECK-NEXT: .LBB0_17:
+; CHECK-NEXT: lwz 3, 36(1)
+; CHECK-NEXT: addis 3, 3, -32768
+; CHECK-NEXT: .LBB0_18: # %bb2
+; CHECK-NEXT: subfic 4, 3, 0
+; CHECK-NEXT: subfe 3, 29, 30
+; CHECK-NEXT: .LBB0_19: # %bb3
; CHECK-NEXT: lfd 31, 456(1) # 8-byte Folded Reload
; CHECK-NEXT: lfd 30, 448(1) # 8-byte Folded Reload
; CHECK-NEXT: lfd 29, 440(1) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
index adbb956ba32ad9..505ac8639595fd 100644
--- a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
@@ -6,55 +6,50 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: mr 5, 3
-; CHECK-NEXT: rlwinm 7, 5, 3, 27, 28
+; CHECK-NEXT: rlwinm 6, 5, 3, 27, 28
; CHECK-NEXT: lbz 3, 0(3)
-; CHECK-NEXT: xori 7, 7, 24
-; CHECK-NEXT: li 8, 255
-; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: xori 6, 6, 24
+; CHECK-NEXT: li 7, 255
; CHECK-NEXT: clrlwi 4, 4, 24
; CHECK-NEXT: rldicr 5, 5, 0, 61
-; CHECK-NEXT: slw 8, 8, 7
+; CHECK-NEXT: slw 7, 7, 6
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: srw 3, 11, 7
-; CHECK-NEXT: cmplw 3, 9
-; CHECK-NEXT: beq 0, .LBB0_8
+; CHECK-NEXT: srw 3, 10, 6
+; CHECK-NEXT: cmplw 3, 8
+; CHECK-NEXT: beq 0, .LBB0_7
; CHECK-NEXT: .LBB0_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB0_6 Depth 2
-; CHECK-NEXT: clrlwi 9, 3, 24
-; CHECK-NEXT: addi 10, 3, 1
-; CHECK-NEXT: cmplw 9, 4
-; CHECK-NEXT: bc 12, 0, .LBB0_4
+; CHECK-NEXT: # Child Loop BB0_5 Depth 2
+; CHECK-NEXT: clrlwi 8, 3, 24
+; CHECK-NEXT: cmplw 8, 4
+; CHECK-NEXT: li 9, 0
+; CHECK-NEXT: bge 0, .LBB0_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: ori 3, 6, 0
-; CHECK-NEXT: b .LBB0_5
+; CHECK-NEXT: addi 9, 3, 1
; CHECK-NEXT: .LBB0_4: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: addi 3, 10, 0
+; CHECK-NEXT: slw 3, 9, 6
+; CHECK-NEXT: slw 9, 8, 6
+; CHECK-NEXT: and 3, 3, 7
+; CHECK-NEXT: and 9, 9, 7
; CHECK-NEXT: .LBB0_5: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: slw 11, 9, 7
-; CHECK-NEXT: slw 3, 3, 7
-; CHECK-NEXT: and 3, 3, 8
-; CHECK-NEXT: and 10, 11, 8
-; CHECK-NEXT: .LBB0_6: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB0_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lwarx 12, 0, 5
-; CHECK-NEXT: and 11, 12, 8
-; CHECK-NEXT: cmpw 11, 10
+; CHECK-NEXT: lwarx 11, 0, 5
+; CHECK-NEXT: and 10, 11, 7
+; CHECK-NEXT: cmpw 10, 9
; CHECK-NEXT: bne 0, .LBB0_1
-; CHECK-NEXT: # %bb.7: # %atomicrmw.start
+; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: andc 12, 12, 8
-; CHECK-NEXT: or 12, 12, 3
-; CHECK-NEXT: stwcx. 12, 0, 5
-; CHECK-NEXT: bne 0, .LBB0_6
+; CHECK-NEXT: andc 11, 11, 7
+; CHECK-NEXT: or 11, 11, 3
+; CHECK-NEXT: stwcx. 11, 0, 5
+; CHECK-NEXT: bne 0, .LBB0_5
; CHECK-NEXT: b .LBB0_1
-; CHECK-NEXT: .LBB0_8: # %atomicrmw.end
+; CHECK-NEXT: .LBB0_7: # %atomicrmw.end
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
@@ -66,55 +61,51 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: sync
; CHECK-NEXT: mr 5, 3
-; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: li 7, 0
; CHECK-NEXT: lhz 3, 0(3)
-; CHECK-NEXT: rlwinm 7, 5, 3, 27, 27
-; CHECK-NEXT: xori 7, 7, 16
-; CHECK-NEXT: ori 8, 6, 65535
+; CHECK-NEXT: rlwinm 6, 5, 3, 27, 27
+; CHECK-NEXT: xori 6, 6, 16
+; CHECK-NEXT: ori 7, 7, 65535
; CHECK-NEXT: clrlwi 4, 4, 16
; CHECK-NEXT: rldicr 5, 5, 0, 61
-; CHECK-NEXT: slw 8, 8, 7
+; CHECK-NEXT: slw 7, 7, 6
; CHECK-NEXT: b .LBB1_2
; CHECK-NEXT: .LBB1_1: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: srw 3, 11, 7
-; CHECK-NEXT: cmplw 3, 9
-; CHECK-NEXT: beq 0, .LBB1_8
+; CHECK-NEXT: srw 3, 10, 6
+; CHECK-NEXT: cmplw 3, 8
+; CHECK-NEXT: beq 0, .LBB1_7
; CHECK-NEXT: .LBB1_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB1_6 Depth 2
-; CHECK-NEXT: clrlwi 9, 3, 16
-; CHECK-NEXT: addi 10, 3, 1
-; CHECK-NEXT: cmplw 9, 4
-; CHECK-NEXT: bc 12, 0, .LBB1_4
+; CHECK-NEXT: # Child Loop BB1_5 Depth 2
+; CHECK-NEXT: clrlwi 8, 3, 16
+; CHECK-NEXT: cmplw 8, 4
+; CHECK-NEXT: li 9, 0
+; CHECK-NEXT: bge 0, .LBB1_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: ori 3, 6, 0
-; CHECK-NEXT: b .LBB1_5
+; CHECK-NEXT: addi 9, 3, 1
; CHECK-NEXT: .LBB1_4: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: addi 3, 10, 0
+; CHECK-NEXT: slw 3, 9, 6
+; CHECK-NEXT: slw 9, 8, 6
+; CHECK-NEXT: and 3, 3, 7
+; CHECK-NEXT: and 9, 9, 7
; CHECK-NEXT: .LBB1_5: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: slw 11, 9, 7
-; CHECK-NEXT: slw 3, 3, 7
-; CHECK-NEXT: and 3, 3, 8
-; CHECK-NEXT: and 10, 11, 8
-; CHECK-NEXT: .LBB1_6: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lwarx 12, 0, 5
-; CHECK-NEXT: and 11, 12, 8
-; CHECK-NEXT: cmpw 11, 10
+; CHECK-NEXT: lwarx 11, 0, 5
+; CHECK-NEXT: and 10, 11, 7
+; CHECK-NEXT: cmpw 10, 9
; CHECK-NEXT: bne 0, .LBB1_1
-; CHECK-NEXT: # %bb.7: # %atomicrmw.start
+; CHECK-NEXT: # %bb.6: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: andc 12, 12, 8
-; CHECK-NEXT: or 12, 12, 3
-; CHECK-NEXT: stwcx. 12, 0, 5
-; CHECK-NEXT: bne 0, .LBB1_6
+; CHECK-NEXT: andc 11, 11, 7
+; CHECK-NEXT: or 11, 11, 3
+; CHECK-NEXT: stwcx. 11, 0, 5
+; CHECK-NEXT: bne 0, .LBB1_5
; CHECK-NEXT: b .LBB1_1
-; CHECK-NEXT: .LBB1_8: # %atomicrmw.end
+; CHECK-NEXT: .LBB1_7: # %atomicrmw.end
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
%result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
@@ -125,39 +116,34 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: li 6, 0
-; CHECK-NEXT: lwz 5, 0(3)
+; CHECK-NEXT: lwz 6, 0(3)
; CHECK-NEXT: b .LBB2_2
; CHECK-NEXT: .LBB2_1: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: cmplw 5, 7
-; CHECK-NEXT: beq 0, .LBB2_7
+; CHECK-NEXT: cmplw 5, 6
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: beq 0, .LBB2_6
; CHECK-NEXT: .LBB2_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB2_5 Depth 2
-; CHECK-NEXT: mr 7, 5
-; CHECK-NEXT: addi 5, 5, 1
-; CHECK-NEXT: cmplw 7, 4
-; CHECK-NEXT: bc 12, 0, .LBB2_4
+; CHECK-NEXT: # Child Loop BB2_4 Depth 2
+; CHECK-NEXT: cmplw 6, 4
+; CHECK-NEXT: li 7, 0
+; CHECK-NEXT: bge 0, .LBB2_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: ori 8, 6, 0
-; CHECK-NEXT: b .LBB2_5
+; CHECK-NEXT: addi 7, 6, 1
; CHECK-NEXT: .LBB2_4: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: addi 8, 5, 0
-; CHECK-NEXT: .LBB2_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB2_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: lwarx 5, 0, 3
-; CHECK-NEXT: cmpw 5, 7
+; CHECK-NEXT: cmpw 5, 6
; CHECK-NEXT: bne 0, .LBB2_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
+; CHECK-NEXT: # %bb.5: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: stwcx. 8, 0, 3
-; CHECK-NEXT: bne 0, .LBB2_5
+; CHECK-NEXT: stwcx. 7, 0, 3
+; CHECK-NEXT: bne 0, .LBB2_4
; CHECK-NEXT: b .LBB2_1
-; CHECK-NEXT: .LBB2_7: # %atomicrmw.end
+; CHECK-NEXT: .LBB2_6: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -169,39 +155,34 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: ld 5, 0(3)
-; CHECK-NEXT: li 6, 0
+; CHECK-NEXT: ld 6, 0(3)
; CHECK-NEXT: b .LBB3_2
; CHECK-NEXT: .LBB3_1: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: cmpld 5, 7
-; CHECK-NEXT: beq 0, .LBB3_7
+; CHECK-NEXT: cmpld 5, 6
+; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: beq 0, .LBB3_6
; CHECK-NEXT: .LBB3_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB3_5 Depth 2
-; CHECK-NEXT: mr 7, 5
-; CHECK-NEXT: addi 5, 5, 1
-; CHECK-NEXT: cmpld 7, 4
-; CHECK-NEXT: bc 12, 0, .LBB3_4
+; CHECK-NEXT: # Child Loop BB3_4 Depth 2
+; CHECK-NEXT: cmpld 6, 4
+; CHECK-NEXT: li 7, 0
+; CHECK-NEXT: bge 0, .LBB3_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: ori 8, 6, 0
-; CHECK-NEXT: b .LBB3_5
+; CHECK-NEXT: addi 7, 6, 1
; CHECK-NEXT: .LBB3_4: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: addi 8, 5, 0
-; CHECK-NEXT: .LBB3_5: # %atomicrmw.start
; CHECK-NEXT: # Parent Loop BB3_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
-; CHECK-NEXT: cmpd 5, 7
+; CHECK-NEXT: cmpd 5, 6
; CHECK-NEXT: bne 0, .LBB3_1
-; CHECK-NEXT: # %bb.6: # %atomicrmw.start
+; CHECK-NEXT: # %bb.5: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: stdcx. 8, 0, 3
-; CHECK-NEXT: bne 0, .LBB3_5
+; CHECK-NEXT: stdcx. 7, 0, 3
+; CHECK-NEXT: bne 0, .LBB3_4
; CHECK-NEXT: b .LBB3_1
-; CHECK-NEXT: .LBB3_7: # %atomicrmw.end
+; CHECK-NEXT: .LBB3_6: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
@@ -226,43 +207,39 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-NEXT: #
; CHECK-NEXT: srw 3, 11, 7
; CHECK-NEXT: cmplw 3, 9
-; CHECK-NEXT: beq 0, .LBB4_8
+; CHECK-NEXT: beq 0, .LBB4_7
; CHECK-NEXT: .LBB4_2: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB4_6 Depth 2
+; CHECK-NEXT: # Child Loop BB4_5 Depth 2
; CHECK-NEXT: andi. 9, 3, 255
; CHECK-NEXT: cmplw 1, 9, 6
-; CHECK-NEXT: addi 10, 3, -1
; CHECK-NEXT: cror 20, 2, 5
+; CHECK-NEXT: mr 10, 4
; CHECK-NEXT: bc 12, 20, .LBB4_4
; CHECK-NEXT: # %bb.3: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: ori 3, 10, 0
-; CHECK-NEXT: b .LBB4_5
+; CHECK-NEXT: addi 10, 3, -1
; CHECK-NEXT: .LBB4_4: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: addi 3, 4, 0
-; CHECK-NEXT: .LBB4_5: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: slw 11, 9, 7
-; CHECK-NEXT: slw 3, 3, 7
+; CHECK-NEXT: slw 3, 10, 7
+; CHECK-NEXT: slw 10, 9, 7
; CHECK-NEXT: and 3, 3, 8
-; CHECK-NEXT: and 10, 11, 8
-; CHECK-NEXT: .LBB4_6: # %atomicrmw.start
+; CHECK-NEXT:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/72845
More information about the llvm-commits
mailing list