[llvm] c8f6c0f - [Machinesink] add one more profitable loop related pattern
Chen Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 26 19:11:46 PDT 2020
Author: Chen Zheng
Date: 2020-09-26T21:02:21-04:00
New Revision: c8f6c0f961eed1301b33b8af53d075542f7723c8
URL: https://github.com/llvm/llvm-project/commit/c8f6c0f961eed1301b33b8af53d075542f7723c8
DIFF: https://github.com/llvm/llvm-project/commit/c8f6c0f961eed1301b33b8af53d075542f7723c8.diff
LOG: [Machinesink] add one more profitable loop related pattern
Reviewed By: qcolombet
Differential Revision: https://reviews.llvm.org/D86925
Added:
Modified:
llvm/lib/CodeGen/MachineSink.cpp
llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir
Removed:
llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll
################################################################################
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index dfb88f6bf517..a34025259de9 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -596,9 +596,55 @@ bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr &MI,
FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
- // If SuccToSinkTo is final destination and it is a post dominator of current
- // block then it is not profitable to sink MI into SuccToSinkTo block.
- return false;
+ MachineLoop *ML = LI->getLoopFor(MBB);
+
+ // If the instruction is not inside a loop, it is not profitable to sink MI to
+ // a post-dominating block SuccToSinkTo.
+ if (!ML)
+ return false;
+
+ // If this instruction is inside a loop and sinking this instruction can
+ // shorten the live ranges of more registers, it is still profitable.
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ // Ignore non-register operands.
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0)
+ continue;
+
+ // Don't handle physical register.
+ if (Register::isPhysicalRegister(Reg))
+ return false;
+
+ // Users for the defs are all dominated by SuccToSinkTo.
+ if (MO.isDef()) {
+ // This def register's live range is shortened after sinking.
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
+ LocalUse))
+ return false;
+ } else {
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ // DefMI is defined outside of the loop. There should be no live range
+ // impact for this operand. Definition outside of the loop means:
+ // 1: the definition is outside of the loop.
+ // 2: the definition is in this loop, but it is a PHI in the loop header.
+ if (LI->getLoopFor(DefMI->getParent()) != ML ||
+ (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
+ continue;
+ // DefMI is inside the loop. Mark it as not profitable as sinking MI will
+ // enlarge DefMI live range.
+ // FIXME: check the register pressure in block SuccToSinkTo, if it is
+ // smaller than the limit after sinking, it is still profitable to sink.
+ return false;
+ }
+ }
+
+ // If MI is in loop and all its operands are alive across the whole loop, it
+ // is profitable to sink MI.
+ return true;
}
/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir
index 5e19b9d005e4..94cd5877e47d 100644
--- a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir
+++ b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions-1.mir
@@ -370,7 +370,6 @@ body: |
; CHECK: [[PHI5:%[0-9]+]]:gprc = PHI [[LI2]], %bb.2, %27, %bb.17
; CHECK: [[PHI6:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_]], %bb.2, %55, %bb.17
; CHECK: [[PHI7:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.2, %15, %bb.17
- ; CHECK: [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8
; CHECK: [[LWZU:%[0-9]+]]:gprc, [[LWZU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LWZU 8, [[PHI6]] :: (load 4 from %ir.46, !tbaa !2)
; CHECK: [[COPY10:%[0-9]+]]:gprc_and_gprc_nor0 = COPY [[PHI4]].sub_32
; CHECK: [[MULHWU1:%[0-9]+]]:gprc = MULHWU [[COPY10]], [[ORI]]
@@ -396,6 +395,7 @@ body: |
; CHECK: bb.12 (%ir-block.60):
; CHECK: successors: %bb.15(0x2aaaaaab), %bb.13(0x55555555)
; CHECK: [[PHI8:%[0-9]+]]:gprc = PHI [[ADDI2]], %bb.11, [[ISEL1]], %bb.10
+ ; CHECK: [[ADDI8_4:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[PHI7]], 8
; CHECK: [[COPY13:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[ADDI8_4]]
; CHECK: [[ADD4_3:%[0-9]+]]:gprc = nsw ADD4 [[PHI8]], [[ADD4_2]]
; CHECK: STW killed [[ADD4_3]], 0, [[ADDI8_4]] :: (store 4 into %ir.44, !tbaa !2)
diff --git a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll b/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll
deleted file mode 100644
index c13d18151996..000000000000
--- a/llvm/test/CodeGen/PowerPC/sink-down-more-instructions.ll
+++ /dev/null
@@ -1,97 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s
-
-define signext i32 @foo(i32 signext %0, i32 signext %1, i32* %2, i32* %3, i32 signext %4) {
-; CHECK-LABEL: foo:
-; CHECK: # %bb.0:
-; CHECK-NEXT: cmpwi r7, 1
-; CHECK-NEXT: blt cr0, .LBB0_8
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: addi r4, r5, -4
-; CHECK-NEXT: addi r8, r6, -4
-; CHECK-NEXT: clrldi r7, r7, 32
-; CHECK-NEXT: li r5, 0
-; CHECK-NEXT: mtctr r7
-; CHECK-NEXT: lis r7, -30584
-; CHECK-NEXT: li r6, 0
-; CHECK-NEXT: cmplwi r3, 3
-; CHECK-NEXT: cmplwi cr1, r3, 1
-; CHECK-NEXT: ori r7, r7, 34953
-; CHECK-NEXT: b .LBB0_4
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT: mulhwu r9, r6, r7
-; CHECK-NEXT: srwi r9, r9, 4
-; CHECK-NEXT: mulli r9, r9, 30
-; CHECK-NEXT: sub r9, r6, r9
-; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: addi r6, r6, 1
-; CHECK-NEXT: add r9, r9, r5
-; CHECK-NEXT: stw r9, 4(r8)
-; CHECK-NEXT: mr r8, r3
-; CHECK-NEXT: bdz .LBB0_8
-; CHECK-NEXT: .LBB0_4:
-; CHECK-NEXT: lwzu r9, 4(r4)
-; CHECK-NEXT: addi r3, r8, 4
-; CHECK-NEXT: add r5, r9, r5
-; CHECK-NEXT: beq cr0, .LBB0_7
-; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: bne cr1, .LBB0_2
-; CHECK-NEXT: # %bb.6:
-; CHECK-NEXT: slwi r9, r6, 1
-; CHECK-NEXT: b .LBB0_3
-; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: .LBB0_7:
-; CHECK-NEXT: addi r9, r6, 100
-; CHECK-NEXT: b .LBB0_3
-; CHECK-NEXT: .LBB0_8:
-; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: blr
- %6 = icmp sgt i32 %4, 0
- br i1 %6, label %7, label %9
-
-7: ; preds = %5
- %8 = zext i32 %4 to i64
- br label %10
-
-9: ; preds = %25, %5
- ret i32 undef
-
-10: ; preds = %7, %25
- %11 = phi i64 [ 0, %7 ], [ %29, %25 ]
- %12 = phi i32 [ 0, %7 ], [ %30, %25 ]
- %13 = phi i32 [ 0, %7 ], [ %16, %25 ]
- %14 = getelementptr inbounds i32, i32* %2, i64 %11
- %15 = load i32, i32* %14, align 4
- %16 = add nsw i32 %15, %13
- switch i32 %0, label %22 [
- i32 1, label %17
- i32 3, label %20
- ]
-
-17: ; preds = %10
- %18 = trunc i64 %11 to i32
- %19 = shl i32 %18, 1
- br label %25
-
-20: ; preds = %10
- %21 = add nuw nsw i32 %12, 100
- br label %25
-
-22: ; preds = %10
- %23 = trunc i64 %11 to i32
- %24 = urem i32 %23, 30
- br label %25
-
-25: ; preds = %22, %20, %17
- %26 = phi i32 [ %24, %22 ], [ %21, %20 ], [ %19, %17 ]
- %27 = add nsw i32 %26, %16
- %28 = getelementptr inbounds i32, i32* %3, i64 %11
- store i32 %27, i32* %28, align 4
- %29 = add nuw nsw i64 %11, 1
- %30 = add nuw nsw i32 %12, 1
- %31 = icmp eq i64 %29, %8
- br i1 %31, label %9, label %10
-}
-
More information about the llvm-commits
mailing list