[llvm] r366570 - [MachineCSE][MachinePRE] Avoid hoisting code from cold regions into hot BBs.

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 19 05:58:16 PDT 2019


Author: lkail
Date: Fri Jul 19 05:58:16 2019
New Revision: 366570

URL: http://llvm.org/viewvc/llvm-project?rev=366570&view=rev
Log:
[MachineCSE][MachinePRE] Avoid hoisting code from cold regions into hot BBs.

Summary:
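Currently, PRE hoists common computations into
CMBB = DT->findNearestCommonDominator(MBB, MBB1).
However, if CMBB is in a hot loop body while MBB and MBB1 are rarely
executed, the hoisted computation runs far more often than before and we
might get performance degradation.
This patch uses MachineBlockFrequencyInfo to hoist into CMBB only when its
block frequency does not exceed the sum of the block frequencies of MBB and
MBB1; functions optimized for minimum size (hasMinSize()) are always hoisted.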

Differential Revision: https://reviews.llvm.org/D64394

Modified:
    llvm/trunk/lib/CodeGen/MachineCSE.cpp
    llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
    llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll
    llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll
    llvm/trunk/test/CodeGen/X86/O3-pipeline.ll

Modified: llvm/trunk/lib/CodeGen/MachineCSE.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineCSE.cpp?rev=366570&r1=366569&r2=366570&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineCSE.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineCSE.cpp Fri Jul 19 05:58:16 2019
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -66,6 +67,7 @@ namespace {
     AliasAnalysis *AA;
     MachineDominatorTree *DT;
     MachineRegisterInfo *MRI;
+    MachineBlockFrequencyInfo *MBFI;
 
   public:
     static char ID; // Pass identification
@@ -83,6 +85,8 @@ namespace {
       AU.addPreservedID(MachineLoopInfoID);
       AU.addRequired<MachineDominatorTree>();
       AU.addPreserved<MachineDominatorTree>();
+      AU.addRequired<MachineBlockFrequencyInfo>();
+      AU.addPreserved<MachineBlockFrequencyInfo>();
     }
 
     void releaseMemory() override {
@@ -133,6 +137,11 @@ namespace {
     bool isPRECandidate(MachineInstr *MI);
     bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
     bool PerformSimplePRE(MachineDominatorTree *DT);
+    /// Heuristics to see if it's beneficial to move common computations of MBB
+    /// and MBB1 to CandidateBB.
+    bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
+                                MachineBasicBlock *MBB,
+                                MachineBasicBlock *MBB1);
   };
 
 } // end anonymous namespace
@@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(Machine
     if (!CMBB->isLegalToHoistInto())
       continue;
 
+    if (!isBeneficalToHoistInto(CMBB, MBB, MBB1))
+      continue;
+
     // Two instrs are partial redundant if their basic blocks are reachable
     // from one to another but one doesn't dominate another.
     if (CMBB != MBB1) {
@@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(Machin
   return Changed;
 }
 
+bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
+                                        MachineBasicBlock *MBB,
+                                        MachineBasicBlock *MBB1) {
+  if (CandidateBB->getParent()->getFunction().hasMinSize())
+    return true;
+  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
+  assert(DT->dominates(CandidateBB, MBB1) &&
+         "CandidateBB should dominate MBB1");
+  return MBFI->getBlockFreq(CandidateBB) <=
+         MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
+}
+
 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(Ma
   MRI = &MF.getRegInfo();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   DT = &getAnalysis<MachineDominatorTree>();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   LookAheadLimit = TII->getMachineCSELookAheadLimit();
   bool ChangedPRE, ChangedCSE;
   ChangedPRE = PerformSimplePRE(DT);

Modified: llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll?rev=366570&r1=366569&r2=366570&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll Fri Jul 19 05:58:16 2019
@@ -98,9 +98,9 @@
 ; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       Machine Natural Loop Construction
 ; CHECK-NEXT:       Early Machine Loop Invariant Code Motion
+; CHECK-NEXT:       Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Common Subexpression Elimination
 ; CHECK-NEXT:       MachinePostDominator Tree Construction
-; CHECK-NEXT:       Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine code sinking
 ; CHECK-NEXT:       Peephole Optimizations
 ; CHECK-NEXT:       Remove dead machine instructions

Modified: llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll?rev=366570&r1=366569&r2=366570&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll Fri Jul 19 05:58:16 2019
@@ -72,9 +72,9 @@
 ; CHECK-NEXT:      MachineDominator Tree Construction
 ; CHECK-NEXT:      Machine Natural Loop Construction
 ; CHECK-NEXT:      Early Machine Loop Invariant Code Motion
+; CHECK-NEXT:      Machine Block Frequency Analysis
 ; CHECK-NEXT:      Machine Common Subexpression Elimination
 ; CHECK-NEXT:      MachinePostDominator Tree Construction
-; CHECK-NEXT:      Machine Block Frequency Analysis
 ; CHECK-NEXT:      Machine code sinking
 ; CHECK-NEXT:      Peephole Optimizations
 ; CHECK-NEXT:      Remove dead machine instructions

Modified: llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll?rev=366570&r1=366569&r2=366570&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll Fri Jul 19 05:58:16 2019
@@ -8,25 +8,25 @@ define i32 @t(i32 %n, i32 %delta, i32 %a
 ; CHECK-P9:       # %bb.0: # %entry
 ; CHECK-P9-NEXT:    lis r7, 0
 ; CHECK-P9-NEXT:    li r6, 0
+; CHECK-P9-NEXT:    li r8, 0
 ; CHECK-P9-NEXT:    li r9, 0
-; CHECK-P9-NEXT:    li r10, 0
 ; CHECK-P9-NEXT:    ori r7, r7, 65535
 ; CHECK-P9-NEXT:    .p2align 5
 ; CHECK-P9-NEXT:  .LBB0_1: # %header
 ; CHECK-P9-NEXT:    #
-; CHECK-P9-NEXT:    addi r10, r10, 1
-; CHECK-P9-NEXT:    cmpw r10, r3
-; CHECK-P9-NEXT:    addi r8, r5, 1024
+; CHECK-P9-NEXT:    addi r9, r9, 1
+; CHECK-P9-NEXT:    cmpw r9, r3
 ; CHECK-P9-NEXT:    blt cr0, .LBB0_4
 ; CHECK-P9-NEXT:  # %bb.2: # %cont
 ; CHECK-P9-NEXT:    #
-; CHECK-P9-NEXT:    add r9, r9, r4
-; CHECK-P9-NEXT:    cmpw r9, r7
+; CHECK-P9-NEXT:    add r8, r8, r4
+; CHECK-P9-NEXT:    cmpw r8, r7
 ; CHECK-P9-NEXT:    bgt cr0, .LBB0_1
 ; CHECK-P9-NEXT:  # %bb.3: # %cont.1
-; CHECK-P9-NEXT:    mr r6, r8
+; CHECK-P9-NEXT:    addi r6, r5, 1024
 ; CHECK-P9-NEXT:  .LBB0_4: # %return
-; CHECK-P9-NEXT:    mullw r3, r6, r8
+; CHECK-P9-NEXT:    addi r3, r5, 1024
+; CHECK-P9-NEXT:    mullw r3, r6, r3
 ; CHECK-P9-NEXT:    blr
 entry:
   br label %header
@@ -75,16 +75,19 @@ define dso_local signext i32 @foo(i32 si
 ; CHECK-P9-NEXT:    lis r3, 21845
 ; CHECK-P9-NEXT:    add r28, r30, r29
 ; CHECK-P9-NEXT:    ori r27, r3, 21846
-; CHECK-P9-NEXT:    b .LBB1_3
+; CHECK-P9-NEXT:    b .LBB1_4
 ; CHECK-P9-NEXT:    .p2align 4
 ; CHECK-P9-NEXT:  .LBB1_1: # %sw.bb3
 ; CHECK-P9-NEXT:    #
-; CHECK-P9-NEXT:    add r28, r3, r28
+; CHECK-P9-NEXT:    mulli r3, r30, 23
 ; CHECK-P9-NEXT:  .LBB1_2: # %sw.epilog
 ; CHECK-P9-NEXT:    #
+; CHECK-P9-NEXT:    add r28, r3, r28
+; CHECK-P9-NEXT:  .LBB1_3: # %sw.epilog
+; CHECK-P9-NEXT:    #
 ; CHECK-P9-NEXT:    cmpwi r28, 1025
-; CHECK-P9-NEXT:    bge cr0, .LBB1_6
-; CHECK-P9-NEXT:  .LBB1_3: # %while.cond
+; CHECK-P9-NEXT:    bge cr0, .LBB1_7
+; CHECK-P9-NEXT:  .LBB1_4: # %while.cond
 ; CHECK-P9-NEXT:    #
 ; CHECK-P9-NEXT:    extsw r3, r29
 ; CHECK-P9-NEXT:    bl bar
@@ -101,41 +104,40 @@ define dso_local signext i32 @foo(i32 si
 ; CHECK-P9-NEXT:    add r4, r4, r5
 ; CHECK-P9-NEXT:    slwi r5, r4, 1
 ; CHECK-P9-NEXT:    add r4, r4, r5
-; CHECK-P9-NEXT:    subf r5, r4, r3
-; CHECK-P9-NEXT:    mulli r4, r29, 13
-; CHECK-P9-NEXT:    mulli r3, r30, 23
-; CHECK-P9-NEXT:    cmplwi r5, 1
+; CHECK-P9-NEXT:    subf r3, r4, r3
+; CHECK-P9-NEXT:    cmplwi r3, 1
 ; CHECK-P9-NEXT:    beq cr0, .LBB1_1
-; CHECK-P9-NEXT:  # %bb.4: # %while.cond
-; CHECK-P9-NEXT:    #
-; CHECK-P9-NEXT:    cmplwi r5, 0
-; CHECK-P9-NEXT:    bne cr0, .LBB1_2
-; CHECK-P9-NEXT:  # %bb.5: # %sw.bb
+; CHECK-P9-NEXT:  # %bb.5: # %while.cond
 ; CHECK-P9-NEXT:    #
-; CHECK-P9-NEXT:    add r28, r4, r28
-; CHECK-P9-NEXT:    cmpwi r28, 1025
-; CHECK-P9-NEXT:    blt cr0, .LBB1_3
-; CHECK-P9-NEXT:  .LBB1_6: # %while.end
-; CHECK-P9-NEXT:    lis r5, -13108
-; CHECK-P9-NEXT:    ori r5, r5, 52429
-; CHECK-P9-NEXT:    mullw r5, r28, r5
-; CHECK-P9-NEXT:    lis r6, 13107
-; CHECK-P9-NEXT:    ori r6, r6, 13108
-; CHECK-P9-NEXT:    cmplw r5, r6
-; CHECK-P9-NEXT:    blt cr0, .LBB1_8
-; CHECK-P9-NEXT:  # %bb.7: # %if.then8
-; CHECK-P9-NEXT:    extsw r4, r4
-; CHECK-P9-NEXT:    extsw r5, r28
+; CHECK-P9-NEXT:    cmplwi r3, 0
+; CHECK-P9-NEXT:    bne cr0, .LBB1_3
+; CHECK-P9-NEXT:  # %bb.6: # %sw.bb
+; CHECK-P9-NEXT:    #
+; CHECK-P9-NEXT:    mulli r3, r29, 13
+; CHECK-P9-NEXT:    b .LBB1_2
+; CHECK-P9-NEXT:  .LBB1_7: # %while.end
+; CHECK-P9-NEXT:    lis r3, -13108
+; CHECK-P9-NEXT:    ori r3, r3, 52429
+; CHECK-P9-NEXT:    mullw r3, r28, r3
+; CHECK-P9-NEXT:    lis r4, 13107
+; CHECK-P9-NEXT:    ori r4, r4, 13108
+; CHECK-P9-NEXT:    cmplw r3, r4
+; CHECK-P9-NEXT:    blt cr0, .LBB1_9
+; CHECK-P9-NEXT:  # %bb.8: # %if.then8
+; CHECK-P9-NEXT:    mulli r3, r29, 13
+; CHECK-P9-NEXT:    mulli r5, r30, 23
+; CHECK-P9-NEXT:    extsw r4, r28
 ; CHECK-P9-NEXT:    extsw r3, r3
+; CHECK-P9-NEXT:    extsw r5, r5
+; CHECK-P9-NEXT:    sub r3, r4, r3
 ; CHECK-P9-NEXT:    sub r4, r5, r4
-; CHECK-P9-NEXT:    sub r3, r3, r5
-; CHECK-P9-NEXT:    rldicl r4, r4, 1, 63
 ; CHECK-P9-NEXT:    rldicl r3, r3, 1, 63
-; CHECK-P9-NEXT:    or r3, r4, r3
-; CHECK-P9-NEXT:    b .LBB1_9
-; CHECK-P9-NEXT:  .LBB1_8: # %cleanup20
-; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:    rldicl r4, r4, 1, 63
+; CHECK-P9-NEXT:    or r3, r3, r4
+; CHECK-P9-NEXT:    b .LBB1_10
 ; CHECK-P9-NEXT:  .LBB1_9: # %cleanup20
+; CHECK-P9-NEXT:    li r3, 0
+; CHECK-P9-NEXT:  .LBB1_10: # %cleanup20
 ; CHECK-P9-NEXT:    addi r1, r1, 80
 ; CHECK-P9-NEXT:    ld r0, 16(r1)
 ; CHECK-P9-NEXT:    mtlr r0

Modified: llvm/trunk/test/CodeGen/X86/O3-pipeline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/O3-pipeline.ll?rev=366570&r1=366569&r2=366570&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/O3-pipeline.ll (original)
+++ llvm/trunk/test/CodeGen/X86/O3-pipeline.ll Fri Jul 19 05:58:16 2019
@@ -84,9 +84,9 @@
 ; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       Machine Natural Loop Construction
 ; CHECK-NEXT:       Early Machine Loop Invariant Code Motion
+; CHECK-NEXT:       Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Common Subexpression Elimination
 ; CHECK-NEXT:       MachinePostDominator Tree Construction
-; CHECK-NEXT:       Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine code sinking
 ; CHECK-NEXT:       Peephole Optimizations
 ; CHECK-NEXT:       Remove dead machine instructions




More information about the llvm-commits mailing list