[llvm] r366570 - [MachineCSE][MachinePRE] Avoid hoisting code from cold regions into hot BBs.

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 22 12:54:12 PDT 2019


Merged to llvm 9 in r366729. Please let me know if there are any follow-ups.

On Fri, Jul 19, 2019 at 5:57 AM Kai Luo via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
> Author: lkail
> Date: Fri Jul 19 05:58:16 2019
> New Revision: 366570
>
> URL: http://llvm.org/viewvc/llvm-project?rev=366570&view=rev
> Log:
> [MachineCSE][MachinePRE] Avoid hoisting code from cold regions into hot BBs.
>
> Summary:
> Current PRE hoists common computations into
> CMBB = DT->findNearestCommonDominator(MBB, MBB1).
> However, if CMBB is in a hot loop body, we might get performance
> degradation.
>
> Differential Revision: https://reviews.llvm.org/D64394
>
> Modified:
>     llvm/trunk/lib/CodeGen/MachineCSE.cpp
>     llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
>     llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll
>     llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll
>     llvm/trunk/test/CodeGen/X86/O3-pipeline.ll
>
> Modified: llvm/trunk/lib/CodeGen/MachineCSE.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineCSE.cpp?rev=366570&r1=366569&r2=366570&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineCSE.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MachineCSE.cpp Fri Jul 19 05:58:16 2019
> @@ -21,6 +21,7 @@
>  #include "llvm/Analysis/AliasAnalysis.h"
>  #include "llvm/Analysis/CFG.h"
>  #include "llvm/CodeGen/MachineBasicBlock.h"
> +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
>  #include "llvm/CodeGen/MachineDominators.h"
>  #include "llvm/CodeGen/MachineFunction.h"
>  #include "llvm/CodeGen/MachineFunctionPass.h"
> @@ -66,6 +67,7 @@ namespace {
>      AliasAnalysis *AA;
>      MachineDominatorTree *DT;
>      MachineRegisterInfo *MRI;
> +    MachineBlockFrequencyInfo *MBFI;
>
>    public:
>      static char ID; // Pass identification
> @@ -83,6 +85,8 @@ namespace {
>        AU.addPreservedID(MachineLoopInfoID);
>        AU.addRequired<MachineDominatorTree>();
>        AU.addPreserved<MachineDominatorTree>();
> +      AU.addRequired<MachineBlockFrequencyInfo>();
> +      AU.addPreserved<MachineBlockFrequencyInfo>();
>      }
>
>      void releaseMemory() override {
> @@ -133,6 +137,11 @@ namespace {
>      bool isPRECandidate(MachineInstr *MI);
>      bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
>      bool PerformSimplePRE(MachineDominatorTree *DT);
> +    /// Heuristics to see if it's beneficial to move common computations of MBB
> +    /// and MBB1 to CandidateBB.
> +    bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
> +                                MachineBasicBlock *MBB,
> +                                MachineBasicBlock *MBB1);
>    };
>
>  } // end anonymous namespace
> @@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(Machine
>      if (!CMBB->isLegalToHoistInto())
>        continue;
>
> +    if (!isBeneficalToHoistInto(CMBB, MBB, MBB1))
> +      continue;
> +
>      // Two instrs are partial redundant if their basic blocks are reachable
>      // from one to another but one doesn't dominate another.
>      if (CMBB != MBB1) {
> @@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(Machin
>    return Changed;
>  }
>
> +bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
> +                                        MachineBasicBlock *MBB,
> +                                        MachineBasicBlock *MBB1) {
> +  if (CandidateBB->getParent()->getFunction().hasMinSize())
> +    return true;
> +  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
> +  assert(DT->dominates(CandidateBB, MBB1) &&
> +         "CandidateBB should dominate MBB1");
> +  return MBFI->getBlockFreq(CandidateBB) <=
> +         MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
> +}
> +
>  bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
>    if (skipFunction(MF.getFunction()))
>      return false;
> @@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(Ma
>    MRI = &MF.getRegInfo();
>    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
>    DT = &getAnalysis<MachineDominatorTree>();
> +  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
>    LookAheadLimit = TII->getMachineCSELookAheadLimit();
>    bool ChangedPRE, ChangedCSE;
>    ChangedPRE = PerformSimplePRE(DT);
>
> Modified: llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll?rev=366570&r1=366569&r2=366570&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll Fri Jul 19 05:58:16 2019
> @@ -98,9 +98,9 @@
>  ; CHECK-NEXT:       MachineDominator Tree Construction
>  ; CHECK-NEXT:       Machine Natural Loop Construction
>  ; CHECK-NEXT:       Early Machine Loop Invariant Code Motion
> +; CHECK-NEXT:       Machine Block Frequency Analysis
>  ; CHECK-NEXT:       Machine Common Subexpression Elimination
>  ; CHECK-NEXT:       MachinePostDominator Tree Construction
> -; CHECK-NEXT:       Machine Block Frequency Analysis
>  ; CHECK-NEXT:       Machine code sinking
>  ; CHECK-NEXT:       Peephole Optimizations
>  ; CHECK-NEXT:       Remove dead machine instructions
>
> Modified: llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll?rev=366570&r1=366569&r2=366570&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll (original)
> +++ llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll Fri Jul 19 05:58:16 2019
> @@ -72,9 +72,9 @@
>  ; CHECK-NEXT:      MachineDominator Tree Construction
>  ; CHECK-NEXT:      Machine Natural Loop Construction
>  ; CHECK-NEXT:      Early Machine Loop Invariant Code Motion
> +; CHECK-NEXT:      Machine Block Frequency Analysis
>  ; CHECK-NEXT:      Machine Common Subexpression Elimination
>  ; CHECK-NEXT:      MachinePostDominator Tree Construction
> -; CHECK-NEXT:      Machine Block Frequency Analysis
>  ; CHECK-NEXT:      Machine code sinking
>  ; CHECK-NEXT:      Peephole Optimizations
>  ; CHECK-NEXT:      Remove dead machine instructions
>
> Modified: llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll?rev=366570&r1=366569&r2=366570&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll (original)
> +++ llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll Fri Jul 19 05:58:16 2019
> @@ -8,25 +8,25 @@ define i32 @t(i32 %n, i32 %delta, i32 %a
>  ; CHECK-P9:       # %bb.0: # %entry
>  ; CHECK-P9-NEXT:    lis r7, 0
>  ; CHECK-P9-NEXT:    li r6, 0
> +; CHECK-P9-NEXT:    li r8, 0
>  ; CHECK-P9-NEXT:    li r9, 0
> -; CHECK-P9-NEXT:    li r10, 0
>  ; CHECK-P9-NEXT:    ori r7, r7, 65535
>  ; CHECK-P9-NEXT:    .p2align 5
>  ; CHECK-P9-NEXT:  .LBB0_1: # %header
>  ; CHECK-P9-NEXT:    #
> -; CHECK-P9-NEXT:    addi r10, r10, 1
> -; CHECK-P9-NEXT:    cmpw r10, r3
> -; CHECK-P9-NEXT:    addi r8, r5, 1024
> +; CHECK-P9-NEXT:    addi r9, r9, 1
> +; CHECK-P9-NEXT:    cmpw r9, r3
>  ; CHECK-P9-NEXT:    blt cr0, .LBB0_4
>  ; CHECK-P9-NEXT:  # %bb.2: # %cont
>  ; CHECK-P9-NEXT:    #
> -; CHECK-P9-NEXT:    add r9, r9, r4
> -; CHECK-P9-NEXT:    cmpw r9, r7
> +; CHECK-P9-NEXT:    add r8, r8, r4
> +; CHECK-P9-NEXT:    cmpw r8, r7
>  ; CHECK-P9-NEXT:    bgt cr0, .LBB0_1
>  ; CHECK-P9-NEXT:  # %bb.3: # %cont.1
> -; CHECK-P9-NEXT:    mr r6, r8
> +; CHECK-P9-NEXT:    addi r6, r5, 1024
>  ; CHECK-P9-NEXT:  .LBB0_4: # %return
> -; CHECK-P9-NEXT:    mullw r3, r6, r8
> +; CHECK-P9-NEXT:    addi r3, r5, 1024
> +; CHECK-P9-NEXT:    mullw r3, r6, r3
>  ; CHECK-P9-NEXT:    blr
>  entry:
>    br label %header
> @@ -75,16 +75,19 @@ define dso_local signext i32 @foo(i32 si
>  ; CHECK-P9-NEXT:    lis r3, 21845
>  ; CHECK-P9-NEXT:    add r28, r30, r29
>  ; CHECK-P9-NEXT:    ori r27, r3, 21846
> -; CHECK-P9-NEXT:    b .LBB1_3
> +; CHECK-P9-NEXT:    b .LBB1_4
>  ; CHECK-P9-NEXT:    .p2align 4
>  ; CHECK-P9-NEXT:  .LBB1_1: # %sw.bb3
>  ; CHECK-P9-NEXT:    #
> -; CHECK-P9-NEXT:    add r28, r3, r28
> +; CHECK-P9-NEXT:    mulli r3, r30, 23
>  ; CHECK-P9-NEXT:  .LBB1_2: # %sw.epilog
>  ; CHECK-P9-NEXT:    #
> +; CHECK-P9-NEXT:    add r28, r3, r28
> +; CHECK-P9-NEXT:  .LBB1_3: # %sw.epilog
> +; CHECK-P9-NEXT:    #
>  ; CHECK-P9-NEXT:    cmpwi r28, 1025
> -; CHECK-P9-NEXT:    bge cr0, .LBB1_6
> -; CHECK-P9-NEXT:  .LBB1_3: # %while.cond
> +; CHECK-P9-NEXT:    bge cr0, .LBB1_7
> +; CHECK-P9-NEXT:  .LBB1_4: # %while.cond
>  ; CHECK-P9-NEXT:    #
>  ; CHECK-P9-NEXT:    extsw r3, r29
>  ; CHECK-P9-NEXT:    bl bar
> @@ -101,41 +104,40 @@ define dso_local signext i32 @foo(i32 si
>  ; CHECK-P9-NEXT:    add r4, r4, r5
>  ; CHECK-P9-NEXT:    slwi r5, r4, 1
>  ; CHECK-P9-NEXT:    add r4, r4, r5
> -; CHECK-P9-NEXT:    subf r5, r4, r3
> -; CHECK-P9-NEXT:    mulli r4, r29, 13
> -; CHECK-P9-NEXT:    mulli r3, r30, 23
> -; CHECK-P9-NEXT:    cmplwi r5, 1
> +; CHECK-P9-NEXT:    subf r3, r4, r3
> +; CHECK-P9-NEXT:    cmplwi r3, 1
>  ; CHECK-P9-NEXT:    beq cr0, .LBB1_1
> -; CHECK-P9-NEXT:  # %bb.4: # %while.cond
> -; CHECK-P9-NEXT:    #
> -; CHECK-P9-NEXT:    cmplwi r5, 0
> -; CHECK-P9-NEXT:    bne cr0, .LBB1_2
> -; CHECK-P9-NEXT:  # %bb.5: # %sw.bb
> +; CHECK-P9-NEXT:  # %bb.5: # %while.cond
>  ; CHECK-P9-NEXT:    #
> -; CHECK-P9-NEXT:    add r28, r4, r28
> -; CHECK-P9-NEXT:    cmpwi r28, 1025
> -; CHECK-P9-NEXT:    blt cr0, .LBB1_3
> -; CHECK-P9-NEXT:  .LBB1_6: # %while.end
> -; CHECK-P9-NEXT:    lis r5, -13108
> -; CHECK-P9-NEXT:    ori r5, r5, 52429
> -; CHECK-P9-NEXT:    mullw r5, r28, r5
> -; CHECK-P9-NEXT:    lis r6, 13107
> -; CHECK-P9-NEXT:    ori r6, r6, 13108
> -; CHECK-P9-NEXT:    cmplw r5, r6
> -; CHECK-P9-NEXT:    blt cr0, .LBB1_8
> -; CHECK-P9-NEXT:  # %bb.7: # %if.then8
> -; CHECK-P9-NEXT:    extsw r4, r4
> -; CHECK-P9-NEXT:    extsw r5, r28
> +; CHECK-P9-NEXT:    cmplwi r3, 0
> +; CHECK-P9-NEXT:    bne cr0, .LBB1_3
> +; CHECK-P9-NEXT:  # %bb.6: # %sw.bb
> +; CHECK-P9-NEXT:    #
> +; CHECK-P9-NEXT:    mulli r3, r29, 13
> +; CHECK-P9-NEXT:    b .LBB1_2
> +; CHECK-P9-NEXT:  .LBB1_7: # %while.end
> +; CHECK-P9-NEXT:    lis r3, -13108
> +; CHECK-P9-NEXT:    ori r3, r3, 52429
> +; CHECK-P9-NEXT:    mullw r3, r28, r3
> +; CHECK-P9-NEXT:    lis r4, 13107
> +; CHECK-P9-NEXT:    ori r4, r4, 13108
> +; CHECK-P9-NEXT:    cmplw r3, r4
> +; CHECK-P9-NEXT:    blt cr0, .LBB1_9
> +; CHECK-P9-NEXT:  # %bb.8: # %if.then8
> +; CHECK-P9-NEXT:    mulli r3, r29, 13
> +; CHECK-P9-NEXT:    mulli r5, r30, 23
> +; CHECK-P9-NEXT:    extsw r4, r28
>  ; CHECK-P9-NEXT:    extsw r3, r3
> +; CHECK-P9-NEXT:    extsw r5, r5
> +; CHECK-P9-NEXT:    sub r3, r4, r3
>  ; CHECK-P9-NEXT:    sub r4, r5, r4
> -; CHECK-P9-NEXT:    sub r3, r3, r5
> -; CHECK-P9-NEXT:    rldicl r4, r4, 1, 63
>  ; CHECK-P9-NEXT:    rldicl r3, r3, 1, 63
> -; CHECK-P9-NEXT:    or r3, r4, r3
> -; CHECK-P9-NEXT:    b .LBB1_9
> -; CHECK-P9-NEXT:  .LBB1_8: # %cleanup20
> -; CHECK-P9-NEXT:    li r3, 0
> +; CHECK-P9-NEXT:    rldicl r4, r4, 1, 63
> +; CHECK-P9-NEXT:    or r3, r3, r4
> +; CHECK-P9-NEXT:    b .LBB1_10
>  ; CHECK-P9-NEXT:  .LBB1_9: # %cleanup20
> +; CHECK-P9-NEXT:    li r3, 0
> +; CHECK-P9-NEXT:  .LBB1_10: # %cleanup20
>  ; CHECK-P9-NEXT:    addi r1, r1, 80
>  ; CHECK-P9-NEXT:    ld r0, 16(r1)
>  ; CHECK-P9-NEXT:    mtlr r0
>
> Modified: llvm/trunk/test/CodeGen/X86/O3-pipeline.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/O3-pipeline.ll?rev=366570&r1=366569&r2=366570&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/O3-pipeline.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/O3-pipeline.ll Fri Jul 19 05:58:16 2019
> @@ -84,9 +84,9 @@
>  ; CHECK-NEXT:       MachineDominator Tree Construction
>  ; CHECK-NEXT:       Machine Natural Loop Construction
>  ; CHECK-NEXT:       Early Machine Loop Invariant Code Motion
> +; CHECK-NEXT:       Machine Block Frequency Analysis
>  ; CHECK-NEXT:       Machine Common Subexpression Elimination
>  ; CHECK-NEXT:       MachinePostDominator Tree Construction
> -; CHECK-NEXT:       Machine Block Frequency Analysis
>  ; CHECK-NEXT:       Machine code sinking
>  ; CHECK-NEXT:       Peephole Optimizations
>  ; CHECK-NEXT:       Remove dead machine instructions
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list