[llvm] r366570 - [MachineCSE][MachinePRE] Avoid hoisting code from cold regions into hot BBs.
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 22 12:54:12 PDT 2019
Merged to LLVM 9 in r366729. Please let me know if there are any follow-ups.
On Fri, Jul 19, 2019 at 5:57 AM Kai Luo via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
> Author: lkail
> Date: Fri Jul 19 05:58:16 2019
> New Revision: 366570
>
> URL: http://llvm.org/viewvc/llvm-project?rev=366570&view=rev
> Log:
> [MachineCSE][MachinePRE] Avoid hoisting code from cold regions into hot BBs.
>
> Summary:
> Currently, PRE hoists common computations into
> CMBB = DT->findNearestCommonDominator(MBB, MBB1).
> However, if CMBB is in a hot loop body, this hoisting may degrade
> performance.
>
> Differential Revision: https://reviews.llvm.org/D64394
>
> Modified:
> llvm/trunk/lib/CodeGen/MachineCSE.cpp
> llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
> llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll
> llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll
> llvm/trunk/test/CodeGen/X86/O3-pipeline.ll
>
> Modified: llvm/trunk/lib/CodeGen/MachineCSE.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineCSE.cpp?rev=366570&r1=366569&r2=366570&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/MachineCSE.cpp (original)
> +++ llvm/trunk/lib/CodeGen/MachineCSE.cpp Fri Jul 19 05:58:16 2019
> @@ -21,6 +21,7 @@
> #include "llvm/Analysis/AliasAnalysis.h"
> #include "llvm/Analysis/CFG.h"
> #include "llvm/CodeGen/MachineBasicBlock.h"
> +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
> #include "llvm/CodeGen/MachineDominators.h"
> #include "llvm/CodeGen/MachineFunction.h"
> #include "llvm/CodeGen/MachineFunctionPass.h"
> @@ -66,6 +67,7 @@ namespace {
> AliasAnalysis *AA;
> MachineDominatorTree *DT;
> MachineRegisterInfo *MRI;
> + MachineBlockFrequencyInfo *MBFI;
>
> public:
> static char ID; // Pass identification
> @@ -83,6 +85,8 @@ namespace {
> AU.addPreservedID(MachineLoopInfoID);
> AU.addRequired<MachineDominatorTree>();
> AU.addPreserved<MachineDominatorTree>();
> + AU.addRequired<MachineBlockFrequencyInfo>();
> + AU.addPreserved<MachineBlockFrequencyInfo>();
> }
>
> void releaseMemory() override {
> @@ -133,6 +137,11 @@ namespace {
> bool isPRECandidate(MachineInstr *MI);
> bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
> bool PerformSimplePRE(MachineDominatorTree *DT);
> + /// Heuristics to see if it's beneficial to move common computations of MBB
> + /// and MBB1 to CandidateBB.
> + bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
> + MachineBasicBlock *MBB,
> + MachineBasicBlock *MBB1);
> };
>
> } // end anonymous namespace
> @@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(Machine
> if (!CMBB->isLegalToHoistInto())
> continue;
>
> + if (!isBeneficalToHoistInto(CMBB, MBB, MBB1))
> + continue;
> +
> // Two instrs are partial redundant if their basic blocks are reachable
> // from one to another but one doesn't dominate another.
> if (CMBB != MBB1) {
> @@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(Machin
> return Changed;
> }
>
> +bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
> + MachineBasicBlock *MBB,
> + MachineBasicBlock *MBB1) {
> + if (CandidateBB->getParent()->getFunction().hasMinSize())
> + return true;
> + assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
> + assert(DT->dominates(CandidateBB, MBB1) &&
> + "CandidateBB should dominate MBB1");
> + return MBFI->getBlockFreq(CandidateBB) <=
> + MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
> +}
> +
> bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
> if (skipFunction(MF.getFunction()))
> return false;
> @@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(Ma
> MRI = &MF.getRegInfo();
> AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
> DT = &getAnalysis<MachineDominatorTree>();
> + MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
> LookAheadLimit = TII->getMachineCSELookAheadLimit();
> bool ChangedPRE, ChangedCSE;
> ChangedPRE = PerformSimplePRE(DT);
>
> Modified: llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll?rev=366570&r1=366569&r2=366570&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/O3-pipeline.ll Fri Jul 19 05:58:16 2019
> @@ -98,9 +98,9 @@
> ; CHECK-NEXT: MachineDominator Tree Construction
> ; CHECK-NEXT: Machine Natural Loop Construction
> ; CHECK-NEXT: Early Machine Loop Invariant Code Motion
> +; CHECK-NEXT: Machine Block Frequency Analysis
> ; CHECK-NEXT: Machine Common Subexpression Elimination
> ; CHECK-NEXT: MachinePostDominator Tree Construction
> -; CHECK-NEXT: Machine Block Frequency Analysis
> ; CHECK-NEXT: Machine code sinking
> ; CHECK-NEXT: Peephole Optimizations
> ; CHECK-NEXT: Remove dead machine instructions
>
> Modified: llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll?rev=366570&r1=366569&r2=366570&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll (original)
> +++ llvm/trunk/test/CodeGen/ARM/O3-pipeline.ll Fri Jul 19 05:58:16 2019
> @@ -72,9 +72,9 @@
> ; CHECK-NEXT: MachineDominator Tree Construction
> ; CHECK-NEXT: Machine Natural Loop Construction
> ; CHECK-NEXT: Early Machine Loop Invariant Code Motion
> +; CHECK-NEXT: Machine Block Frequency Analysis
> ; CHECK-NEXT: Machine Common Subexpression Elimination
> ; CHECK-NEXT: MachinePostDominator Tree Construction
> -; CHECK-NEXT: Machine Block Frequency Analysis
> ; CHECK-NEXT: Machine code sinking
> ; CHECK-NEXT: Peephole Optimizations
> ; CHECK-NEXT: Remove dead machine instructions
>
> Modified: llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll?rev=366570&r1=366569&r2=366570&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll (original)
> +++ llvm/trunk/test/CodeGen/PowerPC/machine-pre.ll Fri Jul 19 05:58:16 2019
> @@ -8,25 +8,25 @@ define i32 @t(i32 %n, i32 %delta, i32 %a
> ; CHECK-P9: # %bb.0: # %entry
> ; CHECK-P9-NEXT: lis r7, 0
> ; CHECK-P9-NEXT: li r6, 0
> +; CHECK-P9-NEXT: li r8, 0
> ; CHECK-P9-NEXT: li r9, 0
> -; CHECK-P9-NEXT: li r10, 0
> ; CHECK-P9-NEXT: ori r7, r7, 65535
> ; CHECK-P9-NEXT: .p2align 5
> ; CHECK-P9-NEXT: .LBB0_1: # %header
> ; CHECK-P9-NEXT: #
> -; CHECK-P9-NEXT: addi r10, r10, 1
> -; CHECK-P9-NEXT: cmpw r10, r3
> -; CHECK-P9-NEXT: addi r8, r5, 1024
> +; CHECK-P9-NEXT: addi r9, r9, 1
> +; CHECK-P9-NEXT: cmpw r9, r3
> ; CHECK-P9-NEXT: blt cr0, .LBB0_4
> ; CHECK-P9-NEXT: # %bb.2: # %cont
> ; CHECK-P9-NEXT: #
> -; CHECK-P9-NEXT: add r9, r9, r4
> -; CHECK-P9-NEXT: cmpw r9, r7
> +; CHECK-P9-NEXT: add r8, r8, r4
> +; CHECK-P9-NEXT: cmpw r8, r7
> ; CHECK-P9-NEXT: bgt cr0, .LBB0_1
> ; CHECK-P9-NEXT: # %bb.3: # %cont.1
> -; CHECK-P9-NEXT: mr r6, r8
> +; CHECK-P9-NEXT: addi r6, r5, 1024
> ; CHECK-P9-NEXT: .LBB0_4: # %return
> -; CHECK-P9-NEXT: mullw r3, r6, r8
> +; CHECK-P9-NEXT: addi r3, r5, 1024
> +; CHECK-P9-NEXT: mullw r3, r6, r3
> ; CHECK-P9-NEXT: blr
> entry:
> br label %header
> @@ -75,16 +75,19 @@ define dso_local signext i32 @foo(i32 si
> ; CHECK-P9-NEXT: lis r3, 21845
> ; CHECK-P9-NEXT: add r28, r30, r29
> ; CHECK-P9-NEXT: ori r27, r3, 21846
> -; CHECK-P9-NEXT: b .LBB1_3
> +; CHECK-P9-NEXT: b .LBB1_4
> ; CHECK-P9-NEXT: .p2align 4
> ; CHECK-P9-NEXT: .LBB1_1: # %sw.bb3
> ; CHECK-P9-NEXT: #
> -; CHECK-P9-NEXT: add r28, r3, r28
> +; CHECK-P9-NEXT: mulli r3, r30, 23
> ; CHECK-P9-NEXT: .LBB1_2: # %sw.epilog
> ; CHECK-P9-NEXT: #
> +; CHECK-P9-NEXT: add r28, r3, r28
> +; CHECK-P9-NEXT: .LBB1_3: # %sw.epilog
> +; CHECK-P9-NEXT: #
> ; CHECK-P9-NEXT: cmpwi r28, 1025
> -; CHECK-P9-NEXT: bge cr0, .LBB1_6
> -; CHECK-P9-NEXT: .LBB1_3: # %while.cond
> +; CHECK-P9-NEXT: bge cr0, .LBB1_7
> +; CHECK-P9-NEXT: .LBB1_4: # %while.cond
> ; CHECK-P9-NEXT: #
> ; CHECK-P9-NEXT: extsw r3, r29
> ; CHECK-P9-NEXT: bl bar
> @@ -101,41 +104,40 @@ define dso_local signext i32 @foo(i32 si
> ; CHECK-P9-NEXT: add r4, r4, r5
> ; CHECK-P9-NEXT: slwi r5, r4, 1
> ; CHECK-P9-NEXT: add r4, r4, r5
> -; CHECK-P9-NEXT: subf r5, r4, r3
> -; CHECK-P9-NEXT: mulli r4, r29, 13
> -; CHECK-P9-NEXT: mulli r3, r30, 23
> -; CHECK-P9-NEXT: cmplwi r5, 1
> +; CHECK-P9-NEXT: subf r3, r4, r3
> +; CHECK-P9-NEXT: cmplwi r3, 1
> ; CHECK-P9-NEXT: beq cr0, .LBB1_1
> -; CHECK-P9-NEXT: # %bb.4: # %while.cond
> -; CHECK-P9-NEXT: #
> -; CHECK-P9-NEXT: cmplwi r5, 0
> -; CHECK-P9-NEXT: bne cr0, .LBB1_2
> -; CHECK-P9-NEXT: # %bb.5: # %sw.bb
> +; CHECK-P9-NEXT: # %bb.5: # %while.cond
> ; CHECK-P9-NEXT: #
> -; CHECK-P9-NEXT: add r28, r4, r28
> -; CHECK-P9-NEXT: cmpwi r28, 1025
> -; CHECK-P9-NEXT: blt cr0, .LBB1_3
> -; CHECK-P9-NEXT: .LBB1_6: # %while.end
> -; CHECK-P9-NEXT: lis r5, -13108
> -; CHECK-P9-NEXT: ori r5, r5, 52429
> -; CHECK-P9-NEXT: mullw r5, r28, r5
> -; CHECK-P9-NEXT: lis r6, 13107
> -; CHECK-P9-NEXT: ori r6, r6, 13108
> -; CHECK-P9-NEXT: cmplw r5, r6
> -; CHECK-P9-NEXT: blt cr0, .LBB1_8
> -; CHECK-P9-NEXT: # %bb.7: # %if.then8
> -; CHECK-P9-NEXT: extsw r4, r4
> -; CHECK-P9-NEXT: extsw r5, r28
> +; CHECK-P9-NEXT: cmplwi r3, 0
> +; CHECK-P9-NEXT: bne cr0, .LBB1_3
> +; CHECK-P9-NEXT: # %bb.6: # %sw.bb
> +; CHECK-P9-NEXT: #
> +; CHECK-P9-NEXT: mulli r3, r29, 13
> +; CHECK-P9-NEXT: b .LBB1_2
> +; CHECK-P9-NEXT: .LBB1_7: # %while.end
> +; CHECK-P9-NEXT: lis r3, -13108
> +; CHECK-P9-NEXT: ori r3, r3, 52429
> +; CHECK-P9-NEXT: mullw r3, r28, r3
> +; CHECK-P9-NEXT: lis r4, 13107
> +; CHECK-P9-NEXT: ori r4, r4, 13108
> +; CHECK-P9-NEXT: cmplw r3, r4
> +; CHECK-P9-NEXT: blt cr0, .LBB1_9
> +; CHECK-P9-NEXT: # %bb.8: # %if.then8
> +; CHECK-P9-NEXT: mulli r3, r29, 13
> +; CHECK-P9-NEXT: mulli r5, r30, 23
> +; CHECK-P9-NEXT: extsw r4, r28
> ; CHECK-P9-NEXT: extsw r3, r3
> +; CHECK-P9-NEXT: extsw r5, r5
> +; CHECK-P9-NEXT: sub r3, r4, r3
> ; CHECK-P9-NEXT: sub r4, r5, r4
> -; CHECK-P9-NEXT: sub r3, r3, r5
> -; CHECK-P9-NEXT: rldicl r4, r4, 1, 63
> ; CHECK-P9-NEXT: rldicl r3, r3, 1, 63
> -; CHECK-P9-NEXT: or r3, r4, r3
> -; CHECK-P9-NEXT: b .LBB1_9
> -; CHECK-P9-NEXT: .LBB1_8: # %cleanup20
> -; CHECK-P9-NEXT: li r3, 0
> +; CHECK-P9-NEXT: rldicl r4, r4, 1, 63
> +; CHECK-P9-NEXT: or r3, r3, r4
> +; CHECK-P9-NEXT: b .LBB1_10
> ; CHECK-P9-NEXT: .LBB1_9: # %cleanup20
> +; CHECK-P9-NEXT: li r3, 0
> +; CHECK-P9-NEXT: .LBB1_10: # %cleanup20
> ; CHECK-P9-NEXT: addi r1, r1, 80
> ; CHECK-P9-NEXT: ld r0, 16(r1)
> ; CHECK-P9-NEXT: mtlr r0
>
> Modified: llvm/trunk/test/CodeGen/X86/O3-pipeline.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/O3-pipeline.ll?rev=366570&r1=366569&r2=366570&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/O3-pipeline.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/O3-pipeline.ll Fri Jul 19 05:58:16 2019
> @@ -84,9 +84,9 @@
> ; CHECK-NEXT: MachineDominator Tree Construction
> ; CHECK-NEXT: Machine Natural Loop Construction
> ; CHECK-NEXT: Early Machine Loop Invariant Code Motion
> +; CHECK-NEXT: Machine Block Frequency Analysis
> ; CHECK-NEXT: Machine Common Subexpression Elimination
> ; CHECK-NEXT: MachinePostDominator Tree Construction
> -; CHECK-NEXT: Machine Block Frequency Analysis
> ; CHECK-NEXT: Machine code sinking
> ; CHECK-NEXT: Peephole Optimizations
> ; CHECK-NEXT: Remove dead machine instructions
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list