[llvm] r355512 - [CGP] Avoid repeatedly building DominatorTree causing long compile-time (NFC)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 6 06:57:40 PST 2019


Author: tejohnson
Date: Wed Mar  6 06:57:40 2019
New Revision: 355512

URL: http://llvm.org/viewvc/llvm-project?rev=355512&view=rev
Log:
[CGP] Avoid repeatedly building DominatorTree causing long compile-time (NFC)

Summary:
In r354298 a DominatorTree construction was added via new function
combineToUSubWithOverflow, which was subsequently restructured into
replaceMathCmpWithIntrinsic in r354689. We are hitting a very long
compile time due to this repeated construction, once per math cmp in
the function.

We shouldn't need to build the DominatorTree more than once per
function, except when a transformation invalidates it. There is already
a boolean flag that is returned from these methods indicating whether
the DT has been modified. We can simply build the DT once per
Function walk in CodeGenPrepare::runOnFunction, since any time a change
is made we break out of the Function walk and restart it.

I modified the code so that both replaceMathCmpWithIntrinsic as well as
mergeSExts (which was also building a DT) use the DT constructed by the
run method.

From -mllvm -time-passes:
Before this patch: CodeGen Prepare user time is 328s
With this patch: CodeGen Prepare user time is 21s

Reviewers: spatel

Subscribers: jdoerfert, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D58995

Modified:
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=355512&r1=355511&r2=355512&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Wed Mar  6 06:57:40 2019
@@ -345,8 +345,8 @@ class TypePromotionTransaction;
     void eliminateMostlyEmptyBlock(BasicBlock *BB);
     bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
                                        bool isPreheader);
-    bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
-    bool optimizeInst(Instruction *I, bool &ModifiedDT);
+    bool optimizeBlock(BasicBlock &BB, DominatorTree &DT, bool &ModifiedDT);
+    bool optimizeInst(Instruction *I, DominatorTree &DT, bool &ModifiedDT);
     bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                             Type *AccessTy, unsigned AddrSpace);
     bool optimizeInlineAsmInst(CallInst *CS);
@@ -366,7 +366,7 @@ class TypePromotionTransaction;
                           const SmallVectorImpl<Instruction *> &Exts,
                           SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
                           unsigned CreatedInstsCost = 0);
-    bool mergeSExts(Function &F);
+    bool mergeSExts(Function &F, DominatorTree &DT);
     bool splitLargeGEPOffsets();
     bool performAddressTypePromotion(
         Instruction *&Inst,
@@ -455,17 +455,18 @@ bool CodeGenPrepare::runOnFunction(Funct
   bool MadeChange = true;
   while (MadeChange) {
     MadeChange = false;
+    DominatorTree DT(F);
     for (Function::iterator I = F.begin(); I != F.end(); ) {
       BasicBlock *BB = &*I++;
       bool ModifiedDTOnIteration = false;
-      MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
+      MadeChange |= optimizeBlock(*BB, DT, ModifiedDTOnIteration);
 
       // Restart BB iteration if the dominator tree of the Function was changed
       if (ModifiedDTOnIteration)
         break;
     }
     if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
-      MadeChange |= mergeSExts(F);
+      MadeChange |= mergeSExts(F, DT);
     if (!LargeOffsetGEPMap.empty())
       MadeChange |= splitLargeGEPOffsets();
 
@@ -1160,7 +1161,7 @@ static bool OptimizeNoopCopyExpression(C
 }
 
 static bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
-                                        Intrinsic::ID IID) {
+                                        Intrinsic::ID IID, DominatorTree &DT) {
   // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
   Value *Arg0 = BO->getOperand(0);
   Value *Arg1 = BO->getOperand(1);
@@ -1179,7 +1180,6 @@ static bool replaceMathCmpWithIntrinsic(
   } else {
     // The math and compare may be independent instructions. Check dominance to
     // determine the insertion point for the intrinsic.
-    DominatorTree DT(*BO->getFunction());
     bool MathDominates = DT.dominates(BO, Cmp);
     if (!MathDominates && !DT.dominates(Cmp, BO))
       return false;
@@ -1230,7 +1230,8 @@ static bool matchUAddWithOverflowConstan
 /// Try to combine the compare into a call to the llvm.uadd.with.overflow
 /// intrinsic. Return true if any changes were made.
 static bool combineToUAddWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
-                                      const DataLayout &DL, bool &ModifiedDT) {
+                                      const DataLayout &DL, DominatorTree &DT,
+                                      bool &ModifiedDT) {
   Value *A, *B;
   BinaryOperator *Add;
   if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
@@ -1247,7 +1248,7 @@ static bool combineToUAddWithOverflow(Cm
   if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
     return false;
 
-  if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
+  if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow, DT))
     return false;
 
   // Reset callers - do not crash by iterating over a dead instruction.
@@ -1256,7 +1257,8 @@ static bool combineToUAddWithOverflow(Cm
 }
 
 static bool combineToUSubWithOverflow(CmpInst *Cmp, const TargetLowering &TLI,
-                                      const DataLayout &DL, bool &ModifiedDT) {
+                                      const DataLayout &DL, DominatorTree &DT,
+                                      bool &ModifiedDT) {
   // Convert (A u> B) to (A u< B) to simplify pattern matching.
   Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
   ICmpInst::Predicate Pred = Cmp->getPredicate();
@@ -1304,7 +1306,7 @@ static bool combineToUSubWithOverflow(Cm
                                 TLI.getValueType(DL, Sub->getType())))
     return false;
 
-  if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
+  if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow, DT))
     return false;
 
   // Reset callers - do not crash by iterating over a dead instruction.
@@ -1379,14 +1381,15 @@ static bool sinkCmpExpression(CmpInst *C
 }
 
 static bool optimizeCmp(CmpInst *Cmp, const TargetLowering &TLI,
-                        const DataLayout &DL, bool &ModifiedDT) {
+                        const DataLayout &DL, DominatorTree &DT,
+                        bool &ModifiedDT) {
   if (sinkCmpExpression(Cmp, TLI))
     return true;
 
-  if (combineToUAddWithOverflow(Cmp, TLI, DL, ModifiedDT))
+  if (combineToUAddWithOverflow(Cmp, TLI, DL, DT, ModifiedDT))
     return true;
 
-  if (combineToUSubWithOverflow(Cmp, TLI, DL, ModifiedDT))
+  if (combineToUSubWithOverflow(Cmp, TLI, DL, DT, ModifiedDT))
     return true;
 
   return false;
@@ -5169,8 +5172,7 @@ bool CodeGenPrepare::tryToPromoteExts(
 }
 
 /// Merging redundant sexts when one is dominating the other.
-bool CodeGenPrepare::mergeSExts(Function &F) {
-  DominatorTree DT(F);
+bool CodeGenPrepare::mergeSExts(Function &F, DominatorTree &DT) {
   bool Changed = false;
   for (auto &Entry : ValToSExtendedUses) {
     SExts &Insts = Entry.second;
@@ -6827,7 +6829,8 @@ static bool tryUnmergingGEPsAcrossIndire
   return true;
 }
 
-bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
+bool CodeGenPrepare::optimizeInst(Instruction *I, DominatorTree &DT,
+                                  bool &ModifiedDT) {
   // Bail out if we inserted the instruction to prevent optimizations from
   // stepping on each other's toes.
   if (InsertedInsts.count(I))
@@ -6876,7 +6879,7 @@ bool CodeGenPrepare::optimizeInst(Instru
   }
 
   if (auto *Cmp = dyn_cast<CmpInst>(I))
-    if (TLI && optimizeCmp(Cmp, *TLI, *DL, ModifiedDT))
+    if (TLI && optimizeCmp(Cmp, *TLI, *DL, DT, ModifiedDT))
       return true;
 
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
@@ -6938,7 +6941,7 @@ bool CodeGenPrepare::optimizeInst(Instru
       GEPI->replaceAllUsesWith(NC);
       GEPI->eraseFromParent();
       ++NumGEPsElim;
-      optimizeInst(NC, ModifiedDT);
+      optimizeInst(NC, DT, ModifiedDT);
       return true;
     }
     if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
@@ -6989,13 +6992,14 @@ static bool makeBitReverse(Instruction &
 // In this pass we look for GEP and cast instructions that are used
 // across basic blocks and rewrite them to improve basic-block-at-a-time
 // selection.
-bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, DominatorTree &DT,
+                                   bool &ModifiedDT) {
   SunkAddrs.clear();
   bool MadeChange = false;
 
   CurInstIterator = BB.begin();
   while (CurInstIterator != BB.end()) {
-    MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
+    MadeChange |= optimizeInst(&*CurInstIterator++, DT, ModifiedDT);
     if (ModifiedDT)
       return true;
   }




More information about the llvm-commits mailing list