[llvm] r320828 - Revert "[LICM] Allow sinking when foldable in loop"

Fri Dec 15 08:35:09 PST 2017

Author: junbuml
Date: Fri Dec 15 08:35:09 2017
New Revision: 320828

URL: http://llvm.org/viewvc/llvm-project?rev=320828&view=rev
Log:
Revert "[LICM] Allow sinking when foldable in loop"

This reverts commit r320823.

Removed:
    llvm/trunk/test/Transforms/LICM/sink-foldable.ll
Modified:
    llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
    llvm/trunk/lib/Transforms/Scalar/LICM.cpp

Modified: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h?rev=320828&r1=320827&r2=320828&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h Fri Dec 15 08:35:09 2017
@@ -436,9 +436,8 @@ bool formLCSSARecursively(Loop &L, Domin
 /// instructions of the loop and loop safety information as
 /// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
 bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
-                TargetLibraryInfo *, TargetTransformInfo *, Loop *,
-                AliasSetTracker *, LoopSafetyInfo *,
-                OptimizationRemarkEmitter *ORE);
+                TargetLibraryInfo *, Loop *, AliasSetTracker *,
+                LoopSafetyInfo *, OptimizationRemarkEmitter *ORE);
 
 /// \brief Walk the specified region of the CFG (defined by all blocks
 /// dominated by the specified block, and that are in the current loop) in depth

Modified: llvm/trunk/lib/Transforms/Scalar/LICM.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LICM.cpp?rev=320828&r1=320827&r2=320828&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LICM.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp Fri Dec 15 08:35:09 2017
@@ -90,15 +90,14 @@ static cl::opt<uint32_t> MaxNumUsesTrave
              "invariance in loop using invariant start (default = 8)"));
 
 static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
-static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
-                                  const LoopSafetyInfo *SafetyInfo,
-                                  TargetTransformInfo *TTI, bool &FreeInLoop);
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
+                            const LoopSafetyInfo *SafetyInfo);
 static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                   const LoopSafetyInfo *SafetyInfo,
                   OptimizationRemarkEmitter *ORE);
 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                  const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
-                 OptimizationRemarkEmitter *ORE, bool FreeInLoop);
+                 OptimizationRemarkEmitter *ORE);
 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                            const DominatorTree *DT,
                                            const Loop *CurLoop,
@@ -116,8 +115,7 @@ CloneInstructionInExitBlock(Instruction
 namespace {
 struct LoopInvariantCodeMotion {
   bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
-                 TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
-                 ScalarEvolution *SE, MemorySSA *MSSA,
+                 TargetLibraryInfo *TLI, ScalarEvolution *SE, MemorySSA *MSSA,
                  OptimizationRemarkEmitter *ORE, bool DeleteAST);
 
   DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() {
@@ -161,8 +159,6 @@ struct LegacyLICMPass : public LoopPass
                           &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
                           &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
                           &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
-                          &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
-                              *L->getHeader()->getParent()),
                           SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
   }
 
@@ -174,7 +170,6 @@ struct LegacyLICMPass : public LoopPass
     AU.addRequired<TargetLibraryInfoWrapperPass>();
     if (EnableMSSALoopDependency)
       AU.addRequired<MemorySSAWrapperPass>();
-    AU.addRequired<TargetTransformInfoWrapperPass>();
     getLoopAnalysisUsage(AU);
   }
 
@@ -215,8 +210,8 @@ PreservedAnalyses LICMPass::run(Loop &L,
                        "cached at a higher level");
 
   LoopInvariantCodeMotion LICM;
-  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
-                      AR.MSSA, ORE, true))
+  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, AR.MSSA, ORE,
+                      true))
     return PreservedAnalyses::all();
 
   auto PA = getLoopPassPreservedAnalyses();
@@ -229,7 +224,6 @@ INITIALIZE_PASS_BEGIN(LegacyLICMPass, "l
                       false, false)
 INITIALIZE_PASS_DEPENDENCY(LoopPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
 INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
                     false)
@@ -242,10 +236,12 @@ Pass *llvm::createLICMPass() { return ne
 /// We should delete AST for inner loops in the new pass manager to avoid
 /// memory leak.
 ///
-bool LoopInvariantCodeMotion::runOnLoop(
-    Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
-    TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
-    MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
+bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
+                                        LoopInfo *LI, DominatorTree *DT,
+                                        TargetLibraryInfo *TLI,
+                                        ScalarEvolution *SE, MemorySSA *MSSA,
+                                        OptimizationRemarkEmitter *ORE,
+                                        bool DeleteAST) {
   bool Changed = false;
 
   assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
@@ -270,7 +266,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
   // instructions, we perform another pass to hoist them out of the loop.
   //
   if (L->hasDedicatedExits())
-    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
+    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
                           CurAST, &SafetyInfo, ORE);
   if (Preheader)
     Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
@@ -363,8 +359,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
 /// definitions, allowing us to sink a loop body in one pass without iteration.
 ///
 bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
-                      DominatorTree *DT, TargetLibraryInfo *TLI,
-                      TargetTransformInfo *TTI, Loop *CurLoop,
+                      DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
                       AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
                       OptimizationRemarkEmitter *ORE) {
 
@@ -405,15 +400,12 @@ bool llvm::sinkRegion(DomTreeNode *N, Al
       // outside of the loop.  In this case, it doesn't even matter if the
       // operands of the instruction are loop invariant.
       //
-      bool FreeInLoop = false;
-      if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
+      if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
           canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {
-        if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) {
-          if (!FreeInLoop) {
-            ++II;
-            CurAST->deleteValue(&I);
-            I.eraseFromParent();
-          }
+        if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE)) {
+          ++II;
+          CurAST->deleteValue(&I);
+          I.eraseFromParent();
           Changed = true;
         }
       }
@@ -716,39 +708,13 @@ static bool isTriviallyReplacablePHI(con
   return true;
 }
 
-/// Return true if the instruction is free in the loop.
-static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
-                         const TargetTransformInfo *TTI) {
-  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
-    if (TTI->getUserCost(&I) != TargetTransformInfo::TCC_Free)
-      return false;
-    // For a GEP, we cannot simply use getUserCost because currently it
-    // optimistically assume that a GEP will fold into addressing mode
-    // regardless of its users.
-    const BasicBlock *BB = I.getParent();
-    for (const User *U : I.users()) {
-      const Instruction *UI = cast<Instruction>(U);
-      if (CurLoop->contains(UI) &&
-          (BB != UI->getParent() ||
-           (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
-        return false;
-    }
-    return true;
-  } else
-    return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
-}
-
 /// Return true if the only users of this instruction are outside of
 /// the loop. If this is true, we can sink the instruction to the exit
 /// blocks of the loop.
 ///
-/// We also return true if the instruction could be folded away in lowering.
-/// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
-static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
-                                  const LoopSafetyInfo *SafetyInfo,
-                                  TargetTransformInfo *TTI, bool &FreeInLoop) {
+static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
+                            const LoopSafetyInfo *SafetyInfo) {
   const auto &BlockColors = SafetyInfo->BlockColors;
-  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
   for (const User *U : I.users()) {
     const Instruction *UI = cast<Instruction>(U);
     if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
@@ -765,13 +731,8 @@ static bool isNotUsedOrFreeInLoop(const
           return false;
     }
 
-    if (CurLoop->contains(UI)) {
-      if (IsFree) {
-        FreeInLoop = true;
-        continue;
-      }
+    if (CurLoop->contains(UI))
       return false;
-    }
   }
   return true;
 }
@@ -927,7 +888,7 @@ static void splitPredecessorsOfLoopExit(
 ///
 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                  const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
-                 OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
+                 OptimizationRemarkEmitter *ORE) {
   DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
   ORE->emit([&]() {
     return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
@@ -939,6 +900,7 @@ static bool sink(Instruction &I, LoopInf
   else if (isa<CallInst>(I))
     ++NumMovedCalls;
   ++NumSunk;
+  Changed = true;
 
   // Iterate over users to be ready for actual sinking. Replace users via
   // unrechable blocks with undef and make all user PHIs trivially replcable.
@@ -948,12 +910,11 @@ static bool sink(Instruction &I, LoopInf
     Use &U = UI.getUse();
     ++UI;
 
-    if (VisitedUsers.count(User) || CurLoop->contains(User))
+    if (VisitedUsers.count(User))
       continue;
 
     if (!DT->isReachableFromEntry(User->getParent())) {
       U = UndefValue::get(I.getType());
-      Changed = true;
       continue;
     }
 
@@ -966,7 +927,6 @@ static bool sink(Instruction &I, LoopInf
     BasicBlock *BB = PN->getIncomingBlock(U);
     if (!DT->isReachableFromEntry(BB)) {
       U = UndefValue::get(I.getType());
-      Changed = true;
       continue;
     }
 
@@ -975,7 +935,7 @@ static bool sink(Instruction &I, LoopInf
       continue;
 
     if (!canSplitPredecessors(PN))
-      return Changed;
+      return false;
 
     // Split predecessors of the PHI so that we can make users trivially
     // replacable.
@@ -987,9 +947,6 @@ static bool sink(Instruction &I, LoopInf
     UE = I.user_end();
   }
 
-  if (VisitedUsers.empty())
-    return Changed;
-
 #ifndef NDEBUG
   SmallVector<BasicBlock *, 32> ExitBlocks;
   CurLoop->getUniqueExitBlocks(ExitBlocks);
@@ -1003,14 +960,9 @@ static bool sink(Instruction &I, LoopInf
   // If this instruction is only used outside of the loop, then all users are
   // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
   // the instruction.
-  SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
-  for (auto *UI : Users) {
-    auto *User = cast<Instruction>(UI);
-
-    if (CurLoop->contains(User))
-      continue;
-
-    PHINode *PN = cast<PHINode>(User);
+  while (!I.use_empty()) {
+    Value::user_iterator UI = I.user_begin();
+    PHINode *PN = cast<PHINode>(*UI);
     assert(ExitBlockSet.count(PN->getParent()) &&
            "The LCSSA PHI is not in an exit block!");
     // The PHI must be trivially replacable.
@@ -1018,7 +970,6 @@ static bool sink(Instruction &I, LoopInf
                                                          SafetyInfo, CurLoop);
     PN->replaceAllUsesWith(New);
     PN->eraseFromParent();
-    Changed = true;
   }
   return Changed;
 }

Removed: llvm/trunk/test/Transforms/LICM/sink-foldable.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LICM/sink-foldable.ll?rev=320827&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LICM/sink-foldable.ll (original)
+++ llvm/trunk/test/Transforms/LICM/sink-foldable.ll (removed)
@@ -1,147 +0,0 @@
-; RUN: opt < %s  -licm -S   | FileCheck %s
-target triple = "aarch64--linux-gnueabi"
-
-; CHECK-LABEL:@test1
-; CHECK-LABEL:loopexit1:
-; CHECK: %[[PHI:.+]] = phi i8** [ %arrayidx0, %if.end ]
-; CHECK: getelementptr inbounds i8*, i8** %[[PHI]], i64 1
-define i8** @test1(i32 %j, i8** readonly %P, i8* readnone %Q) {
-entry:
-  %cmp0 = icmp slt i32 0, %j
-  br i1 %cmp0, label %for.body.lr.ph, label %return
-
-for.body.lr.ph:
-  br label %for.body
-
-for.body:
-  %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end  ]
-  %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end]
-
-  %i0.ext = sext i32 %i0 to i64
-  %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext
-  %l0 = load i8*, i8** %arrayidx0, align 8
-  %cmp1 = icmp ugt i8* %l0, %Q
-  br i1 %cmp1, label %loopexit0, label %if.end
-
-if.end:                                           ; preds = %for.body
-  %arrayidx1 = getelementptr inbounds i8*, i8** %arrayidx0, i64 1
-  %l1 = load i8*, i8** %arrayidx1, align 8
-  %cmp4 = icmp ugt i8* %l1, %Q
-  %i.add = add nsw i32 %i0, 2
-  br i1 %cmp4, label %loopexit1, label %for.body
-
-loopexit0:
-  %p1 = phi i8** [%arrayidx0, %for.body]
-  br label %return
-
-loopexit1:
-  %p2 = phi i8** [%arrayidx1, %if.end]
-  br label  %return
-
-return:
-  %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ]
-  ret i8** %retval.0
-}
-
-; CHECK-LABEL: @test2
-; CHECK-LABEL: loopexit2:
-; CHECK: %[[PHI:.*]] = phi i8** [ %add.ptr, %if.end ]
-; CHECK: getelementptr inbounds i8*, i8** %[[PHI]]
-define i8** @test2(i32 %j, i8** readonly %P, i8* readnone %Q) {
-
-entry:
-  br label %for.body
-
-for.cond:
-  %i.addr.0 = phi i32 [ %add, %if.end ]
-  %P.addr.0 = phi i8** [ %add.ptr, %if.end ]
-  %cmp = icmp slt i32 %i.addr.0, %j
-  br i1 %cmp, label %for.body, label %loopexit0
-
-for.body:
-  %P.addr = phi i8** [ %P, %entry ], [ %P.addr.0, %for.cond ]
-  %i.addr = phi i32 [ 0, %entry ], [ %i.addr.0, %for.cond ]
-
-  %idx.ext = sext i32 %i.addr to i64
-  %add.ptr = getelementptr inbounds i8*, i8** %P.addr, i64 %idx.ext
-  %l0 = load i8*, i8** %add.ptr, align 8
-
-  %cmp1 = icmp ugt i8* %l0, %Q
-  br i1 %cmp1, label %loopexit1, label %if.end
-
-if.end:
-  %add.i = add i32 %i.addr, 1
-  %idx2.ext = sext i32 %add.i to i64
-  %arrayidx2 = getelementptr inbounds i8*, i8** %add.ptr, i64 %idx2.ext
-  %l1 = load i8*, i8** %arrayidx2, align 8
-  %cmp2 = icmp ugt i8* %l1, %Q
-  %add = add nsw i32 %add.i, 1
-  br i1 %cmp2, label %loopexit2, label %for.cond
-
-loopexit0:
-  %p0 = phi i8** [ null, %for.cond ]
-  br label %return
-
-loopexit1:
-  %p1 = phi i8** [ %add.ptr, %for.body ]
-  br label %return
-
-loopexit2:
-  %p2 = phi i8** [ %arrayidx2, %if.end ]
-  br label %return
-
-return:
-  %retval.0 = phi i8** [ %p1, %loopexit1 ], [ %p2, %loopexit2 ], [ %p0, %loopexit0 ]
-  ret i8** %retval.0
-}
-
-
-; CHECK-LABEL: @test3
-; CHECK-LABEL: loopexit1:
-; CHECK: %[[ADD:.*]]  = phi i64 [ %add, %if.end ]
-; CHECK: %[[ADDR:.*]] = phi i8** [ %P.addr, %if.end ]
-; CHECK: %[[TRUNC:.*]] = trunc i64 %[[ADD]] to i32
-; CHECK: getelementptr inbounds i8*, i8** %[[ADDR]], i32 %[[TRUNC]]
-; CHECK: call void @dummy(i32 %[[TRUNC]])
-define i8** @test3(i64 %j, i8** readonly %P, i8* readnone %Q) {
-entry:
-  %cmp0 = icmp slt i64 0, %j
-  br i1 %cmp0, label %for.body.lr.ph, label %return
-
-for.body.lr.ph:
-  br label %for.body
-
-for.body:
-  %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end  ]
-  %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end]
-
-  %i0.ext = sext i32 %i0 to i64
-  %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext
-  %l0 = load i8*, i8** %arrayidx0, align 8
-  %cmp1 = icmp ugt i8* %l0, %Q
-  br i1 %cmp1, label %loopexit0, label %if.end
-
-if.end:                                           ; preds = %for.body
-  %add = add i64 %i0.ext, 1
-  %trunc = trunc i64 %add to i32
-  %arrayidx1 = getelementptr inbounds i8*, i8** %P.addr, i32 %trunc
-  %l1 = load i8*, i8** %arrayidx1, align 8
-  %cmp4 = icmp ugt i8* %l1, %Q
-  %i.add = add nsw i32 %i0, 2
-  br i1 %cmp4, label %loopexit1, label %for.body
-
-loopexit0:
-  %p1 = phi i8** [%arrayidx0, %for.body]
-  br label %return
-
-loopexit1:
-  %p2 = phi i8** [%arrayidx1, %if.end]
-  call void @dummy(i32 %trunc)
-  br label  %return
-
-return:
-  %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ]
-  ret i8** %retval.0
-}
-
-declare void @dummy(i32)