[llvm] r320836 - Revert "Re-commit : [LICM] Allow sinking when foldable in loop"

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 15 10:19:44 PST 2017


On Fri, Dec 15, 2017 at 9:12 PM, Jun Bum Lim via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: junbuml
> Date: Fri Dec 15 10:12:49 2017
> New Revision: 320836
>
> URL: http://llvm.org/viewvc/llvm-project?rev=320836&view=rev
> Log:
> Revert "Re-commit : [LICM] Allow sinking when foldable in loop"
>
> This reverts commit r320833.
It would be really great for the commit messages to actually be a
commit messages,
and not just a commit subjects. I.e. they ideally need to actually explain what
the commit is about, why the revert is happening, why re-commit is
happening, etc...

Roman.

> Removed:
>     llvm/trunk/test/Transforms/LICM/sink-foldable.ll
> Modified:
>     llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
>     llvm/trunk/lib/Transforms/Scalar/LICM.cpp
>
> Modified: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h?rev=320836&r1=320835&r2=320836&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h (original)
> +++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h Fri Dec 15 10:12:49 2017
> @@ -436,9 +436,8 @@ bool formLCSSARecursively(Loop &L, Domin
>  /// instructions of the loop and loop safety information as
>  /// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
>  bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
> -                TargetLibraryInfo *, TargetTransformInfo *, Loop *,
> -                AliasSetTracker *, LoopSafetyInfo *,
> -                OptimizationRemarkEmitter *ORE);
> +                TargetLibraryInfo *, Loop *, AliasSetTracker *,
> +                LoopSafetyInfo *, OptimizationRemarkEmitter *ORE);
>
>  /// \brief Walk the specified region of the CFG (defined by all blocks
>  /// dominated by the specified block, and that are in the current loop) in depth
>
> Modified: llvm/trunk/lib/Transforms/Scalar/LICM.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LICM.cpp?rev=320836&r1=320835&r2=320836&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/LICM.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/LICM.cpp Fri Dec 15 10:12:49 2017
> @@ -90,15 +90,14 @@ static cl::opt<uint32_t> MaxNumUsesTrave
>               "invariance in loop using invariant start (default = 8)"));
>
>  static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
> -static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
> -                                  const LoopSafetyInfo *SafetyInfo,
> -                                  TargetTransformInfo *TTI, bool &FreeInLoop);
> +static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
> +                            const LoopSafetyInfo *SafetyInfo);
>  static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
>                    const LoopSafetyInfo *SafetyInfo,
>                    OptimizationRemarkEmitter *ORE);
>  static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
>                   const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
> -                 OptimizationRemarkEmitter *ORE, bool FreeInLoop);
> +                 OptimizationRemarkEmitter *ORE);
>  static bool isSafeToExecuteUnconditionally(Instruction &Inst,
>                                             const DominatorTree *DT,
>                                             const Loop *CurLoop,
> @@ -116,8 +115,7 @@ CloneInstructionInExitBlock(Instruction
>  namespace {
>  struct LoopInvariantCodeMotion {
>    bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
> -                 TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
> -                 ScalarEvolution *SE, MemorySSA *MSSA,
> +                 TargetLibraryInfo *TLI, ScalarEvolution *SE, MemorySSA *MSSA,
>                   OptimizationRemarkEmitter *ORE, bool DeleteAST);
>
>    DenseMap<Loop *, AliasSetTracker *> &getLoopToAliasSetMap() {
> @@ -161,8 +159,6 @@ struct LegacyLICMPass : public LoopPass
>                            &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
>                            &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
>                            &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
> -                          &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
> -                              *L->getHeader()->getParent()),
>                            SE ? &SE->getSE() : nullptr, MSSA, &ORE, false);
>    }
>
> @@ -174,7 +170,6 @@ struct LegacyLICMPass : public LoopPass
>      AU.addRequired<TargetLibraryInfoWrapperPass>();
>      if (EnableMSSALoopDependency)
>        AU.addRequired<MemorySSAWrapperPass>();
> -    AU.addRequired<TargetTransformInfoWrapperPass>();
>      getLoopAnalysisUsage(AU);
>    }
>
> @@ -215,8 +210,8 @@ PreservedAnalyses LICMPass::run(Loop &L,
>                         "cached at a higher level");
>
>    LoopInvariantCodeMotion LICM;
> -  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE,
> -                      AR.MSSA, ORE, true))
> +  if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, AR.MSSA, ORE,
> +                      true))
>      return PreservedAnalyses::all();
>
>    auto PA = getLoopPassPreservedAnalyses();
> @@ -229,7 +224,6 @@ INITIALIZE_PASS_BEGIN(LegacyLICMPass, "l
>                        false, false)
>  INITIALIZE_PASS_DEPENDENCY(LoopPass)
>  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
> -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
>  INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
>  INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
>                      false)
> @@ -242,10 +236,12 @@ Pass *llvm::createLICMPass() { return ne
>  /// We should delete AST for inner loops in the new pass manager to avoid
>  /// memory leak.
>  ///
> -bool LoopInvariantCodeMotion::runOnLoop(
> -    Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT,
> -    TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE,
> -    MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool DeleteAST) {
> +bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
> +                                        LoopInfo *LI, DominatorTree *DT,
> +                                        TargetLibraryInfo *TLI,
> +                                        ScalarEvolution *SE, MemorySSA *MSSA,
> +                                        OptimizationRemarkEmitter *ORE,
> +                                        bool DeleteAST) {
>    bool Changed = false;
>
>    assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
> @@ -270,7 +266,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
>    // instructions, we perform another pass to hoist them out of the loop.
>    //
>    if (L->hasDedicatedExits())
> -    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
> +    Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
>                            CurAST, &SafetyInfo, ORE);
>    if (Preheader)
>      Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
> @@ -363,8 +359,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
>  /// definitions, allowing us to sink a loop body in one pass without iteration.
>  ///
>  bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
> -                      DominatorTree *DT, TargetLibraryInfo *TLI,
> -                      TargetTransformInfo *TTI, Loop *CurLoop,
> +                      DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop,
>                        AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo,
>                        OptimizationRemarkEmitter *ORE) {
>
> @@ -405,15 +400,12 @@ bool llvm::sinkRegion(DomTreeNode *N, Al
>        // outside of the loop.  In this case, it doesn't even matter if the
>        // operands of the instruction are loop invariant.
>        //
> -      bool FreeInLoop = false;
> -      if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
> +      if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
>            canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {
> -        if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE, FreeInLoop)) {
> -          if (!FreeInLoop) {
> -            ++II;
> -            CurAST->deleteValue(&I);
> -            I.eraseFromParent();
> -          }
> +        if (sink(I, LI, DT, CurLoop, SafetyInfo, ORE)) {
> +          ++II;
> +          CurAST->deleteValue(&I);
> +          I.eraseFromParent();
>            Changed = true;
>          }
>        }
> @@ -716,39 +708,13 @@ static bool isTriviallyReplacablePHI(con
>    return true;
>  }
>
> -/// Return true if the instruction is free in the loop.
> -static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
> -                         const TargetTransformInfo *TTI) {
> -  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
> -    if (TTI->getUserCost(&I) != TargetTransformInfo::TCC_Free)
> -      return false;
> -    // For a GEP, we cannot simply use getUserCost because currently it
> -    // optimistically assume that a GEP will fold into addressing mode
> -    // regardless of its users.
> -    const BasicBlock *BB = I.getParent();
> -    for (const User *U : I.users()) {
> -      const Instruction *UI = cast<Instruction>(U);
> -      if (CurLoop->contains(UI) &&
> -          (BB != UI->getParent() ||
> -           (!isa<StoreInst>(UI) && !isa<LoadInst>(UI))))
> -        return false;
> -    }
> -    return true;
> -  } else
> -    return TTI->getUserCost(&I) == TargetTransformInfo::TCC_Free;
> -}
> -
>  /// Return true if the only users of this instruction are outside of
>  /// the loop. If this is true, we can sink the instruction to the exit
>  /// blocks of the loop.
>  ///
> -/// We also return true if the instruction could be folded away in lowering.
> -/// (e.g.,  a GEP can be folded into a load as an addressing mode in the loop).
> -static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
> -                                  const LoopSafetyInfo *SafetyInfo,
> -                                  TargetTransformInfo *TTI, bool &FreeInLoop) {
> +static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
> +                            const LoopSafetyInfo *SafetyInfo) {
>    const auto &BlockColors = SafetyInfo->BlockColors;
> -  bool IsFree = isFreeInLoop(I, CurLoop, TTI);
>    for (const User *U : I.users()) {
>      const Instruction *UI = cast<Instruction>(U);
>      if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
> @@ -765,13 +731,8 @@ static bool isNotUsedOrFreeInLoop(const
>            return false;
>      }
>
> -    if (CurLoop->contains(UI)) {
> -      if (IsFree) {
> -        FreeInLoop = true;
> -        continue;
> -      }
> +    if (CurLoop->contains(UI))
>        return false;
> -    }
>    }
>    return true;
>  }
> @@ -927,7 +888,7 @@ static void splitPredecessorsOfLoopExit(
>  ///
>  static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
>                   const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
> -                 OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
> +                 OptimizationRemarkEmitter *ORE) {
>    DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
>    ORE->emit([&]() {
>      return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
> @@ -939,6 +900,7 @@ static bool sink(Instruction &I, LoopInf
>    else if (isa<CallInst>(I))
>      ++NumMovedCalls;
>    ++NumSunk;
> +  Changed = true;
>
>    // Iterate over users to be ready for actual sinking. Replace users via
>    // unrechable blocks with undef and make all user PHIs trivially replcable.
> @@ -948,12 +910,11 @@ static bool sink(Instruction &I, LoopInf
>      Use &U = UI.getUse();
>      ++UI;
>
> -    if (VisitedUsers.count(User) || CurLoop->contains(User))
> +    if (VisitedUsers.count(User))
>        continue;
>
>      if (!DT->isReachableFromEntry(User->getParent())) {
>        U = UndefValue::get(I.getType());
> -      Changed = true;
>        continue;
>      }
>
> @@ -966,7 +927,6 @@ static bool sink(Instruction &I, LoopInf
>      BasicBlock *BB = PN->getIncomingBlock(U);
>      if (!DT->isReachableFromEntry(BB)) {
>        U = UndefValue::get(I.getType());
> -      Changed = true;
>        continue;
>      }
>
> @@ -975,7 +935,7 @@ static bool sink(Instruction &I, LoopInf
>        continue;
>
>      if (!canSplitPredecessors(PN))
> -      return Changed;
> +      return false;
>
>      // Split predecessors of the PHI so that we can make users trivially
>      // replacable.
> @@ -987,9 +947,6 @@ static bool sink(Instruction &I, LoopInf
>      UE = I.user_end();
>    }
>
> -  if (VisitedUsers.empty())
> -    return Changed;
> -
>  #ifndef NDEBUG
>    SmallVector<BasicBlock *, 32> ExitBlocks;
>    CurLoop->getUniqueExitBlocks(ExitBlocks);
> @@ -1003,14 +960,9 @@ static bool sink(Instruction &I, LoopInf
>    // If this instruction is only used outside of the loop, then all users are
>    // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
>    // the instruction.
> -  SmallSetVector<User*, 8> Users(I.user_begin(), I.user_end());
> -  for (auto *UI : Users) {
> -    auto *User = cast<Instruction>(UI);
> -
> -    if (CurLoop->contains(User))
> -      continue;
> -
> -    PHINode *PN = cast<PHINode>(User);
> +  while (!I.use_empty()) {
> +    Value::user_iterator UI = I.user_begin();
> +    PHINode *PN = cast<PHINode>(*UI);
>      assert(ExitBlockSet.count(PN->getParent()) &&
>             "The LCSSA PHI is not in an exit block!");
>      // The PHI must be trivially replacable.
> @@ -1018,7 +970,6 @@ static bool sink(Instruction &I, LoopInf
>                                                           SafetyInfo, CurLoop);
>      PN->replaceAllUsesWith(New);
>      PN->eraseFromParent();
> -    Changed = true;
>    }
>    return Changed;
>  }
>
> Removed: llvm/trunk/test/Transforms/LICM/sink-foldable.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LICM/sink-foldable.ll?rev=320835&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/LICM/sink-foldable.ll (original)
> +++ llvm/trunk/test/Transforms/LICM/sink-foldable.ll (removed)
> @@ -1,150 +0,0 @@
> -; REQUIRES: aarch64-registered-target
> -
> -; RUN: opt < %s  -licm -S   | FileCheck %s
> -
> -target triple = "aarch64--linux-gnueabi"
> -
> -; CHECK-LABEL:@test1
> -; CHECK-LABEL:loopexit1:
> -; CHECK: %[[PHI:.+]] = phi i8** [ %arrayidx0, %if.end ]
> -; CHECK: getelementptr inbounds i8*, i8** %[[PHI]], i64 1
> -define i8** @test1(i32 %j, i8** readonly %P, i8* readnone %Q) {
> -entry:
> -  %cmp0 = icmp slt i32 0, %j
> -  br i1 %cmp0, label %for.body.lr.ph, label %return
> -
> -for.body.lr.ph:
> -  br label %for.body
> -
> -for.body:
> -  %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end  ]
> -  %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end]
> -
> -  %i0.ext = sext i32 %i0 to i64
> -  %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext
> -  %l0 = load i8*, i8** %arrayidx0, align 8
> -  %cmp1 = icmp ugt i8* %l0, %Q
> -  br i1 %cmp1, label %loopexit0, label %if.end
> -
> -if.end:                                           ; preds = %for.body
> -  %arrayidx1 = getelementptr inbounds i8*, i8** %arrayidx0, i64 1
> -  %l1 = load i8*, i8** %arrayidx1, align 8
> -  %cmp4 = icmp ugt i8* %l1, %Q
> -  %i.add = add nsw i32 %i0, 2
> -  br i1 %cmp4, label %loopexit1, label %for.body
> -
> -loopexit0:
> -  %p1 = phi i8** [%arrayidx0, %for.body]
> -  br label %return
> -
> -loopexit1:
> -  %p2 = phi i8** [%arrayidx1, %if.end]
> -  br label  %return
> -
> -return:
> -  %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ]
> -  ret i8** %retval.0
> -}
> -
> -; CHECK-LABEL: @test2
> -; CHECK-LABEL: loopexit2:
> -; CHECK: %[[PHI:.*]] = phi i8** [ %add.ptr, %if.end ]
> -; CHECK: getelementptr inbounds i8*, i8** %[[PHI]]
> -define i8** @test2(i32 %j, i8** readonly %P, i8* readnone %Q) {
> -
> -entry:
> -  br label %for.body
> -
> -for.cond:
> -  %i.addr.0 = phi i32 [ %add, %if.end ]
> -  %P.addr.0 = phi i8** [ %add.ptr, %if.end ]
> -  %cmp = icmp slt i32 %i.addr.0, %j
> -  br i1 %cmp, label %for.body, label %loopexit0
> -
> -for.body:
> -  %P.addr = phi i8** [ %P, %entry ], [ %P.addr.0, %for.cond ]
> -  %i.addr = phi i32 [ 0, %entry ], [ %i.addr.0, %for.cond ]
> -
> -  %idx.ext = sext i32 %i.addr to i64
> -  %add.ptr = getelementptr inbounds i8*, i8** %P.addr, i64 %idx.ext
> -  %l0 = load i8*, i8** %add.ptr, align 8
> -
> -  %cmp1 = icmp ugt i8* %l0, %Q
> -  br i1 %cmp1, label %loopexit1, label %if.end
> -
> -if.end:
> -  %add.i = add i32 %i.addr, 1
> -  %idx2.ext = sext i32 %add.i to i64
> -  %arrayidx2 = getelementptr inbounds i8*, i8** %add.ptr, i64 %idx2.ext
> -  %l1 = load i8*, i8** %arrayidx2, align 8
> -  %cmp2 = icmp ugt i8* %l1, %Q
> -  %add = add nsw i32 %add.i, 1
> -  br i1 %cmp2, label %loopexit2, label %for.cond
> -
> -loopexit0:
> -  %p0 = phi i8** [ null, %for.cond ]
> -  br label %return
> -
> -loopexit1:
> -  %p1 = phi i8** [ %add.ptr, %for.body ]
> -  br label %return
> -
> -loopexit2:
> -  %p2 = phi i8** [ %arrayidx2, %if.end ]
> -  br label %return
> -
> -return:
> -  %retval.0 = phi i8** [ %p1, %loopexit1 ], [ %p2, %loopexit2 ], [ %p0, %loopexit0 ]
> -  ret i8** %retval.0
> -}
> -
> -
> -; CHECK-LABEL: @test3
> -; CHECK-LABEL: loopexit1:
> -; CHECK: %[[ADD:.*]]  = phi i64 [ %add, %if.end ]
> -; CHECK: %[[ADDR:.*]] = phi i8** [ %P.addr, %if.end ]
> -; CHECK: %[[TRUNC:.*]] = trunc i64 %[[ADD]] to i32
> -; CHECK: getelementptr inbounds i8*, i8** %[[ADDR]], i32 %[[TRUNC]]
> -; CHECK: call void @dummy(i32 %[[TRUNC]])
> -define i8** @test3(i64 %j, i8** readonly %P, i8* readnone %Q) {
> -entry:
> -  %cmp0 = icmp slt i64 0, %j
> -  br i1 %cmp0, label %for.body.lr.ph, label %return
> -
> -for.body.lr.ph:
> -  br label %for.body
> -
> -for.body:
> -  %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end  ]
> -  %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end]
> -
> -  %i0.ext = sext i32 %i0 to i64
> -  %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext
> -  %l0 = load i8*, i8** %arrayidx0, align 8
> -  %cmp1 = icmp ugt i8* %l0, %Q
> -  br i1 %cmp1, label %loopexit0, label %if.end
> -
> -if.end:                                           ; preds = %for.body
> -  %add = add i64 %i0.ext, 1
> -  %trunc = trunc i64 %add to i32
> -  %arrayidx1 = getelementptr inbounds i8*, i8** %P.addr, i32 %trunc
> -  %l1 = load i8*, i8** %arrayidx1, align 8
> -  %cmp4 = icmp ugt i8* %l1, %Q
> -  %i.add = add nsw i32 %i0, 2
> -  br i1 %cmp4, label %loopexit1, label %for.body
> -
> -loopexit0:
> -  %p1 = phi i8** [%arrayidx0, %for.body]
> -  br label %return
> -
> -loopexit1:
> -  %p2 = phi i8** [%arrayidx1, %if.end]
> -  call void @dummy(i32 %trunc)
> -  br label  %return
> -
> -return:
> -  %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ]
> -  ret i8** %retval.0
> -}
> -
> -declare void @dummy(i32)
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list