[llvm] 93175a5 - [IndVarSimplify][LoopUtils] rewriteLoopExitValues. NFCI

Mon Jan 20 01:05:20 PST 2020

Author: Sjoerd Meijer
Date: 2020-01-20T09:05:00Z
New Revision: 93175a5caa08360ca60b417cc04c094e1ed05c76

URL: https://github.com/llvm/llvm-project/commit/93175a5caa08360ca60b417cc04c094e1ed05c76
DIFF: https://github.com/llvm/llvm-project/commit/93175a5caa08360ca60b417cc04c094e1ed05c76.diff

LOG: [IndVarSimplify][LoopUtils] rewriteLoopExitValues. NFCI

This moves `rewriteLoopExitValues()` from IndVarSimplify to LoopUtils thus
making it a generic loop utility function.  This allows to rewrite loop exit
values by just calling this function without running the whole IndVarSimplify
pass.

We use this in D72714 to rematerialise the iteration count in exit blocks, so
that we can clean-up loop update expressions inside the hardware-loops later.

Differential Revision: https://reviews.llvm.org/D72602

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/LoopUtils.h
    llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
    llvm/lib/Transforms/Utils/LoopUtils.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 9ed96809ed99..576a7e8d43e8 100644

--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -47,6 +47,7 @@ class PredicatedScalarEvolution;
 class PredIteratorCache;
 class ScalarEvolution;
 class SCEV;
+class SCEVExpander;
 class TargetLibraryInfo;
 class TargetTransformInfo;
 
@@ -357,6 +358,18 @@ bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
 bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                        bool Signed);
 
+enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
+
+/// If the final value of any expressions that are recurrent in the loop can
+/// be computed, substitute the exit values from the loop into any instructions
+/// outside of the loop that use the final values of the current expressions.
+/// Return the number of loop exit values that have been replaced, and the
+/// corresponding phi node will be added to DeadInsts.
+int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
+                          ScalarEvolution *SE, SCEVExpander &Rewriter,
+                          DominatorTree *DT, ReplaceExitVal ReplaceExitValue,
+                          SmallVector<WeakTrackingVH, 16> &DeadInsts);
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H

diff  --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 117c0b47e8f7..c637534c5a61 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -103,8 +103,6 @@ static cl::opt<bool> VerifyIndvars(
   "verify-indvars", cl::Hidden,
   cl::desc("Verify the ScalarEvolution result after running indvars"));
 
-enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
-
 static cl::opt<ReplaceExitVal> ReplaceExitValue(
     "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
     cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
@@ -143,8 +141,6 @@ class IndVarSimplify {
 
   SmallVector<WeakTrackingVH, 16> DeadInsts;
 
-  bool isValidRewrite(Value *FromVal, Value *ToVal);
-
   bool handleFloatingPointIV(Loop *L, PHINode *PH);
   bool rewriteNonIntegerIVs(Loop *L);
 
@@ -155,10 +151,7 @@ class IndVarSimplify {
   /// iterations of the loop run when that is unobservable.
   bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
 
-  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
-  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
   bool rewriteFirstIterationLoopExitValues(Loop *L);
-  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
 
   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                  const SCEV *ExitCount,
@@ -177,58 +170,6 @@ class IndVarSimplify {
 
 } // end anonymous namespace
 
-/// Return true if the SCEV expansion generated by the rewriter can replace the
-/// original value. SCEV guarantees that it produces the same value, but the way
-/// it is produced may be illegal IR.  Ideally, this function will only be
-/// called for verification.
-bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
-  // If an SCEV expression subsumed multiple pointers, its expansion could
-  // reassociate the GEP changing the base pointer. This is illegal because the
-  // final address produced by a GEP chain must be inbounds relative to its
-  // underlying object. Otherwise basic alias analysis, among other things,
-  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
-  // producing an expression involving multiple pointers. Until then, we must
-  // bail out here.
-  //
-  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
-  // because it understands lcssa phis while SCEV does not.
-  Value *FromPtr = FromVal;
-  Value *ToPtr = ToVal;
-  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
-    FromPtr = GEP->getPointerOperand();
-  }
-  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
-    ToPtr = GEP->getPointerOperand();
-  }
-  if (FromPtr != FromVal || ToPtr != ToVal) {
-    // Quickly check the common case
-    if (FromPtr == ToPtr)
-      return true;
-
-    // SCEV may have rewritten an expression that produces the GEP's pointer
-    // operand. That's ok as long as the pointer operand has the same base
-    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
-    // base of a recurrence. This handles the case in which SCEV expansion
-    // converts a pointer type recurrence into a nonrecurrent pointer base
-    // indexed by an integer recurrence.
-
-    // If the GEP base pointer is a vector of pointers, abort.
-    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
-      return false;
-
-    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
-    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
-    if (FromBase == ToBase)
-      return true;
-
-    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
-                      << " != " << *ToBase << "\n");
-
-    return false;
-  }
-  return true;
-}
-
 /// Determine the insertion point for this user. By default, insert immediately
 /// before the user. SCEVExpander or LICM will hoist loop invariants out of the
 /// loop. For PHI nodes, there may be multiple uses, so compute the nearest
@@ -522,222 +463,6 @@ bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
   return Changed;
 }
 
-namespace {
-
-// Collect information about PHI nodes which can be transformed in
-// rewriteLoopExitValues.
-struct RewritePhi {
-  PHINode *PN;
-
-  // Ith incoming value.
-  unsigned Ith;
-
-  // Exit value after expansion.
-  Value *Val;
-
-  // High Cost when expansion.
-  bool HighCost;
-
-  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
-      : PN(P), Ith(I), Val(V), HighCost(H) {}
-};
-
-} // end anonymous namespace
-
-//===----------------------------------------------------------------------===//
-// rewriteLoopExitValues - Optimize IV users outside the loop.
-// As a side effect, reduces the amount of IV processing within the loop.
-//===----------------------------------------------------------------------===//
-
-bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L, const Instruction *I) const {
-  SmallPtrSet<const Instruction *, 8> Visited;
-  SmallVector<const Instruction *, 8> WorkList;
-  Visited.insert(I);
-  WorkList.push_back(I);
-  while (!WorkList.empty()) {
-    const Instruction *Curr = WorkList.pop_back_val();
-    // This use is outside the loop, nothing to do.
-    if (!L->contains(Curr))
-      continue;
-    // Do we assume it is a "hard" use which will not be eliminated easily?
-    if (Curr->mayHaveSideEffects())
-      return true;
-    // Otherwise, add all its users to worklist.
-    for (auto U : Curr->users()) {
-      auto *UI = cast<Instruction>(U);
-      if (Visited.insert(UI).second)
-        WorkList.push_back(UI);
-    }
-  }
-  return false;
-}
-
-/// Check to see if this loop has a computable loop-invariant execution count.
-/// If so, this means that we can compute the final value of any expressions
-/// that are recurrent in the loop, and substitute the exit values from the loop
-/// into any instructions outside of the loop that use the final values of the
-/// current expressions.
-///
-/// This is mostly redundant with the regular IndVarSimplify activities that
-/// happen later, except that it's more powerful in some cases, because it's
-/// able to brute-force evaluate arbitrary instructions as long as they have
-/// constant operands at the beginning of the loop.
-bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
-  // Check a pre-condition.
-  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
-         "Indvars did not preserve LCSSA!");
-
-  SmallVector<BasicBlock*, 8> ExitBlocks;
-  L->getUniqueExitBlocks(ExitBlocks);
-
-  SmallVector<RewritePhi, 8> RewritePhiSet;
-  // Find all values that are computed inside the loop, but used outside of it.
-  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
-  // the exit blocks of the loop to find them.
-  for (BasicBlock *ExitBB : ExitBlocks) {
-    // If there are no PHI nodes in this exit block, then no values defined
-    // inside the loop are used on this path, skip it.
-    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
-    if (!PN) continue;
-
-    unsigned NumPreds = PN->getNumIncomingValues();
-
-    // Iterate over all of the PHI nodes.
-    BasicBlock::iterator BBI = ExitBB->begin();
-    while ((PN = dyn_cast<PHINode>(BBI++))) {
-      if (PN->use_empty())
-        continue; // dead use, don't replace it
-
-      if (!SE->isSCEVable(PN->getType()))
-        continue;
-
-      // It's necessary to tell ScalarEvolution about this explicitly so that
-      // it can walk the def-use list and forget all SCEVs, as it may not be
-      // watching the PHI itself. Once the new exit value is in place, there
-      // may not be a def-use connection between the loop and every instruction
-      // which got a SCEVAddRecExpr for that loop.
-      SE->forgetValue(PN);
-
-      // Iterate over all of the values in all the PHI nodes.
-      for (unsigned i = 0; i != NumPreds; ++i) {
-        // If the value being merged in is not integer or is not defined
-        // in the loop, skip it.
-        Value *InVal = PN->getIncomingValue(i);
-        if (!isa<Instruction>(InVal))
-          continue;
-
-        // If this pred is for a subloop, not L itself, skip it.
-        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
-          continue; // The Block is in a subloop, skip it.
-
-        // Check that InVal is defined in the loop.
-        Instruction *Inst = cast<Instruction>(InVal);
-        if (!L->contains(Inst))
-          continue;
-
-        // Okay, this instruction has a user outside of the current loop
-        // and varies predictably *inside* the loop.  Evaluate the value it
-        // contains when the loop exits, if possible.  We prefer to start with
-        // expressions which are true for all exits (so as to maximize
-        // expression reuse by the SCEVExpander), but resort to per-exit
-        // evaluation if that fails.  
-        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
-        if (isa<SCEVCouldNotCompute>(ExitValue) ||
-            !SE->isLoopInvariant(ExitValue, L) ||
-            !isSafeToExpand(ExitValue, *SE)) {
-          // TODO: This should probably be sunk into SCEV in some way; maybe a
-          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
-          // most SCEV expressions and other recurrence types (e.g. shift
-          // recurrences).  Is there existing code we can reuse?
-          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
-          if (isa<SCEVCouldNotCompute>(ExitCount))
-            continue;
-          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
-            if (AddRec->getLoop() == L)
-              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
-          if (isa<SCEVCouldNotCompute>(ExitValue) ||
-              !SE->isLoopInvariant(ExitValue, L) ||
-              !isSafeToExpand(ExitValue, *SE))
-            continue;
-        }
-        
-        // Computing the value outside of the loop brings no benefit if it is
-        // definitely used inside the loop in a way which can not be optimized
-        // away.  Avoid doing so unless we know we have a value which computes
-        // the ExitValue already.  TODO: This should be merged into SCEV
-        // expander to leverage its knowledge of existing expressions.
-        if (ReplaceExitValue != AlwaysRepl &&
-            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
-            hasHardUserWithinLoop(L, Inst))
-          continue;
-
-        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
-        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
-
-        LLVM_DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
-                          << '\n'
-                          << "  LoopVal = " << *Inst << "\n");
-
-        if (!isValidRewrite(Inst, ExitVal)) {
-          DeadInsts.push_back(ExitVal);
-          continue;
-        }
-
-#ifndef NDEBUG
-        // If we reuse an instruction from a loop which is neither L nor one of
-        // its containing loops, we end up breaking LCSSA form for this loop by
-        // creating a new use of its instruction.
-        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
-          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
-            if (EVL != L)
-              assert(EVL->contains(L) && "LCSSA breach detected!");
-#endif
-
-        // Collect all the candidate PHINodes to be rewritten.
-        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
-      }
-    }
-  }
-
-  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
-
-  bool Changed = false;
-  // Transformation.
-  for (const RewritePhi &Phi : RewritePhiSet) {
-    PHINode *PN = Phi.PN;
-    Value *ExitVal = Phi.Val;
-
-    // Only do the rewrite when the ExitValue can be expanded cheaply.
-    // If LoopCanBeDel is true, rewrite exit value aggressively.
-    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
-      DeadInsts.push_back(ExitVal);
-      continue;
-    }
-
-    Changed = true;
-    ++NumReplaced;
-    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
-    PN->setIncomingValue(Phi.Ith, ExitVal);
-
-    // If this instruction is dead now, delete it. Don't do it now to avoid
-    // invalidating iterators.
-    if (isInstructionTriviallyDead(Inst, TLI))
-      DeadInsts.push_back(Inst);
-
-    // Replace PN with ExitVal if that is legal and does not break LCSSA.
-    if (PN->getNumIncomingValues() == 1 &&
-        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
-      PN->replaceAllUsesWith(ExitVal);
-      PN->eraseFromParent();
-    }
-  }
-
-  // The insertion point instruction may have been deleted; clear it out
-  // so that the rewriter doesn't trip over it later.
-  Rewriter.clearInsertPoint();
-  return Changed;
-}
-
 //===---------------------------------------------------------------------===//
 // rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
 // they will exit at the first iteration.
@@ -813,61 +538,6 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
   return MadeAnyChanges;
 }
 
-/// Check whether it is possible to delete the loop after rewriting exit
-/// value. If it is possible, ignore ReplaceExitValue and do rewriting
-/// aggressively.
-bool IndVarSimplify::canLoopBeDeleted(
-    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
-  BasicBlock *Preheader = L->getLoopPreheader();
-  // If there is no preheader, the loop will not be deleted.
-  if (!Preheader)
-    return false;
-
-  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
-  // We obviate multiple ExitingBlocks case for simplicity.
-  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
-  // after exit value rewriting, we can enhance the logic here.
-  SmallVector<BasicBlock *, 4> ExitingBlocks;
-  L->getExitingBlocks(ExitingBlocks);
-  SmallVector<BasicBlock *, 8> ExitBlocks;
-  L->getUniqueExitBlocks(ExitBlocks);
-  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
-    return false;
-
-  BasicBlock *ExitBlock = ExitBlocks[0];
-  BasicBlock::iterator BI = ExitBlock->begin();
-  while (PHINode *P = dyn_cast<PHINode>(BI)) {
-    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
-
-    // If the Incoming value of P is found in RewritePhiSet, we know it
-    // could be rewritten to use a loop invariant value in transformation
-    // phase later. Skip it in the loop invariant check below.
-    bool found = false;
-    for (const RewritePhi &Phi : RewritePhiSet) {
-      unsigned i = Phi.Ith;
-      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
-        found = true;
-        break;
-      }
-    }
-
-    Instruction *I;
-    if (!found && (I = dyn_cast<Instruction>(Incoming)))
-      if (!L->hasLoopInvariantOperands(I))
-        return false;
-
-    ++BI;
-  }
-
-  for (auto *BB : L->blocks())
-    if (llvm::any_of(*BB, [](Instruction &I) {
-          return I.mayHaveSideEffects();
-        }))
-      return false;
-
-  return true;
-}
-
 //===----------------------------------------------------------------------===//
 //  IV Widening - Extend the width of an IV to cover its widest uses.
 //===----------------------------------------------------------------------===//
@@ -3016,8 +2686,13 @@ bool IndVarSimplify::run(Loop *L) {
   // that are recurrent in the loop, and substitute the exit values from the
   // loop into any instructions outside of the loop that use the final values
   // of the current expressions.
-  if (ReplaceExitValue != NeverRepl)
-    Changed |= rewriteLoopExitValues(L, Rewriter);
+  if (ReplaceExitValue != NeverRepl) {
+    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, Rewriter, DT,
+                                             ReplaceExitValue, DeadInsts)) {
+      NumReplaced += Rewrites;
+      Changed = true;
+    }
+  }
 
   // Eliminate redundant IV cycles.
   NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
@@ -3028,7 +2703,7 @@ bool IndVarSimplify::run(Loop *L) {
     // Given we've changed exit counts, notify SCEV
     SE->forgetLoop(L);
   }
-  
+
   // Try to form loop invariant tests for loop exits by changing how many
   // iterations of the loop run when that is unobservable.
   if (predicateLoopExits(L, Rewriter)) {
@@ -3038,7 +2713,7 @@ bool IndVarSimplify::run(Loop *L) {
   }
 
   // If we have a trip count expression, rewrite the loop's exit condition
-  // using it.  
+  // using it.
   if (!DisableLFTR) {
     SmallVector<BasicBlock*, 16> ExitingBlocks;
     L->getExitingBlocks(ExitingBlocks);
@@ -3049,10 +2724,10 @@ bool IndVarSimplify::run(Loop *L) {
 
       // If our exitting block exits multiple loops, we can only rewrite the
       // innermost one.  Otherwise, we're changing how many times the innermost
-      // loop runs before it exits. 
+      // loop runs before it exits.
       if (LI->getLoopFor(ExitingBB) != L)
         continue;
-      
+
       if (!needsLFTR(L, ExitingBB))
         continue;
 
@@ -3066,13 +2741,13 @@ bool IndVarSimplify::run(Loop *L) {
       // until stable to handle cases like this better.
       if (ExitCount->isZero())
         continue;
-      
+
       PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
       if (!IndVar)
         continue;
-      
+
       // Avoid high cost expansions.  Note: This heuristic is questionable in
-      // that our definition of "high cost" is not exactly principled.  
+      // that our definition of "high cost" is not exactly principled.
       if (Rewriter.isHighCostExpansion(ExitCount, L))
         continue;
 
@@ -3081,7 +2756,7 @@ bool IndVarSimplify::run(Loop *L) {
       // any pass that uses the SCEVExpander must do it. This does not work
       // well for loop passes because SCEVExpander makes assumptions about
       // all loops, while LoopPassManager only forces the current loop to be
-      // simplified. 
+      // simplified.
       //
       // FIXME: SCEV expansion has no way to bail out, so the caller must
       // explicitly check any assumptions made by SCEV. Brittle.

diff  --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index c4c40189fda4..054ef17a9647 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -39,6 +40,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -1042,3 +1044,307 @@ bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
          SE.isLoopEntryGuardedByCond(L, Predicate, S,
                                      SE.getConstant(Max));
 }
+
+//===----------------------------------------------------------------------===//
+// rewriteLoopExitValues - Optimize IV users outside the loop.
+// As a side effect, reduces the amount of IV processing within the loop.
+//===----------------------------------------------------------------------===//
+
+// Return true if the SCEV expansion generated by the rewriter can replace the
+// original value. SCEV guarantees that it produces the same value, but the way
+// it is produced may be illegal IR.  Ideally, this function will only be
+// called for verification.
+static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) {
+  // If an SCEV expression subsumed multiple pointers, its expansion could
+  // reassociate the GEP changing the base pointer. This is illegal because the
+  // final address produced by a GEP chain must be inbounds relative to its
+  // underlying object. Otherwise basic alias analysis, among other things,
+  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
+  // producing an expression involving multiple pointers. Until then, we must
+  // bail out here.
+  //
+  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
+  // because it understands lcssa phis while SCEV does not.
+  Value *FromPtr = FromVal;
+  Value *ToPtr = ToVal;
+  if (auto *GEP = dyn_cast<GEPOperator>(FromVal))
+    FromPtr = GEP->getPointerOperand();
+
+  if (auto *GEP = dyn_cast<GEPOperator>(ToVal))
+    ToPtr = GEP->getPointerOperand();
+
+  if (FromPtr != FromVal || ToPtr != ToVal) {
+    // Quickly check the common case
+    if (FromPtr == ToPtr)
+      return true;
+
+    // SCEV may have rewritten an expression that produces the GEP's pointer
+    // operand. That's ok as long as the pointer operand has the same base
+    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
+    // base of a recurrence. This handles the case in which SCEV expansion
+    // converts a pointer type recurrence into a nonrecurrent pointer base
+    // indexed by an integer recurrence.
+
+    // If the GEP base pointer is a vector of pointers, abort.
+    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
+      return false;
+
+    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
+    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
+    if (FromBase == ToBase)
+      return true;
+
+    LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: GEP rewrite bail out "
+                      << *FromBase << " != " << *ToBase << "\n");
+
+    return false;
+  }
+  return true;
+}
+
+static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
+  SmallPtrSet<const Instruction *, 8> Visited;
+  SmallVector<const Instruction *, 8> WorkList;
+  Visited.insert(I);
+  WorkList.push_back(I);
+  while (!WorkList.empty()) {
+    const Instruction *Curr = WorkList.pop_back_val();
+    // This use is outside the loop, nothing to do.
+    if (!L->contains(Curr))
+      continue;
+    // Do we assume it is a "hard" use which will not be eliminated easily?
+    if (Curr->mayHaveSideEffects())
+      return true;
+    // Otherwise, add all its users to worklist.
+    for (auto U : Curr->users()) {
+      auto *UI = cast<Instruction>(U);
+      if (Visited.insert(UI).second)
+        WorkList.push_back(UI);
+    }
+  }
+  return false;
+}
+
+// Collect information about PHI nodes which can be transformed in
+// rewriteLoopExitValues.
+struct RewritePhi {
+  PHINode *PN;
+  unsigned Ith;   // Ith incoming value.
+  Value *Val;     // Exit value after expansion.
+  bool HighCost;  // High Cost when expansion.
+
+  RewritePhi(PHINode *P, unsigned I, Value *V, bool H)
+      : PN(P), Ith(I), Val(V), HighCost(H) {}
+};
+
+// Check whether it is possible to delete the loop after rewriting exit
+// value. If it is possible, ignore ReplaceExitValue and do rewriting
+// aggressively.
+static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
+  BasicBlock *Preheader = L->getLoopPreheader();
+  // If there is no preheader, the loop will not be deleted.
+  if (!Preheader)
+    return false;
+
+  // In LoopDeletion pass Loop can be deleted when ExitingBlocks.size() > 1.
+  // We obviate multiple ExitingBlocks case for simplicity.
+  // TODO: If we see testcase with multiple ExitingBlocks can be deleted
+  // after exit value rewriting, we can enhance the logic here.
+  SmallVector<BasicBlock *, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+  SmallVector<BasicBlock *, 8> ExitBlocks;
+  L->getUniqueExitBlocks(ExitBlocks);
+  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
+    return false;
+
+  BasicBlock *ExitBlock = ExitBlocks[0];
+  BasicBlock::iterator BI = ExitBlock->begin();
+  while (PHINode *P = dyn_cast<PHINode>(BI)) {
+    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);
+
+    // If the Incoming value of P is found in RewritePhiSet, we know it
+    // could be rewritten to use a loop invariant value in transformation
+    // phase later. Skip it in the loop invariant check below.
+    bool found = false;
+    for (const RewritePhi &Phi : RewritePhiSet) {
+      unsigned i = Phi.Ith;
+      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
+        found = true;
+        break;
+      }
+    }
+
+    Instruction *I;
+    if (!found && (I = dyn_cast<Instruction>(Incoming)))
+      if (!L->hasLoopInvariantOperands(I))
+        return false;
+
+    ++BI;
+  }
+
+  for (auto *BB : L->blocks())
+    if (llvm::any_of(*BB, [](Instruction &I) {
+          return I.mayHaveSideEffects();
+        }))
+      return false;
+
+  return true;
+}
+
+int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI,
+    TargetLibraryInfo *TLI, ScalarEvolution *SE, SCEVExpander &Rewriter,
+    DominatorTree *DT, ReplaceExitVal ReplaceExitValue,
+    SmallVector<WeakTrackingVH, 16> &DeadInsts) {
+  // Check a pre-condition.
+  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+         "Indvars did not preserve LCSSA!");
+
+  SmallVector<BasicBlock*, 8> ExitBlocks;
+  L->getUniqueExitBlocks(ExitBlocks);
+
+  SmallVector<RewritePhi, 8> RewritePhiSet;
+  // Find all values that are computed inside the loop, but used outside of it.
+  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
+  // the exit blocks of the loop to find them.
+  for (BasicBlock *ExitBB : ExitBlocks) {
+    // If there are no PHI nodes in this exit block, then no values defined
+    // inside the loop are used on this path, skip it.
+    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
+    if (!PN) continue;
+
+    unsigned NumPreds = PN->getNumIncomingValues();
+
+    // Iterate over all of the PHI nodes.
+    BasicBlock::iterator BBI = ExitBB->begin();
+    while ((PN = dyn_cast<PHINode>(BBI++))) {
+      if (PN->use_empty())
+        continue; // dead use, don't replace it
+
+      if (!SE->isSCEVable(PN->getType()))
+        continue;
+
+      // It's necessary to tell ScalarEvolution about this explicitly so that
+      // it can walk the def-use list and forget all SCEVs, as it may not be
+      // watching the PHI itself. Once the new exit value is in place, there
+      // may not be a def-use connection between the loop and every instruction
+      // which got a SCEVAddRecExpr for that loop.
+      SE->forgetValue(PN);
+
+      // Iterate over all of the values in all the PHI nodes.
+      for (unsigned i = 0; i != NumPreds; ++i) {
+        // If the value being merged in is not integer or is not defined
+        // in the loop, skip it.
+        Value *InVal = PN->getIncomingValue(i);
+        if (!isa<Instruction>(InVal))
+          continue;
+
+        // If this pred is for a subloop, not L itself, skip it.
+        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
+          continue; // The Block is in a subloop, skip it.
+
+        // Check that InVal is defined in the loop.
+        Instruction *Inst = cast<Instruction>(InVal);
+        if (!L->contains(Inst))
+          continue;
+
+        // Okay, this instruction has a user outside of the current loop
+        // and varies predictably *inside* the loop.  Evaluate the value it
+        // contains when the loop exits, if possible.  We prefer to start with
+        // expressions which are true for all exits (so as to maximize
+        // expression reuse by the SCEVExpander), but resort to per-exit
+        // evaluation if that fails.
+        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
+        if (isa<SCEVCouldNotCompute>(ExitValue) ||
+            !SE->isLoopInvariant(ExitValue, L) ||
+            !isSafeToExpand(ExitValue, *SE)) {
+          // TODO: This should probably be sunk into SCEV in some way; maybe a
+          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
+          // most SCEV expressions and other recurrence types (e.g. shift
+          // recurrences).  Is there existing code we can reuse?
+          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
+          if (isa<SCEVCouldNotCompute>(ExitCount))
+            continue;
+          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
+            if (AddRec->getLoop() == L)
+              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
+          if (isa<SCEVCouldNotCompute>(ExitValue) ||
+              !SE->isLoopInvariant(ExitValue, L) ||
+              !isSafeToExpand(ExitValue, *SE))
+            continue;
+        }
+
+        // Computing the value outside of the loop brings no benefit if it is
+        // definitely used inside the loop in a way which can not be optimized
+        // away. Avoid doing so unless we know we have a value which computes
+        // the ExitValue already. TODO: This should be merged into SCEV
+        // expander to leverage its knowledge of existing expressions.
+        if (ReplaceExitValue != AlwaysRepl &&
+            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
+            hasHardUserWithinLoop(L, Inst))
+          continue;
+
+        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
+        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
+
+        LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = "
+                          << *ExitVal << '\n' << "  LoopVal = " << *Inst
+                          << "\n");
+
+        if (!isValidRewrite(SE, Inst, ExitVal)) {
+          DeadInsts.push_back(ExitVal);
+          continue;
+        }
+
+#ifndef NDEBUG
+        // If we reuse an instruction from a loop which is neither L nor one of
+        // its containing loops, we end up breaking LCSSA form for this loop by
+        // creating a new use of its instruction.
+        if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
+          if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
+            if (EVL != L)
+              assert(EVL->contains(L) && "LCSSA breach detected!");
+#endif
+
+        // Collect all the candidate PHINodes to be rewritten.
+        RewritePhiSet.emplace_back(PN, i, ExitVal, HighCost);
+      }
+    }
+  }
+
+  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
+  int NumReplaced = 0;
+
+  // Transformation.
+  for (const RewritePhi &Phi : RewritePhiSet) {
+    PHINode *PN = Phi.PN;
+    Value *ExitVal = Phi.Val;
+
+    // Only do the rewrite when the ExitValue can be expanded cheaply.
+    // If LoopCanBeDel is true, rewrite exit value aggressively.
+    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
+      DeadInsts.push_back(ExitVal);
+      continue;
+    }
+
+    NumReplaced++;
+    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
+    PN->setIncomingValue(Phi.Ith, ExitVal);
+
+    // If this instruction is dead now, delete it. Don't do it now to avoid
+    // invalidating iterators.
+    if (isInstructionTriviallyDead(Inst, TLI))
+      DeadInsts.push_back(Inst);
+
+    // Replace PN with ExitVal if that is legal and does not break LCSSA.
+    if (PN->getNumIncomingValues() == 1 &&
+        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
+      PN->replaceAllUsesWith(ExitVal);
+      PN->eraseFromParent();
+    }
+  }
+
+  // The insertion point instruction may have been deleted; clear it out
+  // so that the rewriter doesn't trip over it later.
+  Rewriter.clearInsertPoint();
+  return NumReplaced;
+}