[llvm] r303444 - Last of the major pieces to NewGVN - yay!

Fri May 19 12:01:28 PDT 2017

Author: dannyb
Date: Fri May 19 14:01:27 2017
New Revision: 303444

URL: http://llvm.org/viewvc/llvm-project?rev=303444&view=rev
Log:
Last of the major pieces to NewGVN - yay!

Summary:
NewGVN: Handle equivalence between phi of ops and op of phis.

This makes our GVN mostly-complete. It would be complete, modulo some
deliberate choices we make.  This means it detects roughly all Herbrand
equivalences in polynomial time, including cases notoriously hard for
other GVNs to detect.  It also detects a very large swath of the
cases we currently rely on instcombine to detect that involve folding
upwards through phis.

Fixes PR 31125, PR 31463, PR 31868

Reviewers: davide

Subscribers: Prazek, llvm-commits

Differential Revision: https://reviews.llvm.org/D32151

Added:
    llvm/trunk/test/Transforms/NewGVN/completeness.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp
    llvm/trunk/test/Transforms/NewGVN/storeoverstore.ll

Modified: llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp?rev=303444&r1=303443&r2=303444&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/NewGVN.cpp Fri May 19 14:01:27 2017
@@ -30,9 +30,19 @@
 /// tracks what operations have a given value number (IE it also tracks the
 /// reverse mapping from value number -> operations with that value number), so
 /// that it only needs to reprocess the instructions that are affected when
-/// something's value number changes.  The rest of the algorithm is devoted to
-/// performing symbolic evaluation, forward propagation, and simplification of
-/// operations based on the value numbers deduced so far.
+/// something's value number changes.  The vast majority of complexity and code
+/// in this file is devoted to tracking what value numbers could change for what
+/// instructions when various things happen.  The rest of the algorithm is
+/// devoted to performing symbolic evaluation, forward propagation, and
+/// simplification of operations based on the value numbers deduced so far
+///
+/// In order to make the GVN mostly-complete, we use a technique derived from
+/// "Detection of Redundant Expressions: A Complete and Polynomial-time
+/// Algorithm in SSA" by R.R. Pai.  The source of incompleteness in most SSA
+/// based GVN algorithms is related to their inability to detect equivalence
+/// between phi of ops (IE phi(a+b, c+d)) and op of phis (phi(a,c) + phi(b, d)).
+/// We resolve this issue by generating the equivalent "phi of ops" form for
+/// each op of phis we see, in a way that only takes polynomial time to resolve.
 ///
 /// We also do not perform elimination by using any published algorithm.  All
 /// published algorithms are O(Instructions). Instead, we use a technique that
@@ -105,9 +115,13 @@ STATISTIC(NumGVNSortedLeaderChanges, "Nu
 STATISTIC(NumGVNAvoidedSortedLeaderChanges,
           "Number of avoided sorted leader changes");
 STATISTIC(NumGVNDeadStores, "Number of redundant/dead stores eliminated");
+STATISTIC(NumGVNPHIOfOpsCreated, "Number of PHI of ops created");
+STATISTIC(NumGVNPHIOfOpsEliminations,
+          "Number of things eliminated using PHI of ops");
 DEBUG_COUNTER(VNCounter, "newgvn-vn",
               "Controls which instructions are value numbered")
-
+DEBUG_COUNTER(PHIOfOpsCounter, "newgvn-phi",
+              "Controls which instructions we create phi of ops for")
 // Currently store defining access refinement is too slow due to basicaa being
 // egregiously slow.  This flag lets us keep it working while we work on this
 // issue.
@@ -170,10 +184,9 @@ private:
       }
     }
     // See if we really were the root of a component, by seeing if we still have
-    // our DFSNumber.
-    // If we do, we are the root of the component, and we have completed a
-    // component. If we do not,
-    // we are not the root of a component, and belong on the component stack.
+    // our DFSNumber.  If we do, we are the root of the component, and we have
+    // completed a component. If we do not, we are not the root of a component,
+    // and belong on the component stack.
     if (Root.lookup(I) == OurDFS) {
       unsigned ComponentID = Components.size();
       Components.resize(Components.size() + 1);
@@ -365,6 +378,7 @@ private:
   int StoreCount = 0;
 };
 
+struct HashedExpression;
 namespace llvm {
 template <> struct DenseMapInfo<const Expression *> {
   static const Expression *getEmptyKey() {
@@ -377,9 +391,11 @@ template <> struct DenseMapInfo<const Ex
     Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
     return reinterpret_cast<const Expression *>(Val);
   }
-  static unsigned getHashValue(const Expression *V) {
-    return static_cast<unsigned>(V->getHashValue());
+  static unsigned getHashValue(const Expression *E) {
+    return static_cast<unsigned>(E->getHashValue());
   }
+  static unsigned getHashValue(const HashedExpression &HE);
+  static bool isEqual(const HashedExpression &LHS, const Expression *RHS);
   static bool isEqual(const Expression *LHS, const Expression *RHS) {
     if (LHS == RHS)
       return true;
@@ -391,6 +407,26 @@ template <> struct DenseMapInfo<const Ex
 };
 } // end namespace llvm
 
+// This is just a wrapper around Expression that computes the hash value once at
+// creation time.  Hash values for an Expression can't change once they are
+// inserted into the DenseMap (it breaks DenseMap), so they must be immutable at
+// that point anyway.
+struct HashedExpression {
+  const Expression *E;
+  unsigned HashVal;
+  HashedExpression(const Expression *E)
+      : E(E), HashVal(DenseMapInfo<const Expression *>::getHashValue(E)) {}
+};
+
+unsigned
+DenseMapInfo<const Expression *>::getHashValue(const HashedExpression &HE) {
+  return HE.HashVal;
+}
+bool DenseMapInfo<const Expression *>::isEqual(const HashedExpression &LHS,
+                                               const Expression *RHS) {
+  return isEqual(LHS.E, RHS);
+}
+
 namespace {
 class NewGVN {
   Function &F;
@@ -428,6 +464,33 @@ class NewGVN {
   // Value Mappings.
   DenseMap<Value *, CongruenceClass *> ValueToClass;
   DenseMap<Value *, const Expression *> ValueToExpression;
+  // Value PHI handling, used to make equivalence between phi(op, op) and
+  // op(phi, phi).
+  // These mappings just store various data that would normally be part of the
+  // IR.
+  DenseSet<const Instruction *> PHINodeUses;
+  // Map a temporary instruction we created to a parent block.
+  DenseMap<const Value *, BasicBlock *> TempToBlock;
+  // Map between the temporary phis we created and the real instructions they
+  // are known equivalent to.
+  DenseMap<const Value *, PHINode *> RealToTemp;
+  // In order to know when we should re-process instructions that have
+  // phi-of-ops, we track the set of expressions that they needed as
+  // leaders. When we discover new leaders for those expressions, we process the
+  // associated phi-of-op instructions again in case they have changed.  The
+  // other way they may change is if they had leaders, and those leaders
+  // disappear.  However, at the point they have leaders, there are uses of the
+  // relevant operands in the created phi node, and so they will get reprocessed
+  // through the normal user marking we perform.
+  mutable DenseMap<const Value *, SmallPtrSet<Value *, 2>> AdditionalUsers;
+  DenseMap<const Expression *, SmallPtrSet<Instruction *, 2>>
+      ExpressionToPhiOfOps;
+  // Map from basic block to the temporary operations we created
+  DenseMap<const BasicBlock *, SmallVector<Instruction *, 8>> PHIOfOpsPHIs;
+  // Map from temporary operation to MemoryAccess.
+  DenseMap<const Instruction *, MemoryUseOrDef *> TempToMemory;
+  // Set of all temporary instructions we created.
+  DenseSet<Instruction *> AllTempInstructions;
 
   // Mapping from predicate info we used to the instructions we used it with.
   // In order to correctly ensure propagation, we must keep track of what
@@ -460,8 +523,8 @@ class NewGVN {
   enum MemoryPhiState { MPS_Invalid, MPS_TOP, MPS_Equivalent, MPS_Unique };
   DenseMap<const MemoryPhi *, MemoryPhiState> MemoryPhiState;
 
-  enum PhiCycleState { PCS_Unknown, PCS_CycleFree, PCS_Cycle };
-  mutable DenseMap<const PHINode *, PhiCycleState> PhiCycleState;
+  enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle };
+  mutable DenseMap<const Instruction *, InstCycleState> InstCycleState;
   // Expression to class mapping.
   using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
   ExpressionClassMap ExpressionToClass;
@@ -519,7 +582,7 @@ private:
   const Expression *createBinaryExpression(unsigned, Type *, Value *,
                                            Value *) const;
   PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge,
-                                     bool &AllConstant) const;
+                                     bool &OriginalOpsConstant) const;
   const VariableExpression *createVariableExpression(Value *) const;
   const ConstantExpression *createConstantExpression(Constant *) const;
   const Expression *createVariableOrConstant(Value *V) const;
@@ -560,6 +623,9 @@ private:
     return CClass;
   }
   void initializeCongruenceClasses(Function &F);
+  const Expression *makePossiblePhiOfOps(Instruction *, bool,
+                                         SmallPtrSetImpl<Value *> &);
+  void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue);
 
   // Value number an Instruction or MemoryPhi.
   void valueNumberMemoryPhi(MemoryPhi *);
@@ -568,7 +634,8 @@ private:
   // Symbolic evaluation.
   const Expression *checkSimplificationResults(Expression *, Instruction *,
                                                Value *) const;
-  const Expression *performSymbolicEvaluation(Value *) const;
+  const Expression *performSymbolicEvaluation(Value *,
+                                              SmallPtrSetImpl<Value *> &) const;
   const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
                                                 Instruction *,
                                                 MemoryAccess *) const;
@@ -593,7 +660,7 @@ private:
   bool setMemoryClass(const MemoryAccess *From, CongruenceClass *To);
   CongruenceClass *getMemoryClass(const MemoryAccess *MA) const;
   const MemoryAccess *lookupMemoryLeader(const MemoryAccess *) const;
-  bool isMemoryAccessTop(const MemoryAccess *) const;
+  bool isMemoryAccessTOP(const MemoryAccess *) const;
 
   // Ranking
   unsigned int getRank(const Value *) const;
@@ -617,6 +684,7 @@ private:
   void replaceInstruction(Instruction *, Value *);
   void markInstructionForDeletion(Instruction *);
   void deleteInstructionsInBlock(BasicBlock *);
+  Value *findPhiOfOpsLeader(const Expression *E, const BasicBlock *BB) const;
 
   // New instruction creation.
   void handleNewInstruction(Instruction *){};
@@ -628,8 +696,10 @@ private:
   void markPredicateUsersTouched(Instruction *);
   void markValueLeaderChangeTouched(CongruenceClass *CC);
   void markMemoryLeaderChangeTouched(CongruenceClass *CC);
+  void markPhiOfOpsChanged(const HashedExpression &HE);
   void addPredicateUsers(const PredicateBase *, Instruction *) const;
   void addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const;
+  void addAdditionalUsers(Value *To, Value *User) const;
 
   // Main loop of value numbering
   void iterateTouchedInstructions();
@@ -637,7 +707,7 @@ private:
   // Utilities.
   void cleanupTables();
   std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
-  void updateProcessedCount(Value *V);
+  void updateProcessedCount(const Value *V);
   void verifyMemoryCongruency() const;
   void verifyIterationSettled(Function &F);
   void verifyStoreExpressions() const;
@@ -645,6 +715,10 @@ private:
                               const MemoryAccess *, const MemoryAccess *) const;
   BasicBlock *getBlockForValue(Value *V) const;
   void deleteExpression(const Expression *E) const;
+  MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
+  MemoryAccess *getDefiningAccess(const MemoryAccess *) const;
+  MemoryPhi *getMemoryAccess(const BasicBlock *) const;
+  template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
   unsigned InstrToDFSNum(const Value *V) const {
     assert(isa<Instruction>(V) && "This should not be used for MemoryAccesses");
     return InstrDFS.lookup(V);
@@ -664,8 +738,8 @@ private:
                ? InstrToDFSNum(cast<MemoryUseOrDef>(MA)->getMemoryInst())
                : InstrDFS.lookup(MA);
   }
-  bool isCycleFree(const PHINode *PN) const;
-  template <class T, class Range> T *getMinDFSOfRange(const Range &) const;
+  bool isCycleFree(const Instruction *) const;
+  bool isBackedge(BasicBlock *From, BasicBlock *To) const;
   // Debug counter info.  When verifying, we have to reset the value numbering
   // debug counter to the same state it started in to get the same results.
   std::pair<int, int> StartingVNCounter;
@@ -693,20 +767,46 @@ bool StoreExpression::equals(const Expre
   return true;
 }
 
+// Determine if the edge From->To is a backedge
+bool NewGVN::isBackedge(BasicBlock *From, BasicBlock *To) const {
+  if (From == To)
+    return true;
+  auto *FromDTN = DT->getNode(From);
+  auto *ToDTN = DT->getNode(To);
+  return RPOOrdering.lookup(FromDTN) >= RPOOrdering.lookup(ToDTN);
+}
+
 #ifndef NDEBUG
 static std::string getBlockName(const BasicBlock *B) {
   return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr);
 }
 #endif
 
+// Get a MemoryAccess for an instruction, fake or real.
+MemoryUseOrDef *NewGVN::getMemoryAccess(const Instruction *I) const {
+  auto *Result = MSSA->getMemoryAccess(I);
+  return Result ? Result : TempToMemory.lookup(I);
+}
+
+// Get a MemoryPhi for a basic block. These are all real.
+MemoryPhi *NewGVN::getMemoryAccess(const BasicBlock *BB) const {
+  return MSSA->getMemoryAccess(BB);
+}
+
 // Get the basic block from an instruction/memory value.
 BasicBlock *NewGVN::getBlockForValue(Value *V) const {
-  if (auto *I = dyn_cast<Instruction>(V))
-    return I->getParent();
-  else if (auto *MP = dyn_cast<MemoryPhi>(V))
-    return MP->getBlock();
-  llvm_unreachable("Should have been able to figure out a block for our value");
-  return nullptr;
+  if (auto *I = dyn_cast<Instruction>(V)) {
+    auto *Parent = I->getParent();
+    if (Parent)
+      return Parent;
+    Parent = TempToBlock.lookup(V);
+    assert(Parent && "Every fake instruction should have a block");
+    return Parent;
+  }
+
+  auto *MP = dyn_cast<MemoryPhi>(V);
+  assert(MP && "Should have been an instruction or a MemoryPhi");
+  return MP->getBlock();
 }
 
 // Delete a definitely dead expression, so it can be reused by the expression
@@ -718,10 +818,9 @@ void NewGVN::deleteExpression(const Expr
   const_cast<BasicExpression *>(BE)->deallocateOperands(ArgRecycler);
   ExpressionAllocator.Deallocate(E);
 }
-
 PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge,
-                                           bool &AllConstant) const {
-  BasicBlock *PHIBlock = I->getParent();
+                                           bool &OriginalOpsConstant) const {
+  BasicBlock *PHIBlock = getBlockForValue(I);
   auto *PN = cast<PHINode>(I);
   auto *E =
       new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock);
@@ -730,8 +829,6 @@ PHIExpression *NewGVN::createPHIExpressi
   E->setType(I->getType());
   E->setOpcode(I->getOpcode());
 
-  unsigned PHIRPO = RPOOrdering.lookup(DT->getNode(PHIBlock));
-
   // NewGVN assumes the operands of a PHI node are in a consistent order across
   // PHIs. LLVM doesn't seem to always guarantee this. While we need to fix
   // this in LLVM at some point we don't want GVN to find wrong congruences.
@@ -752,14 +849,12 @@ PHIExpression *NewGVN::createPHIExpressi
   auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) {
     return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock});
   });
-
   std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
                  [&](const Use *U) -> Value * {
                    auto *BB = PN->getIncomingBlock(*U);
-                   auto *DTN = DT->getNode(BB);
-                   if (RPOOrdering.lookup(DTN) >= PHIRPO)
-                     HasBackedge = true;
-                   AllConstant &= isa<UndefValue>(*U) || isa<Constant>(*U);
+                   HasBackedge = HasBackedge || isBackedge(BB, PHIBlock);
+                   OriginalOpsConstant =
+                       OriginalOpsConstant && isa<Constant>(*U);
 
                    // Don't try to transform self-defined phis.
                    if (*U == PN)
@@ -784,7 +879,7 @@ bool NewGVN::setBasicExpressionInfo(Inst
   // whether all members are constant.
   std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) {
     auto Operand = lookupOperandLeader(O);
-    AllConstant &= isa<Constant>(Operand);
+    AllConstant = AllConstant && isa<Constant>(Operand);
     return Operand;
   });
 
@@ -1053,7 +1148,7 @@ const MemoryAccess *NewGVN::lookupMemory
 // Return true if the MemoryAccess is really equivalent to everything. This is
 // equivalent to the lattice value "TOP" in most lattices.  This is the initial
 // state of all MemoryAccesses.
-bool NewGVN::isMemoryAccessTop(const MemoryAccess *MA) const {
+bool NewGVN::isMemoryAccessTOP(const MemoryAccess *MA) const {
   return getMemoryClass(MA) == TOPClass;
 }
 
@@ -1099,7 +1194,7 @@ const Expression *NewGVN::performSymboli
   // Unlike loads, we never try to eliminate stores, so we do not check if they
   // are simple and avoid value numbering them.
   auto *SI = cast<StoreInst>(I);
-  auto *StoreAccess = MSSA->getMemoryAccess(SI);
+  auto *StoreAccess = getMemoryAccess(SI);
   // Get the expression, if any, for the RHS of the MemoryDef.
   const MemoryAccess *StoreRHS = StoreAccess->getDefiningAccess();
   if (EnableStoreRefinement)
@@ -1136,7 +1231,7 @@ const Expression *NewGVN::performSymboli
             dyn_cast<LoadInst>(lookupOperandLeader(SI->getValueOperand()))) {
       if ((lookupOperandLeader(LI->getPointerOperand()) ==
            lookupOperandLeader(SI->getPointerOperand())) &&
-          (lookupMemoryLeader(MSSA->getMemoryAccess(LI)->getDefiningAccess()) ==
+          (lookupMemoryLeader(getMemoryAccess(LI)->getDefiningAccess()) ==
            StoreRHS))
         return createVariableExpression(LI);
     }
@@ -1240,8 +1335,9 @@ const Expression *NewGVN::performSymboli
   // Load of undef is undef.
   if (isa<UndefValue>(LoadAddressLeader))
     return createConstantExpression(UndefValue::get(LI->getType()));
-
-  MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+  MemoryAccess *OriginalAccess = getMemoryAccess(I);
+  MemoryAccess *DefiningAccess =
+      MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
 
   if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
     if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
@@ -1330,6 +1426,7 @@ NewGVN::performSymbolicPredicateInfoEval
     // operands are equal, because assumes must always be true.
     if (CmpInst::isTrueWhenEqual(Predicate)) {
       addPredicateUsers(PI, I);
+      addAdditionalUsers(Cmp->getOperand(0), I);
       return createVariableOrConstant(FirstOp);
     }
   }
@@ -1342,6 +1439,7 @@ NewGVN::performSymbolicPredicateInfoEval
     if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) ||
         (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) {
       addPredicateUsers(PI, I);
+      addAdditionalUsers(Cmp->getOperand(0), I);
       return createVariableOrConstant(FirstOp);
     }
     // Handle the special case of floating point.
@@ -1349,6 +1447,7 @@ NewGVN::performSymbolicPredicateInfoEval
          (!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) &&
         isa<ConstantFP>(FirstOp) && !cast<ConstantFP>(FirstOp)->isZero()) {
       addPredicateUsers(PI, I);
+      addAdditionalUsers(Cmp->getOperand(0), I);
       return createConstantExpression(cast<Constant>(FirstOp));
     }
   }
@@ -1429,34 +1528,33 @@ bool NewGVN::setMemoryClass(const Memory
   return Changed;
 }
 
-// Determine if a phi is cycle-free.  That means the values in the phi don't
-// depend on any expressions that can change value as a result of the phi.
-// For example, a non-cycle free phi would be  v = phi(0, v+1).
-bool NewGVN::isCycleFree(const PHINode *PN) const {
-  // In order to compute cycle-freeness, we do SCC finding on the phi, and see
-  // what kind of SCC it ends up in.  If it is a singleton, it is cycle-free.
-  // If it is not in a singleton, it is only cycle free if the other members are
-  // all phi nodes (as they do not compute anything, they are copies).  TODO:
-  // There are likely a few other intrinsics or expressions that could be
-  // included here, but this happens so infrequently already that it is not
-  // likely to be worth it.
-  auto PCS = PhiCycleState.lookup(PN);
-  if (PCS == PCS_Unknown) {
-    SCCFinder.Start(PN);
-    auto &SCC = SCCFinder.getComponentFor(PN);
+// Determine if a instruction is cycle-free.  That means the values in the
+// instruction don't depend on any expressions that can change value as a result
+// of the instruction.  For example, a non-cycle free instruction would be v =
+// phi(0, v+1).
+bool NewGVN::isCycleFree(const Instruction *I) const {
+  // In order to compute cycle-freeness, we do SCC finding on the instruction,
+  // and see what kind of SCC it ends up in.  If it is a singleton, it is
+  // cycle-free.  If it is not in a singleton, it is only cycle free if the
+  // other members are all phi nodes (as they do not compute anything, they are
+  // copies).
+  auto ICS = InstCycleState.lookup(I);
+  if (ICS == ICS_Unknown) {
+    SCCFinder.Start(I);
+    auto &SCC = SCCFinder.getComponentFor(I);
     // It's cycle free if it's size 1 or or the SCC is *only* phi nodes.
     if (SCC.size() == 1)
-      PhiCycleState.insert({PN, PCS_CycleFree});
+      InstCycleState.insert({I, ICS_CycleFree});
     else {
       bool AllPhis =
           llvm::all_of(SCC, [](const Value *V) { return isa<PHINode>(V); });
-      PCS = AllPhis ? PCS_CycleFree : PCS_Cycle;
+      ICS = AllPhis ? ICS_CycleFree : ICS_Cycle;
       for (auto *Member : SCC)
         if (auto *MemberPhi = dyn_cast<PHINode>(Member))
-          PhiCycleState.insert({MemberPhi, PCS});
+          InstCycleState.insert({MemberPhi, ICS});
     }
   }
-  if (PCS == PCS_Cycle)
+  if (ICS == ICS_Cycle)
     return false;
   return true;
 }
@@ -1518,7 +1616,7 @@ const Expression *NewGVN::performSymboli
       // constants, or all operands are ignored but the undef, it also must be
       // cycle free.
       if (!AllConstant && HasBackedge && NumOps > 0 &&
-          !isa<UndefValue>(AllSameValue) && !isCycleFree(cast<PHINode>(I)))
+          !isa<UndefValue>(AllSameValue) && !isCycleFree(I))
         return E;
 
       // Only have to check for instructions
@@ -1689,8 +1787,18 @@ const Expression *NewGVN::performSymboli
   return createExpression(I);
 }
 
+// Return true if V is a value that will always be available (IE can
+// be placed anywhere) in the function.  We don't do globals here
+// because they are often worse to put in place.
+// TODO: Separate cost from availability
+static bool alwaysAvailable(Value *V) {
+  return isa<Constant>(V) || isa<Argument>(V);
+}
+
 // Substitute and symbolize the value before value numbering.
-const Expression *NewGVN::performSymbolicEvaluation(Value *V) const {
+const Expression *
+NewGVN::performSymbolicEvaluation(Value *V,
+                                  SmallPtrSetImpl<Value *> &Visited) const {
   const Expression *E = nullptr;
   if (auto *C = dyn_cast<Constant>(V))
     E = createConstantExpression(C);
@@ -1768,12 +1876,22 @@ const Expression *NewGVN::performSymboli
   return E;
 }
 
+void NewGVN::addAdditionalUsers(Value *To, Value *User) const {
+  AdditionalUsers[To].insert(User);
+}
+
 void NewGVN::markUsersTouched(Value *V) {
   // Now mark the users as touched.
   for (auto *User : V->users()) {
     assert(isa<Instruction>(User) && "Use of value not within an instruction?");
     TouchedInstructions.set(InstrToDFSNum(User));
   }
+  const auto Result = AdditionalUsers.find(V);
+  if (Result != AdditionalUsers.end()) {
+    for (auto *User : Result->second)
+      TouchedInstructions.set(InstrToDFSNum(User));
+    AdditionalUsers.erase(Result);
+  }
 }
 
 void NewGVN::addMemoryUsers(const MemoryAccess *To, MemoryAccess *U) const {
@@ -1800,6 +1918,10 @@ void NewGVN::markMemoryUsersTouched(cons
 
 // Add I to the set of users of a given predicate.
 void NewGVN::addPredicateUsers(const PredicateBase *PB, Instruction *I) const {
+  // Don't add temporary instructions to the user lists.
+  if (AllTempInstructions.count(I))
+    return;
+
   if (auto *PBranch = dyn_cast<PredicateBranch>(PB))
     PredicateToUsers[PBranch->Condition].insert(I);
   else if (auto *PAssume = dyn_cast<PredicateBranch>(PB))
@@ -1855,11 +1977,11 @@ const MemoryAccess *NewGVN::getNextMemor
   assert(!CC->definesNoMemory() && "Can't get next leader if there is none");
   if (CC->getStoreCount() > 0) {
     if (auto *NL = dyn_cast_or_null<StoreInst>(CC->getNextLeader().first))
-      return MSSA->getMemoryAccess(NL);
+      return getMemoryAccess(NL);
     // Find the store with the minimum DFS number.
     auto *V = getMinDFSOfRange<Value>(make_filter_range(
         *CC, [&](const Value *V) { return isa<StoreInst>(V); }));
-    return MSSA->getMemoryAccess(cast<StoreInst>(V));
+    return getMemoryAccess(cast<StoreInst>(V));
   }
   assert(CC->getStoreCount() == 0);
 
@@ -1982,7 +2104,7 @@ void NewGVN::moveValueToNewCongruenceCla
   // instructions before.
 
   // If it's not a memory use, set the MemoryAccess equivalence
-  auto *InstMA = dyn_cast_or_null<MemoryDef>(MSSA->getMemoryAccess(I));
+  auto *InstMA = dyn_cast_or_null<MemoryDef>(getMemoryAccess(I));
   if (InstMA)
     moveMemoryToNewCongruenceClass(I, InstMA, OldClass, NewClass);
   ValueToClass[I] = NewClass;
@@ -2014,21 +2136,31 @@ void NewGVN::moveValueToNewCongruenceCla
   }
 }
 
+// For a given expression, mark the phi of ops instructions that could have
+// changed as a result.
+void NewGVN::markPhiOfOpsChanged(const HashedExpression &HE) {
+  auto PhiOfOpsSet = ExpressionToPhiOfOps.find_as(HE);
+  if (PhiOfOpsSet != ExpressionToPhiOfOps.end()) {
+    for (auto I : PhiOfOpsSet->second)
+      TouchedInstructions.set(InstrToDFSNum(I));
+    ExpressionToPhiOfOps.erase(PhiOfOpsSet);
+  }
+}
 // Perform congruence finding on a given value numbering expression.
 void NewGVN::performCongruenceFinding(Instruction *I, const Expression *E) {
   // This is guaranteed to return something, since it will at least find
   // TOP.
-
   CongruenceClass *IClass = ValueToClass[I];
   assert(IClass && "Should have found a IClass");
   // Dead classes should have been eliminated from the mapping.
   assert(!IClass->isDead() && "Found a dead class");
 
-  CongruenceClass *EClass;
+  CongruenceClass *EClass = nullptr;
+  HashedExpression HE(E);
   if (const auto *VE = dyn_cast<VariableExpression>(E)) {
-    EClass = ValueToClass[VE->getVariableValue()];
+    EClass = ValueToClass.lookup(VE->getVariableValue());
   } else {
-    auto lookupResult = ExpressionToClass.insert({E, nullptr});
+    auto lookupResult = ExpressionToClass.insert_as({E, nullptr}, HE);
 
     // If it's not in the value table, create a new congruence class.
     if (lookupResult.second) {
@@ -2077,10 +2209,13 @@ void NewGVN::performCongruenceFinding(In
   if (ClassChanged || LeaderChanged) {
     DEBUG(dbgs() << "New class " << EClass->getID() << " for expression " << *E
                  << "\n");
-    if (ClassChanged)
+    if (ClassChanged) {
       moveValueToNewCongruenceClass(I, E, IClass, EClass);
+      markPhiOfOpsChanged(HE);
+    }
+
     markUsersTouched(I);
-    if (MemoryAccess *MA = MSSA->getMemoryAccess(I))
+    if (MemoryAccess *MA = getMemoryAccess(I))
       markMemoryUsersTouched(MA);
     if (auto *CI = dyn_cast<CmpInst>(I))
       markPredicateUsersTouched(CI);
@@ -2118,7 +2253,7 @@ void NewGVN::updateReachableEdge(BasicBl
       // impact predicates. Otherwise, only mark the phi nodes as touched, as
       // they are the only thing that depend on new edges. Anything using their
       // values will get propagated to if necessary.
-      if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To))
+      if (MemoryAccess *MemPhi = getMemoryAccess(To))
         TouchedInstructions.set(InstrToDFSNum(MemPhi));
 
       auto BI = To->begin();
@@ -2126,6 +2261,12 @@ void NewGVN::updateReachableEdge(BasicBl
         TouchedInstructions.set(InstrToDFSNum(&*BI));
         ++BI;
       }
+      const auto PHIResult = PHIOfOpsPHIs.find(To);
+      if (PHIResult != PHIOfOpsPHIs.end()) {
+        const auto &PHIs = PHIResult->second;
+        for (auto I : PHIs)
+          TouchedInstructions.set(InstrToDFSNum(I));
+      }
     }
   }
 }
@@ -2215,7 +2356,7 @@ void NewGVN::processOutgoingEdges(Termin
     // This also may be a memory defining terminator, in which case, set it
     // equivalent only to itself.
     //
-    auto *MA = MSSA->getMemoryAccess(TI);
+    auto *MA = getMemoryAccess(TI);
     if (MA && !isa<MemoryUse>(MA)) {
       auto *CC = ensureLeaderOfMemoryClass(MA);
       if (setMemoryClass(MA, CC))
@@ -2224,6 +2365,158 @@ void NewGVN::processOutgoingEdges(Termin
   }
 }
 
+void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB,
+                         Instruction *ExistingValue) { // Register temp phi Op.
+  InstrDFS[Op] = InstrToDFSNum(ExistingValue); // Op reuses ExistingValue's DFS number.
+  AllTempInstructions.insert(Op); // Op is a temporary until eliminated/inserted.
+  PHIOfOpsPHIs[BB].push_back(Op); // Per-block list of phi of ops phis.
+  TempToBlock[Op] = BB; // Op has no parent yet, so record its block here.
+  if (ExistingValue) // NOTE(review): already used unchecked above; confirm null is possible.
+    RealToTemp[ExistingValue] = Op;
+}
+
+static bool okayForPHIOfOps(const Instruction *I) { // Kinds eligible for phi of ops.
+  return isa<BinaryOperator>(I) || isa<SelectInst>(I) || isa<CmpInst>(I) ||
+         isa<LoadInst>(I);
+}
+
+// When we see an instruction that is an op of phis, generate the equivalent phi
+// of ops form.  Returns nullptr when no phi of ops expression could be made.
+const Expression *
+NewGVN::makePossiblePhiOfOps(Instruction *I, bool HasBackedge,
+                             SmallPtrSetImpl<Value *> &Visited) {
+  if (!okayForPHIOfOps(I))
+    return nullptr;
+
+  if (!Visited.insert(I).second)
+    return nullptr;
+  // For now, we require the instruction be cycle free because we don't
+  // *always* create a phi of ops for instructions that could be done as phi
+  // of ops, we only do it if we think it is useful.  If we did do it all the
+  // time, we could remove the cycle free check.
+  if (!isCycleFree(I))
+    return nullptr;
+
+  unsigned IDFSNum = InstrToDFSNum(I);
+  // Pretty much all of the instructions we can convert to phi of ops over a
+  // backedge that are adds, are really induction variables, and those are
+  // pretty much pointless to convert.  This is very coarse-grained for a
+  // test, so if we do find some value, we can change it later.
+  // But otherwise, what can happen is we convert the induction variable from
+  //
+  // i = phi (0, tmp)
+  // tmp = i + 1
+  //
+  // to
+  // i = phi (0, tmpphi)
+  // tmpphi = phi(1, tmpphi+1)
+  //
+  // Which we don't want to happen.  We could just avoid this for all non-cycle
+  // free phis, and we may go that route.
+  if (HasBackedge && I->getOpcode() == Instruction::Add)
+    return nullptr;
+
+  SmallPtrSet<const Value *, 8> ProcessedPHIs;
+  // TODO: We don't do phi translation on memory accesses because it's
+  // complicated. For a load, we'd need to be able to simulate a new memoryuse,
+  // which we don't have a good way of doing ATM.
+  auto *MemAccess = getMemoryAccess(I);
+  // If the memory operation is defined by a memory operation in this block
+  // that isn't a MemoryPhi, transforming the pointer backwards through a scalar
+  // phi can't help, as it would still be killed by that memory operation.
+  if (MemAccess && !isa<MemoryPhi>(MemAccess->getDefiningAccess()) &&
+      MemAccess->getDefiningAccess()->getBlock() == I->getParent())
+    return nullptr;
+
+  // Convert op of phis to phi of ops (returns after the first eligible phi).
+  for (auto &Op : I->operands()) {
+    if (!isa<PHINode>(Op))
+      continue;
+    auto *OpPHI = cast<PHINode>(Op);
+    // No point in doing this for one-operand phis.
+    if (OpPHI->getNumOperands() == 1)
+      continue;
+    if (!DebugCounter::shouldExecute(PHIOfOpsCounter))
+      return nullptr;
+    SmallVector<std::pair<Value *, BasicBlock *>, 4> Ops;
+    auto *PHIBlock = getBlockForValue(OpPHI);
+    for (auto PredBB : OpPHI->blocks()) {
+      Value *FoundVal = nullptr;
+      // We could just skip unreachable edges entirely but it's tricky to do
+      // with rewriting existing phi nodes.
+      if (ReachableEdges.count({PredBB, PHIBlock})) {
+        // Clone the instruction, create an expression from it, and see if we
+        // have a leader.
+        Instruction *ValueOp = I->clone();
+        auto Iter = TempToMemory.end();
+        if (MemAccess)
+          Iter = TempToMemory.insert({ValueOp, MemAccess}).first;
+
+        for (auto &Op : ValueOp->operands()) { // Shadows the outer Op.
+          Op = Op->DoPHITranslation(PHIBlock, PredBB);
+          // When this operand changes, it could change whether there is a
+          // leader for us or not.
+          addAdditionalUsers(Op, I);
+        }
+        // Make sure it's marked as a temporary instruction.
+        AllTempInstructions.insert(ValueOp);
+        // and make sure anything that tries to add its DFS number is
+        // redirected to the instruction we are making a phi of ops
+        // for.
+        InstrDFS.insert({ValueOp, IDFSNum});
+        const Expression *E = performSymbolicEvaluation(ValueOp, Visited);
+        InstrDFS.erase(ValueOp);
+        AllTempInstructions.erase(ValueOp);
+        ValueOp->deleteValue();
+        if (MemAccess)
+          TempToMemory.erase(Iter);
+        if (!E)
+          return nullptr;
+        FoundVal = findPhiOfOpsLeader(E, PredBB);
+        if (!FoundVal) {
+          ExpressionToPhiOfOps[E].insert(I);
+          return nullptr;
+        }
+        if (auto *SI = dyn_cast<StoreInst>(FoundVal))
+          FoundVal = SI->getValueOperand();
+      } else {
+        DEBUG(dbgs() << "Skipping phi of ops operand for incoming block "
+                     << getBlockName(PredBB)
+                     << " because the block is unreachable\n");
+        FoundVal = UndefValue::get(I->getType());
+      }
+
+      Ops.push_back({FoundVal, PredBB});
+      DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in "
+                   << getBlockName(PredBB) << "\n");
+    }
+    auto *ValuePHI = RealToTemp.lookup(I);
+    bool NewPHI = false;
+    if (!ValuePHI) {
+      ValuePHI = PHINode::Create(I->getType(), OpPHI->getNumOperands());
+      addPhiOfOps(ValuePHI, PHIBlock, I);
+      NewPHI = true;
+      NumGVNPHIOfOpsCreated++;
+    }
+    if (NewPHI) {
+      for (auto PHIOp : Ops)
+        ValuePHI->addIncoming(PHIOp.first, PHIOp.second);
+    } else { // Reusing an existing temporary phi: overwrite its operands.
+      unsigned int i = 0;
+      for (auto PHIOp : Ops) {
+        ValuePHI->setIncomingValue(i, PHIOp.first);
+        ValuePHI->setIncomingBlock(i, PHIOp.second);
+        ++i;
+      }
+    }
+
+    DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I
+                 << "\n");
+    return performSymbolicEvaluation(ValuePHI, Visited);
+  }
+  return nullptr;
+}
+
 // The algorithm initially places the values of the routine in the TOP
 // congruence class. The leader of TOP is the undetermined value `undef`.
 // When the algorithm has finished, values still in TOP are unreachable.
@@ -2266,6 +2559,12 @@ void NewGVN::initializeCongruenceClasses
           TOPClass->incStoreCount();
       }
     for (auto &I : *BB) {
+      // TODO: Move to helper
+      if (isa<PHINode>(&I))
+        for (auto *U : I.users())
+          if (auto *UInst = dyn_cast<Instruction>(U))
+            if (InstrToDFSNum(UInst) != 0 && okayForPHIOfOps(UInst))
+              PHINodeUses.insert(UInst);
       // Don't insert void terminators into the class. We don't value number
       // them, and they just end up sitting in TOP.
       if (isa<TerminatorInst>(I) && I.getType()->isVoidTy())
@@ -2290,12 +2589,35 @@ void NewGVN::cleanupTables() {
     CongruenceClasses[i] = nullptr;
   }
 
+  // Destroy the temporary instructions (e.g. the phi of ops phis).
+  SmallVector<Instruction *, 8> TempInst(AllTempInstructions.begin(),
+                                         AllTempInstructions.end());
+  AllTempInstructions.clear();
+
+  // We have to drop all references for everything first, so there are no uses
+  // left as we delete them.
+  for (auto *I : TempInst) {
+    I->dropAllReferences();
+  }
+
+  while (!TempInst.empty()) {
+    auto *I = TempInst.back();
+    TempInst.pop_back();
+    I->deleteValue();
+  }
+
   ValueToClass.clear();
   ArgRecycler.clear(ExpressionAllocator);
   ExpressionAllocator.Reset();
   CongruenceClasses.clear();
   ExpressionToClass.clear();
   ValueToExpression.clear();
+  RealToTemp.clear();
+  AdditionalUsers.clear();
+  ExpressionToPhiOfOps.clear();
+  TempToBlock.clear();
+  TempToMemory.clear();
+  PHIOfOpsPHIs.clear();
   ReachableBlocks.clear();
   ReachableEdges.clear();
 #ifndef NDEBUG
@@ -2311,14 +2633,17 @@ void NewGVN::cleanupTables() {
   MemoryToUsers.clear();
 }
 
+// Assign local DFS number mapping to instructions, and leave space for Value
+// PHI's.
 std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
                                                        unsigned Start) {
   unsigned End = Start;
-  if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) {
+  if (MemoryAccess *MemPhi = getMemoryAccess(B)) {
     InstrDFS[MemPhi] = End++;
     DFSToInstr.emplace_back(MemPhi);
   }
 
+  // Then the real block goes next.
   for (auto &I : *B) {
     // There's no need to call isInstructionTriviallyDead more than once on
     // an instruction. Therefore, once we know that an instruction is dead
@@ -2329,7 +2654,6 @@ std::pair<unsigned, unsigned> NewGVN::as
       markInstructionForDeletion(&I);
       continue;
     }
-
     InstrDFS[&I] = End++;
     DFSToInstr.emplace_back(&I);
   }
@@ -2340,7 +2664,7 @@ std::pair<unsigned, unsigned> NewGVN::as
   return std::make_pair(Start, End);
 }
 
-void NewGVN::updateProcessedCount(Value *V) {
+void NewGVN::updateProcessedCount(const Value *V) {
 #ifndef NDEBUG
   if (ProcessedCount.count(V) == 0) {
     ProcessedCount.insert({V, 1});
@@ -2359,7 +2683,7 @@ void NewGVN::valueNumberMemoryPhi(Memory
   const BasicBlock *PHIBlock = MP->getBlock();
   auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
     return lookupMemoryLeader(cast<MemoryAccess>(U)) != MP &&
-           !isMemoryAccessTop(cast<MemoryAccess>(U)) &&
+           !isMemoryAccessTOP(cast<MemoryAccess>(U)) &&
            ReachableEdges.count({MP->getIncomingBlock(U), PHIBlock});
   });
   // If all that is left is nothing, our memoryphi is undef. We keep it as
@@ -2412,18 +2736,26 @@ void NewGVN::valueNumberInstruction(Inst
   DEBUG(dbgs() << "Processing instruction " << *I << "\n");
   if (!I->isTerminator()) {
     const Expression *Symbolized = nullptr;
+    SmallPtrSet<Value *, 2> Visited;
     if (DebugCounter::shouldExecute(VNCounter)) {
-      Symbolized = performSymbolicEvaluation(I);
+      Symbolized = performSymbolicEvaluation(I, Visited);
+      // Make a phi of ops if necessary
+      if (Symbolized && !isa<ConstantExpression>(Symbolized) &&
+          !isa<VariableExpression>(Symbolized) && PHINodeUses.count(I)) {
+        // FIXME: Backedge argument
+        auto *PHIE = makePossiblePhiOfOps(I, false, Visited);
+        if (PHIE)
+          Symbolized = PHIE;
+      }
+
     } else {
       // Mark the instruction as unused so we don't value number it again.
       InstrDFS[I] = 0;
     }
     // If we couldn't come up with a symbolic expression, use the unknown
     // expression
-    if (Symbolized == nullptr) {
+    if (Symbolized == nullptr)
       Symbolized = createUnknownExpression(I);
-    }
-
     performCongruenceFinding(I, Symbolized);
   } else {
     // Handle terminators that return values. All of them produce values we
@@ -2542,8 +2874,6 @@ void NewGVN::verifyMemoryCongruency() co
 
   auto Filtered = make_filter_range(MemoryAccessToClass, ReachableAccessPred);
   for (auto KV : Filtered) {
-    assert(KV.second != TOPClass &&
-           "Memory not unreachable but ended up in TOP");
     if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
       auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second->getMemoryLeader());
       if (FirstMUD && SecondMUD) {
@@ -2663,7 +2993,7 @@ void NewGVN::iterateTouchedInstructions(
   // Nothing set, nothing to iterate, just return.
   if (FirstInstr == -1)
     return;
-  BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
+  const BasicBlock *LastBlock = getBlockForValue(InstrFromDFSNum(FirstInstr));
   while (TouchedInstructions.any()) {
     ++Iterations;
     // Walk through all the instructions in all the blocks in RPO.
@@ -2680,7 +3010,7 @@ void NewGVN::iterateTouchedInstructions(
       }
 
       Value *V = InstrFromDFSNum(InstrNum);
-      BasicBlock *CurrBlock = getBlockForValue(V);
+      const BasicBlock *CurrBlock = getBlockForValue(V);
 
       // If we hit a new block, do reachability processing.
       if (CurrBlock != LastBlock) {
@@ -2761,6 +3091,7 @@ bool NewGVN::runGVN() {
     BlockInstRange.insert({B, BlockRange});
     ICount += BlockRange.second - BlockRange.first;
   }
+  initializeCongruenceClasses(F);
 
   TouchedInstructions.resize(ICount);
   // Ensure we don't end up resizing the expressionToClass map, as
@@ -2771,9 +3102,10 @@ bool NewGVN::runGVN() {
   // Initialize the touched instructions to include the entry block.
   const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock());
   TouchedInstructions.set(InstRange.first, InstRange.second);
+  DEBUG(dbgs() << "Block " << getBlockName(&F.getEntryBlock())
+               << " marked reachable\n");
   ReachableBlocks.insert(&F.getEntryBlock());
 
-  initializeCongruenceClasses(F);
   iterateTouchedInstructions();
   verifyMemoryCongruency();
   verifyIterationSettled(F);
@@ -2786,7 +3118,8 @@ bool NewGVN::runGVN() {
     if (!ToErase->use_empty())
       ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType()));
 
-    ToErase->eraseFromParent();
+    if (ToErase->getParent())
+      ToErase->eraseFromParent();
   }
 
   // Delete all unreachable blocks.
@@ -2805,14 +3138,6 @@ bool NewGVN::runGVN() {
   return Changed;
 }
 
-// Return true if V is a value that will always be available (IE can
-// be placed anywhere) in the function.  We don't do globals here
-// because they are often worse to put in place.
-// TODO: Separate cost from availability
-static bool alwaysAvailable(Value *V) {
-  return isa<Constant>(V) || isa<Argument>(V);
-}
-
 struct NewGVN::ValueDFS {
   int DFSIn = 0;
   int DFSOut = 0;
@@ -2902,9 +3227,21 @@ void NewGVN::convertClassToDFSOrdered(
     }
     assert(isa<Instruction>(D) &&
            "The dense set member should always be an instruction");
-    VDDef.LocalNum = InstrToDFSNum(D);
-    DFSOrderedSet.emplace_back(VDDef);
     Instruction *Def = cast<Instruction>(D);
+    VDDef.LocalNum = InstrToDFSNum(D);
+    DFSOrderedSet.push_back(VDDef);
+    // If there is a phi node equivalent, add it
+    if (auto *PN = RealToTemp.lookup(Def)) {
+      auto *PHIE =
+          dyn_cast_or_null<PHIExpression>(ValueToExpression.lookup(Def));
+      if (PHIE) {
+        VDDef.Def.setInt(false);
+        VDDef.Def.setPointer(PN);
+        VDDef.LocalNum = 0;
+        DFSOrderedSet.push_back(VDDef);
+      }
+    }
+
     unsigned int UseCount = 0;
     // Now add the uses.
     for (auto &U : Def->uses()) {
@@ -2921,7 +3258,7 @@ void NewGVN::convertClassToDFSOrdered(
           // they are from.
           VDUse.LocalNum = InstrDFS.size() + 1;
         } else {
-          IBlock = I->getParent();
+          IBlock = getBlockForValue(I);
           VDUse.LocalNum = InstrToDFSNum(I);
         }
 
@@ -3091,6 +3428,37 @@ private:
 };
 }
 
+// Given an expression and a basic block, return a value for that expression
+// that is available in the block, or nullptr if no such leader is found.
+Value *NewGVN::findPhiOfOpsLeader(const Expression *E,
+                                  const BasicBlock *BB) const {
+  // It would already be constant if we could make it constant
+  if (auto *CE = dyn_cast<ConstantExpression>(E))
+    return CE->getConstantValue();
+  if (auto *VE = dyn_cast<VariableExpression>(E))
+    return VE->getVariableValue();
+
+  auto *CC = ExpressionToClass.lookup(E);
+  if (!CC)
+    return nullptr;
+  if (alwaysAvailable(CC->getLeader()))
+    return CC->getLeader();
+
+  for (auto Member : *CC) {
+    auto *MemberInst = dyn_cast<Instruction>(Member);
+    // Anything that isn't an instruction is always available.
+    if (!MemberInst)
+      return Member;
+    // If we are looking for something in the same block as the member, it must
+    // be a leader because this function is looking for operands for a phi node.
+    if (MemberInst->getParent() == BB || // NOTE(review): assumes members have a parent block.
+        DT->dominates(MemberInst->getParent(), BB)) {
+      return Member;
+    }
+  }
+  return nullptr;
+}
+
 bool NewGVN::eliminateInstructions(Function &F) {
   // This is a non-standard eliminator. The normal way to eliminate is
   // to walk the dominator tree in order, keeping track of available
@@ -3121,25 +3489,46 @@ bool NewGVN::eliminateInstructions(Funct
   // DFS numbers are updated, we compute some ourselves.
   DT->updateDFSNumbers();
 
-  for (auto &B : F) {
-    if (!ReachableBlocks.count(&B)) {
-      for (const auto S : successors(&B)) {
-        for (auto II = S->begin(); isa<PHINode>(II); ++II) {
-          auto &Phi = cast<PHINode>(*II);
-          DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block "
-                       << getBlockName(&B)
-                       << " with undef due to it being unreachable\n");
-          for (auto &Operand : Phi.incoming_values())
-            if (Phi.getIncomingBlock(Operand) == &B)
-              Operand.set(UndefValue::get(Phi.getType()));
+  // Go through all of our phi nodes, and kill the arguments associated with unreachable edges.
+  auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) {
+    for (auto &Operand : PHI.incoming_values())
+      if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) {
+        DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block "
+                     << getBlockName(PHI.getIncomingBlock(Operand))
+                     << " with undef due to it being unreachable\n");
+        Operand.set(UndefValue::get(PHI.getType()));
+      }
+  };
+  SmallPtrSet<BasicBlock *, 8> BlocksWithPhis;
+  for (auto &B : F)
+    if ((!B.empty() && isa<PHINode>(*B.begin())) ||
+        (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end()))
+      BlocksWithPhis.insert(&B);
+  DenseMap<const BasicBlock *, unsigned> ReachablePredCount;
+  for (auto KV : ReachableEdges)
+    ReachablePredCount[KV.getEnd()]++;
+  for (auto *BB : BlocksWithPhis)
+    // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in
+    // the block and subtract the pred count, but it's more complicated.
+    if (ReachablePredCount.lookup(BB) !=
+        std::distance(pred_begin(BB), pred_end(BB))) {
+      for (auto II = BB->begin(); isa<PHINode>(II); ++II) {
+        auto &PHI = cast<PHINode>(*II);
+        ReplaceUnreachablePHIArgs(PHI, BB);
+      }
+      auto PHIResult = PHIOfOpsPHIs.find(BB);
+      if (PHIResult != PHIOfOpsPHIs.end()) {
+        auto &PHIs = PHIResult->second;
+        for (auto I : PHIs) {
+          auto *PHI = dyn_cast<PHINode>(I);
+          ReplaceUnreachablePHIArgs(*PHI, BB);
         }
       }
     }
-  }
 
   // Map to store the use counts
   DenseMap<const Value *, unsigned int> UseCounts;
-  for (CongruenceClass *CC : reverse(CongruenceClasses)) {
+  for (auto *CC : reverse(CongruenceClasses)) {
     // Track the equivalent store info so we can decide whether to try
     // dead store elimination.
     SmallVector<ValueDFS, 8> PossibleDeadStores;
@@ -3187,7 +3576,7 @@ bool NewGVN::eliminateInstructions(Funct
       DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID()
                    << "\n");
       // If this is a singleton, we can skip it.
-      if (CC->size() != 1) {
+      if (CC->size() != 1 || RealToTemp.lookup(Leader)) {
         // This is a stack because equality replacement/etc may place
         // constants in the middle of the member list, and we want to use
         // those constant values in preference to the current leader, over
@@ -3209,6 +3598,22 @@ bool NewGVN::eliminateInstructions(Funct
           // We ignore void things because we can't get a value from them.
           if (Def && Def->getType()->isVoidTy())
             continue;
+          auto *DefInst = dyn_cast_or_null<Instruction>(Def);
+          if (DefInst && AllTempInstructions.count(DefInst)) {
+            auto *PN = cast<PHINode>(DefInst);
+
+            // If this is a value phi and that's the expression we used,
+            // insert it into the program and remove it from the temp
+            // instruction list.
+            AllTempInstructions.erase(PN);
+            auto *DefBlock = getBlockForValue(Def);
+            DEBUG(dbgs() << "Inserting fully real phi of ops" << *Def
+                         << " into block "
+                         << getBlockName(getBlockForValue(Def)) << "\n");
+            PN->insertBefore(&DefBlock->front());
+            Def = PN;
+            NumGVNPHIOfOpsEliminations++;
+          }
 
           if (EliminationStack.empty()) {
             DEBUG(dbgs() << "Elimination Stack is empty\n");
@@ -3373,7 +3778,6 @@ bool NewGVN::eliminateInstructions(Funct
       }
     }
   }
-
   return AnythingReplaced;
 }
 
@@ -3383,19 +3787,23 @@ bool NewGVN::eliminateInstructions(Funct
 // we will simplify an operation with all constants so that it doesn't matter
 // what order they appear in.
 unsigned int NewGVN::getRank(const Value *V) const {
-  // Prefer undef to anything else
+  // Prefer constants to undef to anything else
+  // Undef is a constant, have to check it first.
+  // Prefer smaller constants to constantexprs
+  if (isa<ConstantExpr>(V))
+    return 2;
   if (isa<UndefValue>(V))
-    return 0;
-  if (isa<Constant>(V))
     return 1;
+  if (isa<Constant>(V))
+    return 0;
   else if (auto *A = dyn_cast<Argument>(V))
-    return 2 + A->getArgNo();
+    return 3 + A->getArgNo();
 
   // Need to shift the instruction DFS by number of arguments + 3 to account for
   // the constant and argument ranking above.
   unsigned Result = InstrToDFSNum(V);
   if (Result > 0)
-    return 3 + NumFuncArgs + Result;
+    return 4 + NumFuncArgs + Result;
   // Unreachable or something else, just return a really large number.
   return ~0;
 }

Added: llvm/trunk/test/Transforms/NewGVN/completeness.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/completeness.ll?rev=303444&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/completeness.ll (added)
+++ llvm/trunk/test/Transforms/NewGVN/completeness.ll Fri May 19 14:01:27 2017
@@ -0,0 +1,415 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @test1(i32, i8**) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
+; CHECK:         br label [[TMP6:%.*]]
+; CHECK:         br label [[TMP6]]
+; CHECK:         [[TMP7:%.*]] = phi i32 [ 75, [[TMP4]] ], [ 105, [[TMP5]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 5, [[TMP4]] ], [ 7, [[TMP5]] ]
+; CHECK-NEXT:    ret i32 [[TMP7]]
+;
+  %3 = icmp ne i32 %0, 0
+  br i1 %3, label %4, label %5
+
+; <label>:4:                                      ; preds = %2
+  br label %6
+
+; <label>:5:                                      ; preds = %2
+  br label %6
+
+; <label>:6:                                      ; preds = %5, %4
+  %.0 = phi i32 [ 5, %4 ], [ 7, %5 ]
+  %7 = mul nsw i32 %.0, 15
+  ret i32 %7
+}
+
+define i32 @test2(i32) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
+; CHECK:         br label [[TMP5:%.*]]
+; CHECK:         br label [[TMP5]]
+; CHECK:         [[DOT01:%.*]] = phi i32 [ 3, [[TMP3]] ], [ 2, [[TMP4]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 2, [[TMP3]] ], [ 3, [[TMP4]] ]
+; CHECK-NEXT:    ret i32 5
+;
+  %2 = icmp ne i32 %0, 0
+  br i1 %2, label %3, label %4
+
+; <label>:3:                                      ; preds = %1
+  br label %5
+
+; <label>:4:                                      ; preds = %1
+  br label %5
+
+; <label>:5:                                      ; preds = %4, %3
+  %.01 = phi i32 [ 3, %3 ], [ 2, %4 ]
+  %.0 = phi i32 [ 2, %3 ], [ 3, %4 ]
+  %6 = add nsw i32 %.01, %.0
+  ret i32 %6
+}
+define i32 @test3(i1 %which) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ -877, [[ENTRY:%.*]] ], [ 113, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 1000, [[ENTRY]] ], [ 10, [[DELAY]] ]
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+  %value = sub i32 123, %A
+  ret i32 %value
+}
+
+define <2 x i32> @test3vec(i1 %which) {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ <i32 -877, i32 -877>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1000, i32 1000>, [[ENTRY]] ], [ <i32 10, i32 10>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[TMP0]]
+;
+
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+  %value = sub <2 x i32> <i32 123, i32 123>, %A
+  ret <2 x i32> %value
+}
+
+define <2 x i32> @test3vec2(i1 %which) {
+; CHECK-LABEL: @test3vec2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ <i32 -877, i32 -2167>, [[ENTRY:%.*]] ], [ <i32 113, i32 303>, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1000, i32 2500>, [[ENTRY]] ], [ <i32 10, i32 30>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[TMP0]]
+;
+
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+  %value = sub <2 x i32> <i32 123, i32 333>, %A
+  ret <2 x i32> %value
+}
+
+;; This example is a bit contrived because we can't create fake memoryuses, so we use two loads in the if blocks
+define i32 @test4(i32, i8**, i32* noalias, i32* noalias) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    store i32 5, i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    store i32 7, i32* [[TMP3:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP0:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]]
+; CHECK:         br label [[TMP8:%.*]]
+; CHECK:         br label [[TMP8]]
+; CHECK:         [[DOT01:%.*]] = phi i32 [ 5, [[TMP6]] ], [ 7, [[TMP7]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32* [ [[TMP2]], [[TMP6]] ], [ [[TMP3]], [[TMP7]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOT0]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = mul nsw i32 [[TMP9]], 15
+; CHECK-NEXT:    [[TMP11:%.*]] = mul nsw i32 [[TMP10]], [[DOT01]]
+; CHECK-NEXT:    ret i32 [[TMP11]]
+;
+  store i32 5, i32* %2, align 4
+  store i32 7, i32* %3, align 4
+  %5 = icmp ne i32 %0, 0
+  br i1 %5, label %6, label %8
+
+; <label>:6:                                      ; preds = %4
+  %7 = load i32, i32* %2, align 4
+  br label %10
+
+; <label>:8:                                      ; preds = %4
+  %9 = load i32, i32* %3, align 4
+  br label %10
+
+; <label>:10:                                     ; preds = %8, %6
+  %.01 = phi i32 [ %7, %6 ], [ %9, %8 ]
+  %.0 = phi i32* [ %2, %6 ], [ %3, %8 ]
+  %11 = load i32, i32* %.0, align 4
+  %12 = mul nsw i32 %11, 15
+  %13 = mul nsw i32 %12, %.01
+  ret i32 %13
+}
+
+@global = common global [100 x i64] zeroinitializer, align 16
+@global.1 = common global [100 x i64] zeroinitializer, align 16
+define i64 @test5(i64 %arg) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP:%.*]] = alloca i64, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP1]], label [[BB28:%.*]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB7:%.*]]
+; CHECK:       bb4:
+; CHECK-NEXT:    br label [[BB5:%.*]]
+; CHECK:       bb5:
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[TMP9:%.*]], 0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[BB27:%.*]], label [[BB7]]
+; CHECK:       bb7:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi i64 [ [[ARG]], [[BB2]] ], [ [[TMP9]], [[BB5]] ]
+; CHECK-NEXT:    [[TMP9]] = add nsw i64 [[TMP8]], -1
+; CHECK-NEXT:    [[TMP10:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global, i64 0, i64 0), align 16
+; CHECK-NEXT:    [[TMP11:%.*]] = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global.1, i64 0, i64 0), align 16
+; CHECK-NEXT:    [[TMP12:%.*]] = mul nsw i64 [[TMP11]], [[TMP10]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[TMP12]], 0
+; CHECK-NEXT:    br i1 [[TMP13]], label [[BB5]], label [[BB14:%.*]]
+; CHECK:       bb14:
+; CHECK-NEXT:    br label [[BB15:%.*]]
+; CHECK:       bb15:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i64 [ [[TMP25:%.*]], [[BB15]] ], [ [[TMP12]], [[BB14]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi i64 [ [[TMP24:%.*]], [[BB15]] ], [ [[TMP11]], [[BB14]] ]
+; CHECK-NEXT:    [[TMP17:%.*]] = phi i64 [ [[TMP22:%.*]], [[BB15]] ], [ [[TMP10]], [[BB14]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = phi i64 [ [[TMP20:%.*]], [[BB15]] ], [ 0, [[BB14]] ]
+; CHECK-NEXT:    store i64 [[TMP0]], i64* [[TMP]], align 8
+; CHECK-NEXT:    [[TMP20]] = add nuw nsw i64 [[TMP18]], 1
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 [[TMP20]]
+; CHECK-NEXT:    [[TMP22]] = load i64, i64* [[TMP21]], align 8
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [100 x i64], [100 x i64]* @global.1, i64 0, i64 [[TMP20]]
+; CHECK-NEXT:    [[TMP24]] = load i64, i64* [[TMP23]], align 8
+; CHECK-NEXT:    [[TMP25]] = mul nsw i64 [[TMP24]], [[TMP22]]
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[TMP20]], [[TMP25]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[BB4:%.*]], label [[BB15]]
+; CHECK:       bb27:
+; CHECK-NEXT:    br label [[BB28]]
+; CHECK:       bb28:
+; CHECK-NEXT:    ret i64 0
+;
+bb:
+  %tmp = alloca i64, align 8
+  %tmp1 = icmp eq i64 %arg, 0
+  br i1 %tmp1, label %bb28, label %bb2
+
+bb2:                                              ; preds = %bb
+  %tmp3 = bitcast i64* %tmp to i8*
+  br label %bb7
+
+bb4:                                              ; preds = %bb15
+  br label %bb5
+
+bb5:                                              ; preds = %bb7, %bb4
+  %tmp6 = icmp eq i64 %tmp9, 0
+  br i1 %tmp6, label %bb27, label %bb7
+
+bb7:                                              ; preds = %bb5, %bb2
+  %tmp8 = phi i64 [ %arg, %bb2 ], [ %tmp9, %bb5 ]
+  %tmp9 = add nsw i64 %tmp8, -1
+  %tmp10 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global, i64 0, i64 0), align 16
+  %tmp11 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]* @global.1, i64 0, i64 0), align 16
+  %tmp12 = mul nsw i64 %tmp11, %tmp10
+  %tmp13 = icmp eq i64 %tmp12, 0
+  br i1 %tmp13, label %bb5, label %bb14
+
+bb14:                                             ; preds = %bb7
+  br label %bb15
+
+bb15:                                             ; preds = %bb15, %bb14
+  %tmp16 = phi i64 [ %tmp24, %bb15 ], [ %tmp11, %bb14 ]
+  %tmp17 = phi i64 [ %tmp22, %bb15 ], [ %tmp10, %bb14 ]
+  %tmp18 = phi i64 [ %tmp20, %bb15 ], [ 0, %bb14 ]
+;; This multiply is an op of phis which is really equivalent to phi(tmp25, tmp12)
+  %tmp19 = mul nsw i64 %tmp16, %tmp17
+  store i64 %tmp19, i64* %tmp, align 8
+  %tmp20 = add nuw nsw i64 %tmp18, 1
+  %tmp21 = getelementptr inbounds [100 x i64], [100 x i64]* @global, i64 0, i64 %tmp20
+  %tmp22 = load i64, i64* %tmp21, align 8
+  %tmp23 = getelementptr inbounds [100 x i64], [100 x i64]* @global.1, i64 0, i64 %tmp20
+  %tmp24 = load i64, i64* %tmp23, align 8
+  %tmp25 = mul nsw i64 %tmp24, %tmp22
+  %tmp26 = icmp eq i64 %tmp20, %tmp25
+  br i1 %tmp26, label %bb4, label %bb15
+
+bb27:                                             ; preds = %bb5
+  br label %bb28
+
+bb28:                                             ; preds = %bb27, %bb
+  ret i64 0
+}
+
+;; These icmps are all equivalent to phis of constants
+define i8 @test6(i8* %addr) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:  entry-block:
+; CHECK-NEXT:    br label %main-loop
+; CHECK:       main-loop:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i1 [ true, %entry-block ], [ false, [[CORE:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i1 [ false, %entry-block ], [ true, [[CORE]] ]
+; CHECK-NEXT:    [[PHI:%.*]] = phi i8 [ 0, %entry-block ], [ 1, [[CORE]] ]
+; CHECK-NEXT:    store volatile i8 0, i8* [[ADDR:%.*]]
+; CHECK-NEXT:    br i1 [[TMP0]], label %busy-wait-phi-0, label [[EXIT:%.*]]
+; CHECK:       busy-wait-phi-0:
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, i8* [[ADDR]]
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0
+; CHECK-NEXT:    br i1 [[ICMP]], label %busy-wait-phi-0, label [[CORE]]
+; CHECK:       core:
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TRAP:%.*]], label %main-loop
+; CHECK:       trap:
+; CHECK-NEXT:    ret i8 1
+; CHECK:       exit:
+; CHECK-NEXT:    ret i8 0
+;
+entry-block:
+  br label %main-loop
+
+main-loop:
+  %phi = phi i8 [ 0, %entry-block ], [ 1, %core ]
+  %switch_0 = icmp eq i8 %phi, 0
+  store volatile i8 0, i8* %addr
+  br i1 %switch_0, label %busy-wait-phi-0, label %exit
+
+busy-wait-phi-0:
+  %load = load volatile i8, i8* %addr
+  %icmp = icmp eq i8 %load, 0
+  br i1 %icmp, label %busy-wait-phi-0, label %core
+
+core:
+  %switch_1 = icmp eq i8 %phi, 1
+  br i1 %switch_1, label %trap, label %main-loop
+
+trap:
+  ret i8 1
+
+exit:
+  ret i8 0
+}
+
+; Test that we don't loop infinitely while simplifying
+; an undefined value that can go both ways.
+define void @test7() {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB1]]
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp = phi i32 [ undef, %bb ], [ %tmp3, %bb1 ]
+  %tmp2 = icmp eq i32 %tmp, 0
+  %tmp3 = select i1 %tmp2, i32 1, i32 %tmp
+  br label %bb1
+}
+
+
+
+; Test that we get a consistent answer about what the
+; value of this undefined select is.
+define void @test8() {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB1]]
+;
+bb:
+  %tmp = select i1 undef, i8 0, i8 1
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp2 = phi i8 [ %tmp4, %bb1 ], [ %tmp, %bb ]
+  %tmp3 = icmp eq i8 %tmp2, 0
+  %tmp4 = select i1 %tmp3, i8 1, i8 %tmp2
+  br label %bb1
+}
+
+
+;; Make sure we handle the case where we later come up with an expression that we need
+;; for a phi of ops.
+define void @test9() {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    br label [[BB1:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB6:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ -13, [[BB2]] ], [ [[TMP11:%.*]], [[BB6]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = phi i32 [ 1, [[BB2]] ], [ [[TMP8:%.*]], [[BB6]] ]
+; CHECK-NEXT:    [[TMP8]] = add nuw nsw i32 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP11]] = add i32 -14, [[TMP8]]
+; CHECK-NEXT:    br label [[BB6]]
+;
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  br i1 undef, label %bb1, label %bb2
+
+bb2:                                              ; preds = %bb1
+  %tmp = select i1 true, i32 -14, i32 -10
+  %tmp3 = add i32 %tmp, 0
+  %tmp4 = select i1 true, i32 -14, i32 -10
+  %tmp5 = add i32 %tmp4, 0
+  br label %bb6
+
+bb6:                                              ; preds = %bb6, %bb2
+  %tmp7 = phi i32 [ 1, %bb2 ], [ %tmp13, %bb6 ]
+  %tmp8 = add nuw nsw i32 %tmp7, 1
+  %tmp9 = add i32 %tmp3, %tmp7
+  %tmp10 = select i1 false, i32 undef, i32 %tmp9
+  %tmp11 = add i32 %tmp5, %tmp8
+  %tmp12 = select i1 undef, i32 undef, i32 %tmp11
+  %tmp13 = add nuw nsw i32 %tmp7, 1
+  br label %bb6
+}
+
+;; Ensure that we revisit PredicateInfo operands at the right points in time.
+define void @test10() {
+b:
+  %m = getelementptr i32, i32* null, i64 8
+  br label %g
+
+g:                                                ; preds = %i, %b
+  %n = phi i32* [ %h, %i ], [ null, %b ]
+  %h = getelementptr i32, i32* %n, i64 1
+  %j = icmp eq i32* %h, %m
+  br i1 %j, label %c, label %i
+
+i:                                                ; preds = %g
+  br i1 undef, label %k, label %g
+
+k:                                                ; preds = %i
+  %l = icmp eq i32* %n, %m
+  br i1 %l, label %c, label %o
+
+o:                                                ; preds = %k
+  br label %c
+
+c:                                                ; preds = %o, %k, %g
+  %0 = phi i32* [ undef, %o ], [ %m, %k ], [ %m, %g ]
+  ret void
+}

Modified: llvm/trunk/test/Transforms/NewGVN/storeoverstore.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/storeoverstore.ll?rev=303444&r1=303443&r2=303444&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/storeoverstore.ll (original)
+++ llvm/trunk/test/Transforms/NewGVN/storeoverstore.ll Fri May 19 14:01:27 2017
@@ -13,11 +13,11 @@ define i32 @foo(i32*, i32)  {
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
 ; CHECK:         br label [[TMP5]]
-; CHECK:         [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP2:%.*]] ]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP6:%.*]], label [[TMP8:%.*]]
-; CHECK:         [[TMP7:%.*]] = add nsw i32 [[DOT0]], 5
-; CHECK-NEXT:    br label [[TMP8]]
-; CHECK:         [[DOT1:%.*]] = phi i32 [ [[TMP7]], [[TMP6]] ], [ [[DOT0]], [[TMP5]] ]
+; CHECK:         [[TMP6:%.*]] = phi i32 [ 15, [[TMP4]] ], [ 10, [[TMP2:%.*]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP2]] ]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP8:%.*]]
+; CHECK:         br label [[TMP8]]
+; CHECK:         [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP7]] ], [ [[DOT0]], [[TMP5]] ]
 ; CHECK-NEXT:    ret i32 [[DOT1]]
 ;
   store i32 5, i32* %0, align 4
@@ -54,11 +54,11 @@ define i32 @foo2(i32*, i32)  {
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
 ; CHECK:         br label [[TMP6:%.*]]
 ; CHECK:         br label [[TMP6]]
-; CHECK:         [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP5]] ]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP9:%.*]]
-; CHECK:         [[TMP8:%.*]] = add nsw i32 [[DOT0]], 5
-; CHECK-NEXT:    br label [[TMP9]]
-; CHECK:         [[DOT1:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ [[DOT0]], [[TMP6]] ]
+; CHECK:         [[TMP7:%.*]] = phi i32 [ 15, [[TMP4]] ], [ 10, [[TMP5]] ]
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP5]] ]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]]
+; CHECK:         br label [[TMP9]]
+; CHECK:         [[DOT1:%.*]] = phi i32 [ [[TMP7]], [[TMP8]] ], [ [[DOT0]], [[TMP6]] ]
 ; CHECK-NEXT:    ret i32 [[DOT1]]
 ;
   store i32 5, i32* %0, align 4