[llvm] [NewGVN][3/3] Load coercion for loads that can be replaced by a phi (PR #68669)

Konstantina Mitropoulou via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 26 23:34:55 PDT 2023


https://github.com/kmitropoulou updated https://github.com/llvm/llvm-project/pull/68669

>From 93161908f23bf2eb9a7b442c6f8bc217797ec577 Mon Sep 17 00:00:00 2001
From: Konstantina Mitropoulou <Konstantina.Mitropoulou at amd.com>
Date: Mon, 9 Oct 2023 21:05:14 -0700
Subject: [PATCH 1/3] [NewGVN][1/3] Load coercion between load and store

Load coercion consists of two phases:
1. Collection of the load instructions that can be optimized with load coercion.
We collect pairs of a candidate load and its depending instructions. The
candidate load is the load that will be eliminated by the value that we will
extract from the depending instruction. The candidate load can be eliminated
because its memory location overlaps with the memory location of the depending
instruction.
For example, in the following snippet, the candidate load is %V2 and the
depending instruction is the store.

```
Before load coercion               After load coercion
BB1:                               BB1:
 store i32 100, ptr %P              store i32 100, ptr %P
 %V1 = ...                   =>     %V1 = ...
 %V2 = load i32, ptr %P             %V3 = add i32 %V1, 100
 %V3 = add i32 %V1, %V2
```

2. Code generation for load coercion: This phase updates the IR by eliminating
the candidate load and by updating its uses.

This patch implements load coercion between a load candidate and a store
depending instruction. The follow-up patches implement load coercion support for
instructions that have live on entry definitions and MemoryPhi definitions.
---
 llvm/lib/Transforms/Scalar/NewGVN.cpp         | 410 ++++++++++++++++--
 .../load_coercion_between_store_and_load.ll   | 341 +++++++++++++++
 .../NewGVN/{pr14166-xfail.ll => pr14166.ll}   |   1 -
 3 files changed, 712 insertions(+), 40 deletions(-)
 create mode 100644 llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll
 rename llvm/test/Transforms/NewGVN/{pr14166-xfail.ll => pr14166.ll} (98%)

diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 19ac9526b5f88b6..c62777f8cb0db07 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -76,6 +76,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
@@ -154,6 +155,10 @@ static cl::opt<bool> EnableStoreRefinement("enable-store-refinement",
 static cl::opt<bool> EnablePhiOfOps("enable-phi-of-ops", cl::init(true),
                                     cl::Hidden);
 
+// Enables load coercion for non-constant values.
+static cl::opt<bool> EnableLoadCoercion("enable-load-coercion", cl::init(true),
+                                        cl::Hidden);
+
 //===----------------------------------------------------------------------===//
 //                                GVN Pass
 //===----------------------------------------------------------------------===//
@@ -653,6 +658,16 @@ class NewGVN {
   // Deletion info.
   SmallPtrSet<Instruction *, 8> InstructionsToErase;
 
+  // Map candidate loads to their depending instructions.
+  mutable std::map<LoadInst *, DenseSet<std::pair<Instruction *, BasicBlock *>>>
+      LoadCoercion;
+
+  // Keep newly generated loads.
+  SmallVector<Instruction *, 2> NewLoadsInLoadCoercion;
+
+  // Keep newly generated instructions.
+  SmallVector<Instruction *, 2> NewlyGeneratedInsns;
+
 public:
   NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
          TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,
@@ -776,9 +791,9 @@ class NewGVN {
   ExprResult checkExprResults(Expression *, Instruction *, Value *) const;
   ExprResult performSymbolicEvaluation(Instruction *,
                                        SmallPtrSetImpl<Value *> &) const;
-  const Expression *performSymbolicLoadCoercion(Type *, Value *, LoadInst *,
-                                                Instruction *,
-                                                MemoryAccess *) const;
+  const Expression *createLoadExpAndUpdateMemUses(LoadInst *, Value *,
+                                                  MemoryAccess *,
+                                                  MemoryAccess *) const;
   const Expression *performSymbolicLoadEvaluation(Instruction *) const;
   const Expression *performSymbolicStoreEvaluation(Instruction *) const;
   ExprResult performSymbolicCallEvaluation(Instruction *) const;
@@ -853,6 +868,7 @@ class NewGVN {
   // Utilities.
   void cleanupTables();
   std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
+  void updateDFSNumbers(unsigned &);
   void updateProcessedCount(const Value *V);
   void verifyMemoryCongruency() const;
   void verifyIterationSettled(Function &F);
@@ -893,6 +909,37 @@ class NewGVN {
   // Debug counter info.  When verifying, we have to reset the value numbering
   // debug counter to the same state it started in to get the same results.
   int64_t StartingVNCounter = 0;
+
+  // The following functions are used in load coercion:
+  // Try to add the load along with the depending instruction(s) in
+  // LoadCoercion map.
+  bool tryAddLoadDepInsnIntoLoadCoercionMap(LoadInst *, Instruction *,
+                                            BasicBlock *) const;
+  // Collect the load instructions that can be optimized with load coercion.
+  // The filtering of the load instructions is based on the type of their
+  // memory access.
+  bool performSymbolicLoadCoercionForNonConstantMemoryDef(LoadInst *,
+                                                          StoreInst *,
+                                                          MemoryAccess *) const;
+  const Expression *performSymbolicLoadCoercionForConstantMemoryDef(
+      Type *, Value *, LoadInst *, Instruction *, MemoryAccess *) const;
+  // Code generation for load coercion. Replaces the load with the right
+  // instruction or the right sequence of instructions.
+  bool implementLoadCoercion();
+  // Update MemorySSA with the load instructions that are emitted during load
+  // coercion.
+  void updateMemorySSA(Instruction *, Instruction *);
+  // Extract the value that will replace the load from the depending
+  // instruction.
+  Value *getExtractedValue(LoadInst *, Instruction *);
+  // If load coercion is successful, the uses of the optimized load might need
+  // to be added to new congruence classes in order to optimize the code
+  // further. For this reason, we run value numbering for all the uses of the
+  // optimized load. If load coercion has failed, then we need to add the load
+  // (and its uses) to the right congruence class.
+  void updateUsesAfterLoadCoercionImpl(LoadInst *,
+                                       SmallVectorImpl<Instruction *> &);
+  void updateUsesAfterLoadCoercion(LoadInst *, Value *);
 };
 
 } // end anonymous namespace
@@ -1439,12 +1486,51 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
   return createStoreExpression(SI, StoreAccess);
 }
 
+bool NewGVN::tryAddLoadDepInsnIntoLoadCoercionMap(
+    LoadInst *LI, Instruction *CurrentDepI, BasicBlock *CurrentDepIBB) const {
+  // Can't forward from non-atomic to atomic without violating memory model.
+  if (LI->isAtomic() > CurrentDepI->isAtomic())
+    return false;
+
+  if (auto *DepLI = dyn_cast<LoadInst>(CurrentDepI))
+    if (LI->getAlign() < DepLI->getAlign())
+      return false;
+
+  if (auto *DepSI = dyn_cast<StoreInst>(CurrentDepI))
+    if (LI->getAlign() < DepSI->getAlign())
+      return false;
+
+  // Add the load and the corresponding depending instruction in LoadCoercion
+  // map.
+  LoadCoercion[LI].insert(std::make_pair(CurrentDepI, CurrentDepIBB));
+  return true;
+}
+
+// Find load coercion opportunities between load (LI) and store instructions
+// (DepSI).
+bool NewGVN::performSymbolicLoadCoercionForNonConstantMemoryDef(
+    LoadInst *LI, StoreInst *DepSI, MemoryAccess *DefiningAccess) const {
+  Type *LoadType = LI->getType();
+  bool IsLoadCoercionCandidate = false;
+  if (LI->isAtomic() > DepSI->isAtomic() ||
+      LoadType == DepSI->getValueOperand()->getType())
+    return false;
+
+  int Offset = analyzeLoadFromClobberingStore(
+      LoadType, lookupOperandLeader(LI->getPointerOperand()), DepSI, DL);
+  if (Offset >= 0) {
+    IsLoadCoercionCandidate |=
+        tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepSI, DepSI->getParent());
+  }
+
+  return IsLoadCoercionCandidate;
+}
+
 // See if we can extract the value of a loaded pointer from a load, a store, or
 // a memory instruction.
-const Expression *
-NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
-                                    LoadInst *LI, Instruction *DepInst,
-                                    MemoryAccess *DefiningAccess) const {
+const Expression *NewGVN::performSymbolicLoadCoercionForConstantMemoryDef(
+    Type *LoadType, Value *LoadPtr, LoadInst *LI, Instruction *DepInst,
+    MemoryAccess *DefiningAccess) const {
   assert((!LI || LI->isSimple()) && "Not a simple load");
   if (auto *DepSI = dyn_cast<StoreInst>(DepInst)) {
     // Can't forward from non-atomic to atomic without violating memory model.
@@ -1464,21 +1550,6 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
         }
       }
     }
-  } else if (auto *DepLI = dyn_cast<LoadInst>(DepInst)) {
-    // Can't forward from non-atomic to atomic without violating memory model.
-    if (LI->isAtomic() > DepLI->isAtomic())
-      return nullptr;
-    int Offset = analyzeLoadFromClobberingLoad(LoadType, LoadPtr, DepLI, DL);
-    if (Offset >= 0) {
-      // We can coerce a constant load into a load.
-      if (auto *C = dyn_cast<Constant>(lookupOperandLeader(DepLI)))
-        if (auto *PossibleConstant =
-                getConstantValueForLoad(C, Offset, LoadType, DL)) {
-          LLVM_DEBUG(dbgs() << "Coercing load from load " << *LI
-                            << " to constant " << *PossibleConstant << "\n");
-          return createConstantExpression(PossibleConstant);
-        }
-    }
   } else if (auto *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
     int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI, DL);
     if (Offset >= 0) {
@@ -1510,11 +1581,24 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
       return createConstantExpression(UndefValue::get(LoadType));
   } else if (auto *InitVal =
                  getInitialValueOfAllocation(DepInst, TLI, LoadType))
-      return createConstantExpression(InitVal);
+    return createConstantExpression(InitVal);
 
   return nullptr;
 }
 
+const Expression *
+NewGVN::createLoadExpAndUpdateMemUses(LoadInst *LI, Value *LoadAddressLeader,
+                                      MemoryAccess *OriginalAccess,
+                                      MemoryAccess *DefiningAccess) const {
+  const auto *LE = createLoadExpression(LI->getType(), LoadAddressLeader, LI,
+                                        DefiningAccess);
+  // If our MemoryLeader is not our defining access, add a use to the
+  // MemoryLeader, so that we get reprocessed when it changes.
+  if (LE->getMemoryLeader() != DefiningAccess)
+    addMemoryUsers(LE->getMemoryLeader(), OriginalAccess);
+  return LE;
+}
+
 const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
   auto *LI = cast<LoadInst>(I);
 
@@ -1531,6 +1615,22 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
   MemoryAccess *DefiningAccess =
       MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
 
+  // Do not apply load coercion to load instructions that are candidates of
+  // phi-of-ops optimization.
+  if (TempToBlock.count(LI))
+    return createLoadExpAndUpdateMemUses(LI, LoadAddressLeader, OriginalAccess,
+                                         DefiningAccess);
+
+  // Do not apply load coercion to load instructions that are generated during
+  // load coercion.
+  auto It = llvm::find(NewLoadsInLoadCoercion, LI);
+  if (It != NewLoadsInLoadCoercion.end())
+    return createLoadExpAndUpdateMemUses(LI, LoadAddressLeader, OriginalAccess,
+                                         DefiningAccess);
+
+  // Check if we can apply load coercion.
+  bool IsLoadCoercionCandidate = false;
+
   if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
     if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
       Instruction *DefiningInst = MD->getMemoryInst();
@@ -1542,19 +1642,34 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
       // certain memory operations that cause the memory to have a fixed value
       // (IE things like calloc).
       if (const auto *CoercionResult =
-              performSymbolicLoadCoercion(LI->getType(), LoadAddressLeader, LI,
-                                          DefiningInst, DefiningAccess))
+              performSymbolicLoadCoercionForConstantMemoryDef(
+                  LI->getType(), LoadAddressLeader, LI, DefiningInst,
+                  DefiningAccess))
         return CoercionResult;
+
+      if (EnableLoadCoercion) {
+        if (auto *DepSI = dyn_cast<StoreInst>(DefiningInst)) {
+          if (!isa<Constant>(lookupOperandLeader(DepSI->getValueOperand()))) {
+            IsLoadCoercionCandidate =
+                performSymbolicLoadCoercionForNonConstantMemoryDef(
+                    LI, DepSI, DefiningAccess);
+          }
+        }
+      }
     }
   }
 
-  const auto *LE = createLoadExpression(LI->getType(), LoadAddressLeader, LI,
-                                        DefiningAccess);
-  // If our MemoryLeader is not our defining access, add a use to the
-  // MemoryLeader, so that we get reprocessed when it changes.
-  if (LE->getMemoryLeader() != DefiningAccess)
-    addMemoryUsers(LE->getMemoryLeader(), OriginalAccess);
-  return LE;
+  // If LI is a candidate for load coercion, then we do not create a load
+  // expression and we remove it from PHINodeUses which keeps the candidates of
+  // phi-of-ops optimization.
+  if (EnableLoadCoercion && IsLoadCoercionCandidate) {
+    if (PHINodeUses.count(LI))
+      const_cast<NewGVN *>(this)->PHINodeUses.erase(LI);
+    return nullptr;
+  }
+  // Otherwise, we create a load expression.
+  return createLoadExpAndUpdateMemUses(LI, LoadAddressLeader, OriginalAccess,
+                                       DefiningAccess);
 }
 
 NewGVN::ExprResult
@@ -2986,6 +3101,9 @@ void NewGVN::cleanupTables() {
   MemoryToUsers.clear();
   RevisitOnReachabilityChange.clear();
   IntrinsicInstPred.clear();
+  LoadCoercion.clear();
+  NewLoadsInLoadCoercion.clear();
+  NewlyGeneratedInsns.clear();
 }
 
 // Assign local DFS number mapping to instructions, and leave space for Value
@@ -3021,6 +3139,16 @@ std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
   return std::make_pair(Start, End);
 }
 
+void NewGVN::updateDFSNumbers(unsigned &ICount) {
+  // Now a standard depth first ordering of the domtree is equivalent to RPO.
+  for (auto DTN : depth_first(DT->getRootNode())) {
+    BasicBlock *B = DTN->getBlock();
+    const auto &BlockRange = assignDFSNumbers(B, ICount);
+    BlockInstRange.insert({B, BlockRange});
+    ICount += BlockRange.second - BlockRange.first;
+  }
+}
+
 void NewGVN::updateProcessedCount(const Value *V) {
 #ifndef NDEBUG
   if (ProcessedCount.count(V) == 0) {
@@ -3458,13 +3586,7 @@ bool NewGVN::runGVN() {
       });
   }
 
-  // Now a standard depth first ordering of the domtree is equivalent to RPO.
-  for (auto *DTN : depth_first(DT->getRootNode())) {
-    BasicBlock *B = DTN->getBlock();
-    const auto &BlockRange = assignDFSNumbers(B, ICount);
-    BlockInstRange.insert({B, BlockRange});
-    ICount += BlockRange.second - BlockRange.first;
-  }
+  updateDFSNumbers(ICount);
   initializeCongruenceClasses(F);
 
   TouchedInstructions.resize(ICount);
@@ -3485,6 +3607,16 @@ bool NewGVN::runGVN() {
   verifyIterationSettled(F);
   verifyStoreExpressions();
 
+  if (EnableLoadCoercion && implementLoadCoercion()) {
+    // Update the newly generated instructions with the correct DFS numbers.
+    // TODO: Update DFS numbers faster.
+    InstrDFS.clear();
+    DFSToInstr.clear();
+    RevisitOnReachabilityChange.clear();
+    ICount = 0;
+    updateDFSNumbers(ICount);
+  }
+
   Changed |= eliminateInstructions(F);
 
   // Delete all instructions marked for deletion.
@@ -3821,6 +3953,206 @@ Value *NewGVN::findPHIOfOpsLeader(const Expression *E,
   return nullptr;
 }
 
+// Update MemorySSA for the newly emitted load instruction.
+void NewGVN::updateMemorySSA(Instruction *LoadToOptimize,
+                             Instruction *NewLoad) {
+  MemorySSAUpdater MemSSAUpdater(MSSA);
+  MemoryAccess *DefiningAccess = MSSA->getLiveOnEntryDef();
+  MemoryAccess *NewAccess = MemSSAUpdater.createMemoryAccessInBB(
+      NewLoad, DefiningAccess, NewLoad->getParent(),
+      MemorySSA::BeforeTerminator);
+  if (auto *NewDef = dyn_cast<MemoryDef>(NewAccess))
+    MemSSAUpdater.insertDef(NewDef, /*RenameUses=*/true);
+  else
+    MemSSAUpdater.insertUse(cast<MemoryUse>(NewAccess),
+                            /*RenameUses=*/true);
+}
+
+// Extract the correct value from the depending instruction.
+Value *NewGVN::getExtractedValue(LoadInst *LI, Instruction *DepI) {
+
+  Type *LoadTy = LI->getType();
+  Value *NewValue = nullptr;
+  Instruction *InsertPtr = nullptr;
+  // Emit the instructions that extract the coerced value from the depending
+  // instruction.
+  if (auto *Store = dyn_cast<StoreInst>(DepI)) {
+    int Offset = analyzeLoadFromClobberingStore(LoadTy, LI->getPointerOperand(),
+                                                Store, DL);
+    InsertPtr = Store->getNextNode();
+    NewValue = getValueForLoad(Store->getValueOperand(), Offset, LoadTy,
+                               InsertPtr, DL);
+  } else if (LoadInst *Load = dyn_cast<LoadInst>(DepI)) {
+    int Offset = analyzeLoadFromClobberingLoad(LoadTy, LI->getPointerOperand(),
+                                               Load, DL);
+    InsertPtr = Load->getNextNode();
+    NewValue = getValueForLoad(Load, Offset, LoadTy, InsertPtr, DL);
+  }
+
+  // Get the newly generated instructions and add them to NewLoadsInLoadCoercion
+  // and NewlyGeneratedInsns.
+  if (!isa<Constant>(NewValue) && !isa<Argument>(NewValue))
+    for (Instruction *CurInsn = DepI->getNextNode(); CurInsn != InsertPtr;
+         CurInsn = CurInsn->getNextNode()) {
+      if (LoadInst *NewLI = dyn_cast<LoadInst>(CurInsn)) {
+        updateMemorySSA(LI, NewLI);
+        NewLoadsInLoadCoercion.push_back(LI);
+      }
+      NewlyGeneratedInsns.push_back(CurInsn);
+    }
+
+  return NewValue;
+}
+
+void NewGVN::updateUsesAfterLoadCoercionImpl(
+    LoadInst *LI, SmallVectorImpl<Instruction *> &LIUses) {
+  // Run value numbering for the users of the candidate load instruction.
+  while (!LIUses.empty()) {
+    Instruction *I = LIUses.front();
+    assert(I != LI &&
+           "Vanity check that we do not process the optimized load.\n");
+    LIUses.erase(&*(LIUses.begin()));
+    if (InstructionsToErase.count(I))
+      continue;
+    CongruenceClass *OrigClass = ValueToClass.lookup(I);
+    valueNumberInstruction(I);
+    updateProcessedCount(I);
+    CongruenceClass *NewClass = ValueToClass.lookup(I);
+    if (OrigClass != NewClass) {
+      for (auto *User : I->users()) {
+        auto *UserI = cast<Instruction>(User);
+        LIUses.push_back(UserI);
+      }
+      if (auto *NewLI = dyn_cast<LoadInst>(I))
+        if (MemoryAccess *MA = getMemoryAccess(NewLI))
+          for (auto *MAU : MA->users()) {
+            Instruction *MAUInst = cast<Instruction>(MAU);
+            LIUses.push_back(MAUInst);
+          }
+      auto It = PredicateToUsers.find(I);
+      if (It != PredicateToUsers.end())
+        for (auto *PredI : PredicateToUsers[I]) {
+          LIUses.push_back(PredI);
+        }
+    }
+  }
+}
+
+void NewGVN::updateUsesAfterLoadCoercion(LoadInst *LI, Value *NewValue) {
+  SmallVector<Instruction *, 2> LIUses;
+  if (!NewValue) {
+    Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand());
+    MemoryAccess *OriginalAccess = getMemoryAccess(LI);
+    MemoryAccess *DefiningAccess =
+        MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
+    const Expression *Symbolized = createLoadExpAndUpdateMemUses(
+        LI, LoadAddressLeader, OriginalAccess, DefiningAccess);
+    performCongruenceFinding(LI, Symbolized);
+    for (Use &U : LI->uses())
+      LIUses.push_back(cast<Instruction>(U.getUser()));
+    updateUsesAfterLoadCoercionImpl(LI, LIUses);
+  } else {
+    CongruenceClass *LIClass = ValueToClass.lookup(LI);
+    InstructionsToErase.insert(LI);
+    LI->replaceAllUsesWith(NewValue);
+    // Collect the instructions for which we have to run value numbering.
+    // These are all the instructions that are in the same congruence class as
+    // LI and all the newly generated instructions.
+    for (auto *Member : *LIClass) {
+      auto *I = cast<Instruction>(Member);
+      if (LI == I)
+        continue;
+      LIUses.push_back(I);
+    }
+    for (Instruction *I : NewlyGeneratedInsns) {
+      TOPClass->insert(I);
+      ValueToClass[I] = TOPClass;
+      LIUses.push_back(I);
+    }
+
+    updateUsesAfterLoadCoercionImpl(LI, LIUses);
+
+    if (isa<PHINode>(NewValue)) {
+      NewValue->takeName(LI);
+    }
+    if (Instruction *I = dyn_cast<Instruction>(NewValue))
+      I->setDebugLoc(LI->getDebugLoc());
+    LLVM_DEBUG(dbgs() << "Load coersion: The load " << *LI
+                      << " was eliminated and its uses were replaced by "
+                      << *NewValue << "\n");
+  }
+  LIUses.clear();
+  NewlyGeneratedInsns.clear();
+}
+
+// Iterate over the load instructions of LoadCoercion map and replace them with
+// the right sequence of instructions.
+bool NewGVN::implementLoadCoercion() {
+  bool Changed = false;
+  for (const auto &P : LoadCoercion) {
+    LoadInst *LI = cast<LoadInst>(P.first);
+    auto DependingInsns = P.second;
+    Value *NewValue = nullptr;
+    Instruction *FirstDepI = (*DependingInsns.begin()).first;
+    MemoryAccess *OriginalAccess = getMemoryAccess(LI);
+    MemoryAccess *DefiningAccess =
+        MSSAWalker->getClobberingMemoryAccess(OriginalAccess);
+    // Firstly, we check if we can extract the correct value from the depending
+    // instruction. This happens when LI is dominated by its only depending
+    // instruction:
+    //
+    // Example 1
+    // BB1:                               BB1:
+    //  store i32 100, ptr %P              store i32 100, ptr %P
+    //  %V1 = ...                   =>     %V1 = ...
+    //  %V2 = load i32, ptr %P             %V3 = add i32 %V1, 100
+    //  %V3 = add i32 %V1, %V2
+    //
+    // Example 2
+    // Before load coercion
+    //   BB1:
+    //    store i32 100, ptr %P
+    //    br i1 %Cond, label %BB2, label %BB3
+    //          /           \
+    // BB2                  BB3
+    //  ...                  ...
+    //  br label %BB4        br label %BB4
+    //            \         /
+    //           BB4:
+    //             %V1 = ...
+    //             %V2 = load i32, ptr %P
+    //             %V3 = add i32 %V1, %V2
+    //
+    // After load coercion
+    //   BB1:
+    //    store i32 100, ptr %P
+    //    br i1 %Cond, label %BB2, label %BB3
+    //          /           \
+    // BB2                  BB3
+    //  ...                  ...
+    //  br label %BB4        br label %BB4
+    //            \         /
+    //           BB4:
+    //             %V1 = ...
+    //             %V3 = add i32 %V1, 100
+    //
+    if (DependingInsns.size() == 1 && DT->dominates(FirstDepI, LI) &&
+        !isa<MemoryPhi>(DefiningAccess))
+      NewValue = getExtractedValue(LI, FirstDepI);
+    // If we could not eliminate the load, then we need run value numbering for
+    // the load (the load does not have an expression up to this point) and its
+    // uses.
+    if (!NewValue)
+      updateUsesAfterLoadCoercion(LI, NewValue);
+    else {
+      // Run value numbering for all the new instructions and their uses.
+      updateUsesAfterLoadCoercion(LI, NewValue);
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
 bool NewGVN::eliminateInstructions(Function &F) {
   // This is a non-standard eliminator. The normal way to eliminate is
   // to walk the dominator tree in order, keeping track of available
diff --git a/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll b/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll
new file mode 100644
index 000000000000000..dcb166f6d29c1a6
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/load_coercion_between_store_and_load.ll
@@ -0,0 +1,341 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -p=gvn < %s | FileCheck %s -check-prefixes=GVN,OLDGVN
+; RUN: opt -S -p=newgvn < %s | FileCheck %s -check-prefixes=GVN,NEWGVN
+
+define float @test1(i32 %V1, ptr %P) {
+; GVN-LABEL: @test1(
+; GVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP1:%.*]] = bitcast i32 [[V1]] to float
+; GVN-NEXT:    ret float [[TMP1]]
+;
+  store i32 %V1, ptr %P, align 1
+  %V2 = load float, ptr %P, align 1
+  ret float %V2
+}
+
+define float @test2(ptr %V1, ptr %P) {
+; GVN-LABEL: @test2(
+; GVN-NEXT:    store ptr [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[V1]] to i64
+; GVN-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; GVN-NEXT:    [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float
+; GVN-NEXT:    ret float [[TMP3]]
+;
+  store ptr %V1, ptr %P, align 1
+  %V2 = load float, ptr %P, align 1
+  ret float %V2
+}
+
+define i8 @test3(i32 %V1, ptr %P) {
+; GVN-LABEL: @test3(
+; GVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[V1]] to i8
+; GVN-NEXT:    ret i8 [[TMP1]]
+;
+  store i32 %V1, ptr %P, align 1
+  %V2 = load i8, ptr %P, align 1
+  ret i8 %V2
+}
+
+define float @test4(i64 %V1, ptr %P) {
+; GVN-LABEL: @test4(
+; GVN-NEXT:    store i64 [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[V1]] to i32
+; GVN-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; GVN-NEXT:    ret float [[TMP2]]
+;
+  store i64 %V1, ptr %P, align 1
+  %V2 = load float, ptr %P, align 1
+  ret float %V2
+}
+
+define i8 @test5(ptr %P, ptr %T) {
+; GVN-LABEL: @test5(
+; GVN-NEXT:    [[V1:%.*]] = load i8, ptr [[T:%.*]], align 1
+; GVN-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; GVN-NEXT:    store i8 [[V1]], ptr [[P2]], align 1
+; GVN-NEXT:    ret i8 [[V1]]
+;
+  %V1 = load i8, ptr %T, align 1
+  %P2 = getelementptr i8, ptr %P, i32 2
+  store i8 %V1, ptr %P2, align 1
+  %V2 = load i8, ptr %P2, align 1
+  ret i8 %V2
+}
+
+define ptr @test6(i64 %V1, ptr %P) {
+; GVN-LABEL: @test6(
+; GVN-NEXT:    store i64 [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[V1]] to ptr
+; GVN-NEXT:    ret ptr [[TMP1]]
+;
+  store i64 %V1, ptr %P, align 1
+  %V2 = load ptr, ptr %P, align 1
+  ret ptr %V2
+}
+
+define i32 @test7(double %V1, ptr %P) {
+; GVN-LABEL: @test7(
+; GVN-NEXT:    store double [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP1:%.*]] = bitcast double [[V1]] to i64
+; GVN-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
+; GVN-NEXT:    ret i32 [[TMP2]]
+;
+  store double %V1, ptr %P, align 1
+  %V2 = load i32, ptr %P, align 1
+  ret i32 %V2
+}
+
+define i8 @test8(i32 %V1, ptr %P) {
+; OLDGVN-LABEL: @test8(
+; OLDGVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; OLDGVN-NEXT:    [[TMP1:%.*]] = lshr i32 [[V1]], 16
+; OLDGVN-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+; OLDGVN-NEXT:    ret i8 [[TMP2]]
+;
+; NEWGVN-LABEL: @test8(
+; NEWGVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    [[TMP1:%.*]] = lshr i32 [[V1]], 16
+; NEWGVN-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
+; NEWGVN-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; NEWGVN-NEXT:    ret i8 [[TMP2]]
+;
+  store i32 %V1, ptr %P, align 1
+  %P1 = getelementptr i8, ptr %P, i32 2
+  %V2 = load i8, ptr %P1, align 1
+  ret i8 %V2
+}
+
+define double @test9(i64 %V, ptr %P, i1 %cond) {
+;   Entry
+;    /  \
+;   T    F
+;
+; GVN-LABEL: @test9(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    store i64 [[V:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast i64 [[V]] to double
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    ret double [[TMP0]]
+; GVN:       F:
+; GVN-NEXT:    ret double [[TMP0]]
+;
+Entry:
+  %A = load i64 , ptr %P, align 1
+  store i64 %V, ptr %P, align 1
+  br i1 %cond, label %T, label %F
+T:
+  %B = load double, ptr %P, align 1
+  ret double %B
+
+F:
+  %C = load double, ptr %P, align 1
+  ret double %C
+}
+
+define <{i8, float}> @test10(i32 %V0, ptr %P) {
+; OLDGVN-LABEL: @test10(
+; OLDGVN-NEXT:    store i32 [[V0:%.*]], ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    [[TMP1:%.*]] = bitcast i32 [[V0]] to float
+; OLDGVN-NEXT:    [[TMP2:%.*]] = trunc i32 [[V0]] to i8
+; OLDGVN-NEXT:    [[I1:%.*]] = insertvalue <{ i8, float }> poison, i8 [[TMP2]], 0
+; OLDGVN-NEXT:    [[I2:%.*]] = insertvalue <{ i8, float }> [[I1]], float [[TMP1]], 1
+; OLDGVN-NEXT:    ret <{ i8, float }> [[I2]]
+;
+; NEWGVN-LABEL: @test10(
+; NEWGVN-NEXT:    store i32 [[V0:%.*]], ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[V0]] to i8
+; NEWGVN-NEXT:    [[TMP2:%.*]] = bitcast i32 [[V0]] to float
+; NEWGVN-NEXT:    [[I1:%.*]] = insertvalue <{ i8, float }> poison, i8 [[TMP1]], 0
+; NEWGVN-NEXT:    [[I2:%.*]] = insertvalue <{ i8, float }> [[I1]], float [[TMP2]], 1
+; NEWGVN-NEXT:    ret <{ i8, float }> [[I2]]
+;
+  store i32 %V0, ptr %P, align 1
+  %V1 = load float, ptr %P, align 1
+  %V2 = load i8, ptr %P, align 1
+  %I1 = insertvalue <{i8, float}> poison, i8 %V2, 0
+  %I2 = insertvalue <{i8, float}> %I1, float %V1, 1
+  ret <{i8, float}> %I2
+}
+
+define <{i8, float}> @test11(i32 %V0, ptr %P, i1 %cond) {
+;   Entry
+;    /  \
+;   T    F
+;
+; GVN-LABEL: @test11(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    store i32 [[V0:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = trunc i32 [[V0]] to i8
+; GVN-NEXT:    [[TMP1:%.*]] = bitcast i32 [[V0]] to float
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    [[I1:%.*]] = insertvalue <{ i8, float }> poison, float [[TMP1]], 1
+; GVN-NEXT:    ret <{ i8, float }> [[I1]]
+; GVN:       F:
+; GVN-NEXT:    [[I2:%.*]] = insertvalue <{ i8, float }> poison, i8 [[TMP0]], 0
+; GVN-NEXT:    ret <{ i8, float }> [[I2]]
+;
+Entry:
+  store i32 %V0, ptr %P, align 1
+  br i1 %cond, label %T, label %F
+
+T:
+  %V1 = load float, ptr %P, align 1
+  %I1 = insertvalue <{i8, float}> poison, float %V1, 1
+  ret <{i8, float}> %I1
+
+F:
+  %V2 = load i8, ptr %P, align 1
+  %I2 = insertvalue <{i8, float}> poison, i8 %V2, 0
+  ret <{i8, float}> %I2
+}
+
+define <{float, float}> @test12(i32 %V0, ptr %P, i1 %cond) {
+;   Entry
+;    /  \
+;   T    F
+;
+; GVN-LABEL: @test12(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    store i32 [[V0:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast i32 [[V0]] to float
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    [[I1:%.*]] = insertvalue <{ float, float }> poison, float [[TMP0]], 1
+; GVN-NEXT:    ret <{ float, float }> [[I1]]
+; GVN:       F:
+; GVN-NEXT:    [[I2:%.*]] = insertvalue <{ float, float }> poison, float [[TMP0]], 0
+; GVN-NEXT:    ret <{ float, float }> [[I2]]
+;
+Entry:
+  store i32 %V0, ptr %P, align 1
+  br i1 %cond, label %T, label %F
+
+T:
+  %V1 = load float, ptr %P, align 1
+  %I1 = insertvalue <{float, float}> poison, float %V1, 1
+  ret <{float, float}> %I1
+
+F:
+  %V2 = load float, ptr %P, align 1
+  %I2 = insertvalue <{float, float}> poison, float %V2, 0
+  ret <{float, float}> %I2
+}
+
+define i8 @test13(ptr %P, i32 %V1) {
+; GVN-LABEL: @test13(
+; GVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[V1]] to i8
+; GVN-NEXT:    [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]]
+; GVN-NEXT:    ret i8 [[V5]]
+;
+  store i32 %V1, ptr %P, align 1
+  %V2 = load i8, ptr %P, align 1
+  %V3 = load i64, ptr %P, align 1
+  %V4 = trunc i32 %V1 to i8
+  %V5 = add i8 %V2, %V4
+  ret i8 %V5
+}
+
+define i8 @test14(ptr %P, i32 %V1) {
+; GVN-LABEL: @test14(
+; GVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[V1]] to i8
+; GVN-NEXT:    [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]]
+; GVN-NEXT:    ret i8 [[V5]]
+;
+  store i32 %V1, ptr %P, align 1
+  %V2 = load i8, ptr %P, align 1
+  %V3 = load i8, ptr %P, align 1
+  %V5 = add i8 %V2, %V3
+  ret i8 %V5
+}
+
+define i16 @test15(ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; GVN-LABEL: @test15(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    store i32 13, ptr [[P:%.*]], align 1
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    br label [[EXIT:%.*]]
+; GVN:       F:
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    ret i16 13
+;
+Entry:
+  store i32 13, ptr %P, align 1
+  br i1 %Cond, label %T, label %F
+
+T:
+  %V1 = load i16, ptr %P, align 1
+  br label %Exit
+
+F:
+  %V2 = load i32, ptr %P, align 1
+  br label %Exit
+
+Exit:
+  %V3 = load i16, ptr %P, align 1
+  ret i16 %V3
+}
+
+define i64 @test16(ptr %V1) {
+; GVN-LABEL: @test16(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    store ptr [[V1:%.*]], ptr inttoptr (i64 16 to ptr), align 8
+; GVN-NEXT:    [[V3:%.*]] = load i64, ptr [[V1]], align 4
+; GVN-NEXT:    ret i64 [[V3]]
+;
+Entry:
+  store ptr %V1, ptr inttoptr (i64 16 to ptr), align 8
+  %V2 = load ptr, ptr inttoptr (i64 16 to ptr), align 8
+  %V3 = load i64, ptr %V2
+  ret i64 %V3
+}
+
+declare void @foo1(ptr, i32) #0
+
+define i32 @test17(ptr %P, i32 %V1) {
+; GVN-LABEL: @test17(
+; GVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[V2:%.*]] = call i32 @foo1(ptr [[P]], i32 [[V1]]) #[[ATTR0:[0-9]+]]
+; GVN-NEXT:    [[V3:%.*]] = load i32, ptr [[P]], align 1
+; GVN-NEXT:    [[V4:%.*]] = add i32 [[V2]], [[V3]]
+; GVN-NEXT:    ret i32 [[V4]]
+;
+  store i32 %V1, ptr %P, align 1
+  %V2 = call i32 @foo1(ptr %P, i32 %V1) #0
+  %V3 = load i32, ptr %P, align 1
+  %V4 = add i32 %V2, %V3
+  ret i32 %V4
+}
+
+declare void @foo2(ptr, i32) #1
+
+define i32 @test18(ptr %P, i32 %V1) {
+; GVN-LABEL: @test18(
+; GVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[V2:%.*]] = call i32 @foo2(ptr [[P]], i32 [[V1]]) #[[ATTR1:[0-9]+]]
+; GVN-NEXT:    [[V4:%.*]] = add i32 [[V2]], [[V1]]
+; GVN-NEXT:    ret i32 [[V4]]
+;
+  store i32 %V1, ptr %P, align 1
+  %V2 = call i32 @foo2(ptr %P, i32 %V1) #1
+  %V3 = load i32, ptr %P, align 1
+  %V4 = add i32 %V2, %V3
+  ret i32 %V4
+}
+
+attributes #0 = { willreturn }
+attributes #1 = { readonly }
diff --git a/llvm/test/Transforms/NewGVN/pr14166-xfail.ll b/llvm/test/Transforms/NewGVN/pr14166.ll
similarity index 98%
rename from llvm/test/Transforms/NewGVN/pr14166-xfail.ll
rename to llvm/test/Transforms/NewGVN/pr14166.ll
index 1e722361d7c545f..ceb9d2c67e02c4d 100644
--- a/llvm/test/Transforms/NewGVN/pr14166-xfail.ll
+++ b/llvm/test/Transforms/NewGVN/pr14166.ll
@@ -1,4 +1,3 @@
-; XFAIL: *
 ; RUN: opt -disable-basic-aa -passes=newgvn -S < %s | FileCheck %s
 ; NewGVN fails this due to missing load coercion
 target datalayout = "e-p:32:32:32"

>From 1c32405f7a26e33959679d738af37c4a68bddf11 Mon Sep 17 00:00:00 2001
From: Konstantina Mitropoulou <Konstantina.Mitropoulou at amd.com>
Date: Mon, 9 Oct 2023 23:19:32 -0700
Subject: [PATCH 2/3] [NewGVN][2/3] Load coercion between loads that have
 live-on-entry definitions

In the following example, both %V1 and %V2 have live-on-entry definitions and
their memory locations are overlapping. After load coercion the value of %V2
is extracted from %V1 and the uses of %V2 are updated accordingly.

```
Before load coercion
BB1
  %V1 = load <2 x i32>, ptr %P, align 1
  %V2 = load i32, ptr %P, align 1
  %V3 = add i32 %V2, 42

After load coercion
BB1
  %V1 = load <2 x i32>, ptr %P, align 1
  %0 = bitcast <2 x i32> %V1 to i64
  %1 = trunc i64 %0 to i32
  %V3 = add i32 %1, 42
```
---
 llvm/lib/Transforms/Scalar/NewGVN.cpp         | 257 ++++++++++-
 .../NewGVN/load_coercion_between_loads.ll     | 424 ++++++++++++++++++
 2 files changed, 658 insertions(+), 23 deletions(-)
 create mode 100644 llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll

diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index c62777f8cb0db07..1f4468b5e93e197 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Analysis/CFGPrinter.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionPrecedenceTracking.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemorySSA.h"
@@ -500,6 +501,7 @@ class NewGVN {
   AssumptionCache *AC = nullptr;
   const DataLayout &DL;
   std::unique_ptr<PredicateInfo> PredInfo;
+  ImplicitControlFlowTracking *ICF = nullptr;
 
   // These are the only two things the create* functions should have
   // side-effects on due to allocating memory.
@@ -915,6 +917,10 @@ class NewGVN {
   // LoadCoercion map.
   bool tryAddLoadDepInsnIntoLoadCoercionMap(LoadInst *, Instruction *,
                                             BasicBlock *) const;
+  // Check if the candidate load can be optimized by another load which is also
+  // a live-on-entry definition, and add it to the LoadCoercion map.
+  bool findLiveOnEntryDependency(LoadInst *, LoadInst *, ArrayRef<BasicBlock *>,
+                                 bool) const;
+  // Collect the load instructions that can be optimized with load coercion.
+  // The filtering of the load instructions is based on the type of their
+  // memory access.
@@ -923,6 +929,8 @@ class NewGVN {
                                                           MemoryAccess *) const;
   const Expression *performSymbolicLoadCoercionForConstantMemoryDef(
       Type *, Value *, LoadInst *, Instruction *, MemoryAccess *) const;
+  bool performSymbolicLoadCoercionForLiveOnEntryDef(LoadInst *,
+                                                    MemoryAccess *) const;
   // Code generation for load coercion. Replaces the load with the right
   // instruction or the right sequence of instructions.
   bool implementLoadCoercion();
@@ -1486,6 +1494,52 @@ const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I) const {
   return createStoreExpression(SI, StoreAccess);
 }
 
+// A load can have one or more dependencies as the following examples show:
+//
+// Example 1:
+//  BB1:
+//   ...
+//   store i32 %V1, ptr %P
+//   ...
+//   %V2 = load i32, ptr %P
+//   ...
+//
+// Example 2:
+//  BB1:                       BB2:
+//   store i32 %V1, ptr %P     %V2 = load i32, ptr %P
+//   br label %BB3              br label %BB3
+//                      \      /
+//                     BB3:
+//                      %V3 = load i32, ptr %P
+//
+// In the first example, the load (%V2) has only one dependency. In the second
+// example, the load (%V3) has two dependencies. Therefore, we add the load
+// along with its two dependencies to the LoadCoercion map. However, this is
+// not always the case, as shown below:
+//
+// Example 3:
+//                   BB1:
+//                    %V1 = load <4 x i32>, ptr %P
+//                    br i1 %cond, label %BB2, label %BB3
+//                   /                          \
+//   BB2:                                      BB3:
+//    %V2 = load <2 x i32>, ptr %P              %V3 = load i32, ptr %P
+//    br label %BB4                             br label %BB4
+//		     \                         /
+//                  BB4:
+//                   %V4 = load i32, ptr %P
+//
+// The %V4 load can be optimized by any of the loads (%V1, %V2, %V3). The loads
+// %V2 and %V3 can also be optimized by %V1. For this reason, we do the
+// following checks before we add the load in the map:
+// 1. If the existing depending instruction dominates the current depending
+// instruction, then we keep the existing instruction (e.g. in Example 3, the
+// %V4 load has only one dependency (%V1) and we add only this one to the
+// LoadCoercion map).
+// 2. If the current instruction dominates the existing depending instruction,
+// then we replace the existing instruction with the current instruction.
+// 3. If none of the above, then we add the current depending instruction
+// alongside the existing instruction (e.g. Example 2 above).
 bool NewGVN::tryAddLoadDepInsnIntoLoadCoercionMap(
     LoadInst *LI, Instruction *CurrentDepI, BasicBlock *CurrentDepIBB) const {
   // Can't forward from non-atomic to atomic without violating memory model.
@@ -1500,12 +1554,165 @@ bool NewGVN::tryAddLoadDepInsnIntoLoadCoercionMap(
     if (LI->getAlign() < DepSI->getAlign())
       return false;
 
+  // Check if LI already exists in LoadCoercion map.
+  auto It = LoadCoercion.find(LI);
+  if (It != LoadCoercion.end()) {
+    auto &ExistingDepInsns = It->second;
+    // Iterate over all the existing depending instructions of LI.
+    for (auto &P : llvm::make_early_inc_range(ExistingDepInsns)) {
+      Instruction *ExistingDepI = P.first;
+      if (MSSAWalker->getClobberingMemoryAccess(getMemoryAccess(CurrentDepI)) ==
+              MSSAWalker->getClobberingMemoryAccess(
+                  getMemoryAccess(ExistingDepI)) &&
+          isa<LoadInst>(ExistingDepI) && isa<LoadInst>(CurrentDepI)) {
+        // If the existing depending instruction dominates the current depending
+        // instruction, then we should not add the current depending instruction
+        // in LoadCoercion map (Example 3).
+        if (DT->dominates(ExistingDepI, CurrentDepI))
+          return true;
+        // If the current depending instruction dominates the existing one, then
+        // we remove the existing depending instruction from the LoadCoercion
+        // map. Next, we add the current depending instruction in LoadCoercion
+        // map.
+        if (DT->dominates(CurrentDepI, ExistingDepI))
+          ExistingDepInsns.erase(P);
+      }
+    }
+  }
+
+  // Add the load and the corresponding depending instruction to the
+  // LoadCoercion map.
   LoadCoercion[LI].insert(std::make_pair(CurrentDepI, CurrentDepIBB));
   return true;
 }
 
+// Check if it is possible to apply load coercion between CandidateLI and
+// DependingLoad.
+bool NewGVN::findLiveOnEntryDependency(LoadInst *CandidateLI,
+                                       LoadInst *DependingLoad,
+                                       ArrayRef<BasicBlock *> DependingBlocks,
+                                       bool IsMemoryPhiDep) const {
+  int Offset = -1;
+
+  if (!DependingLoad || CandidateLI == DependingLoad ||
+      DependingLoad->getNumUses() == 0)
+    return false;
+
+  BasicBlock *DependingLoadBB = DependingLoad->getParent();
+  if (!ReachableBlocks.count(DependingLoadBB) ||
+      ICF->isDominatedByICFIFromSameBlock(CandidateLI))
+    return false;
+
+  if (InstructionsToErase.count(DependingLoad))
+    return false;
+
+  // We do not look deep in the CFG. We consider either instructions that
+  // dominate CandidateLI or instructions that are in one of the predecessors of
+  // CandidateLI.
+  if (DT->dominates(DependingLoad, CandidateLI))
+    Offset = analyzeLoadFromClobberingLoad(CandidateLI->getType(),
+                                           CandidateLI->getPointerOperand(),
+                                           DependingLoad, DL);
+  else {
+    BasicBlock *CandidateLIBB = CandidateLI->getParent();
+    auto It1 = llvm::find(DependingBlocks, CandidateLIBB);
+    auto It2 = llvm::find(DependingBlocks, DependingLoadBB);
+    auto Ite = DependingBlocks.end();
+    if (It1 == Ite && It2 != Ite && !isBackedge(DependingLoadBB, CandidateLIBB))
+      Offset = analyzeLoadFromClobberingLoad(CandidateLI->getType(),
+                                             CandidateLI->getPointerOperand(),
+                                             DependingLoad, DL);
+  }
+
+  bool IsLoadCoercionCandidate = false;
+  if (Offset >= 0) {
+    // If the candidate load depends on a MemoryPhi, then we do not consider the
+    // parent block of the depending instruction, but instead it is more
+    // convenient to consider the basic block of the MemoryPhi from which the
+    // value comes e.g.:
+    //                            BB1:
+    //                             %V1 = load i32, ptr %P
+    //                             br i1 %Cond, label %BB2, label %BB3
+    //                           /    \
+    //      BB2:                      BB3:
+    //       store i32 100, ptr %P    br label %BB4
+    //       br label %BB4             /
+    //                           \    /
+    //                          BB4:
+    //                           %V2 = load i32, ptr %P
+    //
+    BasicBlock *BB = IsMemoryPhiDep ? DependingBlocks.back() : DependingLoadBB;
+    IsLoadCoercionCandidate |=
+        tryAddLoadDepInsnIntoLoadCoercionMap(CandidateLI, DependingLoad, BB);
+  }
+  return IsLoadCoercionCandidate;
+}
+
+// Find load coercion opportunities between instructions with live on entry
+// definitions.
+bool NewGVN::performSymbolicLoadCoercionForLiveOnEntryDef(
+    LoadInst *LI, MemoryAccess *DefiningAccess) const {
+  bool IsLoadCoercionCandidate = false;
+  for (const auto &U : MSSA->getLiveOnEntryDef()->uses()) {
+    if (auto *MemUse = dyn_cast<MemoryUse>(U.getUser())) {
+      // TODO: Add support for calls.
+      LoadInst *DependingLoad = dyn_cast<LoadInst>(MemUse->getMemoryInst());
+      if (!DependingLoad || LI == DependingLoad)
+        continue;
+
+      // If the two instructions have the same type, then there is a load
+      // coercion opportunity only if the LI and the DependingLoad are in
+      // different basic blocks and the basic block of the DependingLoad is one
+      // of the predecessors of the basic block of the LI. For any other case,
+      // the LI will be eliminated by adding the two loads in the same
+      // congruence class.
+      //
+      // Example 1: Here, we do not need to apply load coercion. The two loads
+      // will be added to the same congruence class and %V2 will be eliminated.
+      //
+      //  BB1:
+      //   ...
+      //   %V1 = load i32, ptr %P
+      //   br label %BB2
+      //
+      //  BB2
+      //   ...
+      //   %V2 = load i32, ptr %P
+      //   ...
+      //
+      // Example 2: Here, %V2 can be replaced by a phi node.
+      //   BB1:                              BB2:
+      //    %V1 = load <2 x i32>, ptr %P      br label %BB3
+      //    br label %BB3                    /
+      //		     \              /
+      //                  BB3:
+      //                   %V2 = load i32, ptr %P
+      //
+      // Hence, the code will become:
+      //   BB1:                                BB2:
+      //    %V1 = load <2 x i32>, ptr %P        %V2' = load i32, ptr %P
+      //    %0 = bitcast <2 x i32> %V1 to i64  br label %BB3
+      //    %1 = trunc i64 %0 to i32            /
+      //    br label %BB3                      /
+      //		     \                /
+      //                  BB3:
+      //                   %V2 = phi i32 [ %1, %BB1], [ %V2', %BB2 ]
+      //
+      if (DependingLoad->getType() == LI->getType() &&
+          (DT->dominates(DependingLoad, LI) ||
+           LI->getParent() == DependingLoad->getParent()))
+        continue;
+
+      SmallVector<BasicBlock *, 2> Preds;
+      for (auto *BB : predecessors(LI->getParent()))
+        Preds.push_back(BB);
+      IsLoadCoercionCandidate |=
+          findLiveOnEntryDependency(LI, DependingLoad, Preds, false);
+    }
+  }
+  return IsLoadCoercionCandidate;
+}
+
 // Find load coercion opportunities between load (LI) and store instructions
 // (DepSI).
 bool NewGVN::performSymbolicLoadCoercionForNonConstantMemoryDef(
@@ -1631,29 +1838,30 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
   // Check if we can apply load coercion.
   bool IsLoadCoercionCandidate = false;
 
-  if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
-    if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
-      Instruction *DefiningInst = MD->getMemoryInst();
-      // If the defining instruction is not reachable, replace with poison.
-      if (!ReachableBlocks.count(DefiningInst->getParent()))
-        return createConstantExpression(PoisonValue::get(LI->getType()));
-      // This will handle stores and memory insts.  We only do if it the
-      // defining access has a different type, or it is a pointer produced by
-      // certain memory operations that cause the memory to have a fixed value
-      // (IE things like calloc).
-      if (const auto *CoercionResult =
-              performSymbolicLoadCoercionForConstantMemoryDef(
-                  LI->getType(), LoadAddressLeader, LI, DefiningInst,
-                  DefiningAccess))
-        return CoercionResult;
-
-      if (EnableLoadCoercion) {
-        if (auto *DepSI = dyn_cast<StoreInst>(DefiningInst)) {
-          if (!isa<Constant>(lookupOperandLeader(DepSI->getValueOperand()))) {
-            IsLoadCoercionCandidate =
-                performSymbolicLoadCoercionForNonConstantMemoryDef(
-                    LI, DepSI, DefiningAccess);
-          }
+  if (MSSA->isLiveOnEntryDef(DefiningAccess) && EnableLoadCoercion)
+    IsLoadCoercionCandidate =
+        performSymbolicLoadCoercionForLiveOnEntryDef(LI, DefiningAccess);
+  else if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
+    Instruction *DefiningInst = MD->getMemoryInst();
+    // If the defining instruction is not reachable, replace with poison.
+    if (!ReachableBlocks.count(DefiningInst->getParent()))
+      return createConstantExpression(PoisonValue::get(LI->getType()));
+    // This will handle stores and memory insts.  We only do this if the
+    // defining access has a different type, or it is a pointer produced by
+    // certain memory operations that cause the memory to have a fixed value
+    // (IE things like calloc).
+    if (const auto *CoercionResult =
+            performSymbolicLoadCoercionForConstantMemoryDef(
+                LI->getType(), LoadAddressLeader, LI, DefiningInst,
+                DefiningAccess))
+      return CoercionResult;
+
+    if (EnableLoadCoercion) {
+      if (auto *DepSI = dyn_cast<StoreInst>(DefiningInst)) {
+        if (!isa<Constant>(lookupOperandLeader(DepSI->getValueOperand()))) {
+          IsLoadCoercionCandidate =
+              performSymbolicLoadCoercionForNonConstantMemoryDef(
+                  LI, DepSI, DefiningAccess);
         }
       }
     }
@@ -3104,6 +3312,7 @@ void NewGVN::cleanupTables() {
   LoadCoercion.clear();
   NewLoadsInLoadCoercion.clear();
   NewlyGeneratedInsns.clear();
+  ICF->clear();
 }
 
 // Assign local DFS number mapping to instructions, and leave space for Value
@@ -3556,6 +3765,8 @@ bool NewGVN::runGVN() {
   NumFuncArgs = F.arg_size();
   MSSAWalker = MSSA->getWalker();
   SingletonDeadExpression = new (ExpressionAllocator) DeadExpression();
+  ImplicitControlFlowTracking ImplicitCFT;
+  ICF = &ImplicitCFT;
 
   // Count number of instructions for sizing of hash tables, and come
   // up with a global dfs numbering for instructions.
diff --git a/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll
new file mode 100644
index 000000000000000..c4193acfcb22418
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/load_coercion_between_loads.ll
@@ -0,0 +1,424 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -p=gvn < %s | FileCheck %s -check-prefixes=GVN,OLDGVN
+; RUN: opt -S -p=newgvn < %s | FileCheck %s -check-prefixes=GVN,NEWGVN
+
+define i8 @test1(ptr %P) {
+; GVN-LABEL: @test1(
+; GVN-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[V1]] to i8
+; GVN-NEXT:    [[V4:%.*]] = add i8 [[TMP1]], [[TMP1]]
+; GVN-NEXT:    ret i8 [[V4]]
+;
+  %V1 = load i32, ptr %P, align 1
+  %V2 = load i8, ptr %P, align 1
+  %V3 = trunc i32 %V1 to i8
+  %V4 = add i8 %V2, %V3
+  ret i8 %V4
+}
+
+define i32 @test2(ptr %P) {
+; GVN-LABEL: @test2(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; GVN-NEXT:    [[V5:%.*]] = add i32 [[TMP1]], [[TMP1]]
+; GVN-NEXT:    ret i32 [[V5]]
+;
+Entry:
+  %V1 = load <2 x i32>, ptr %P, align 1
+  %V2 = load i32, ptr %P, align 1
+  %V3 = bitcast <2 x i32> %V1 to i64
+  %V4 = trunc i64 %V3 to i32
+  %V5 = add i32 %V2, %V4
+  ret i32 %V5
+}
+
+define <{<2 x i32>, i32}> @test3(ptr %P) {
+; GVN-LABEL: @test3(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; GVN-NEXT:    [[I1:%.*]] = insertvalue <{ <2 x i32>, i32 }> poison, <2 x i32> [[V1]], 0
+; GVN-NEXT:    [[I2:%.*]] = insertvalue <{ <2 x i32>, i32 }> [[I1]], i32 [[TMP1]], 1
+; GVN-NEXT:    ret <{ <2 x i32>, i32 }> [[I2]]
+;
+Entry:
+  %V1 = load <2 x i32>, ptr %P, align 1
+  %V2 = load i32, ptr %P, align 1
+  %I1 = insertvalue <{<2 x i32>, i32}> poison, <2 x i32> %V1, 0
+  %I2 = insertvalue <{<2 x i32>, i32}> %I1, i32 %V2, 1
+  ret <{<2 x i32>, i32}> %I2
+}
+
+define <{<2 x i32>, i32}> @test4(ptr %P) {
+; GVN-LABEL: @test4(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    [[V2:%.*]] = load i32, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P]], align 1
+; GVN-NEXT:    [[I1:%.*]] = insertvalue <{ <2 x i32>, i32 }> poison, <2 x i32> [[V1]], 0
+; GVN-NEXT:    [[I2:%.*]] = insertvalue <{ <2 x i32>, i32 }> [[I1]], i32 [[V2]], 1
+; GVN-NEXT:    ret <{ <2 x i32>, i32 }> [[I2]]
+;
+Entry:
+  %V2 = load i32, ptr %P, align 1
+  %V1 = load <2 x i32>, ptr %P, align 1
+  %I1 = insertvalue <{<2 x i32>, i32}> poison, <2 x i32> %V1, 0
+  %I2 = insertvalue <{<2 x i32>, i32}> %I1, i32 %V2, 1
+  ret <{<2 x i32>, i32}> %I2
+}
+
+define <{<2 x i32>, i32, i32}> @test5(ptr %P) {
+; GVN-LABEL: @test5(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    [[V0:%.*]] = load i32, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; GVN-NEXT:    [[I1:%.*]] = insertvalue <{ <2 x i32>, i32, i32 }> poison, <2 x i32> [[V1]], 0
+; GVN-NEXT:    [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, i32 }> [[I1]], i32 [[TMP1]], 1
+; GVN-NEXT:    [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, i32 }> [[I2]], i32 [[V0]], 2
+; GVN-NEXT:    ret <{ <2 x i32>, i32, i32 }> [[I3]]
+;
+Entry:
+  %V0 = load i32, ptr %P, align 1
+  %V1 = load <2 x i32>, ptr %P, align 1
+  %V2 = load i32, ptr %P, align 1
+  %I1 = insertvalue <{<2 x i32>, i32, i32}> poison, <2 x i32> %V1, 0
+  %I2 = insertvalue <{<2 x i32>, i32, i32}> %I1, i32 %V2, 1
+  %I3 = insertvalue <{<2 x i32>, i32, i32}> %I2, i32 %V0, 2
+  ret <{<2 x i32>, i32, i32}> %I3
+}
+
+define <{<4 x i32>, <2 x i32>, i32, <2 x i32>}> @test6(ptr %P) {
+; GVN-LABEL: @test6(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
+; GVN-NEXT:    [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
+; GVN-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP0]] to i32
+; GVN-NEXT:    [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> poison, <4 x i32> [[V1]], 0
+; GVN-NEXT:    [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[TMP2]], 1
+; GVN-NEXT:    [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[TMP3]], 2
+; GVN-NEXT:    [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[TMP2]], 3
+; GVN-NEXT:    ret <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I4]]
+;
+Entry:
+  %V1 = load <4 x i32>, ptr %P, align 1
+  %V2 = load <2 x i32>, ptr %P, align 1
+  %V3 = load i32, ptr %P, align 1
+  %V4 = load <2 x i32>, ptr %P, align 1
+  %I1 = insertvalue <{<4 x i32>, <2 x i32>, i32, <2 x i32>}> poison, <4 x i32> %V1, 0
+  %I2 = insertvalue <{<4 x i32>,<2 x i32>, i32, <2 x i32>}> %I1, <2 x i32> %V2, 1
+  %I3 = insertvalue <{<4 x i32>, <2 x i32>, i32, <2 x i32>}> %I2, i32 %V3, 2
+  %I4 = insertvalue <{<4 x i32>, <2 x i32>, i32, <2 x i32>}> %I3, <2 x i32> %V4, 3
+  ret <{<4 x i32>, <2 x i32>, i32, <2 x i32>}> %I4
+}
+
+define float @test7(ptr %P, i1 %cond) {
+;   Entry
+;    /  \
+;   T    F
+;
+; GVN-LABEL: @test7(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast i32 [[V1]] to float
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    ret float [[TMP0]]
+; GVN:       F:
+; GVN-NEXT:    ret float [[TMP0]]
+;
+Entry:
+  %V1 = load i32, ptr %P, align 1
+  %V2 = load float, ptr %P, align 1
+  br i1 %cond, label %T, label %F
+T:
+  ret float %V2
+
+F:
+  %V3 = bitcast i32 %V1 to float
+  ret float %V3
+}
+
+define <{<2 x i32>, i32, i32, i32, i32}> @test8(ptr %P) {
+; GVN-LABEL: @test8(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    [[V0:%.*]] = load i32, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; GVN-NEXT:    [[I1:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> poison, <2 x i32> [[V1]], 0
+; GVN-NEXT:    [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I1]], i32 [[TMP1]], 1
+; GVN-NEXT:    [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I2]], i32 [[V0]], 2
+; GVN-NEXT:    [[I4:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I3]], i32 [[TMP1]], 3
+; GVN-NEXT:    [[I5:%.*]] = insertvalue <{ <2 x i32>, i32, i32, i32, i32 }> [[I4]], i32 [[TMP1]], 4
+; GVN-NEXT:    ret <{ <2 x i32>, i32, i32, i32, i32 }> [[I5]]
+;
+Entry:
+  %V0 = load i32, ptr %P, align 1
+  %V1 = load <2 x i32>, ptr %P, align 1
+  %V2 = load i32, ptr %P, align 1
+  %I1 = insertvalue <{<2 x i32>, i32, i32, i32, i32}> poison, <2 x i32> %V1, 0
+  %I2 = insertvalue <{<2 x i32>, i32, i32, i32, i32}> %I1, i32 %V2, 1
+  %I3 = insertvalue <{<2 x i32>, i32, i32, i32, i32}> %I2, i32 %V0, 2
+  %V3 = load i32, ptr %P, align 1
+  %I4 = insertvalue <{<2 x i32>, i32, i32, i32, i32}> %I3, i32 %V3, 3
+  %V4 = load i32, ptr %P, align 1
+  %I5 = insertvalue <{<2 x i32>, i32, i32, i32, i32}> %I4, i32 %V4, 4
+  ret <{<2 x i32>, i32, i32, i32, i32}> %I5
+}
+
+define <{<4 x i32>, <2 x i32>, i32, <2 x i32>}> @test9(ptr %P) {
+; GVN-LABEL: @test9(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
+; GVN-NEXT:    [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
+; GVN-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP0]] to i32
+; GVN-NEXT:    [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> poison, <4 x i32> [[V1]], 0
+; GVN-NEXT:    [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I1]], <2 x i32> [[TMP2]], 1
+; GVN-NEXT:    [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I2]], i32 [[TMP3]], 2
+; GVN-NEXT:    [[I4:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I3]], <2 x i32> [[TMP2]], 3
+; GVN-NEXT:    ret <{ <4 x i32>, <2 x i32>, i32, <2 x i32> }> [[I4]]
+;
+Entry:
+  %V1 = load <4 x i32>, ptr %P, align 1
+  %V2 = load <2 x i32>, ptr %P, align 1
+  %V3 = load i32, ptr %P, align 1
+  %V4 = load <2 x i32>, ptr %P, align 1
+  %I1 = insertvalue <{<4 x i32>, <2 x i32>, i32, <2 x i32>}> poison, <4 x i32> %V1, 0
+  %I2 = insertvalue <{<4 x i32>,<2 x i32>, i32, <2 x i32>}> %I1, <2 x i32> %V2, 1
+  %I3 = insertvalue <{<4 x i32>, <2 x i32>, i32, <2 x i32>}> %I2, i32 %V3, 2
+  %I4 = insertvalue <{<4 x i32>, <2 x i32>, i32, <2 x i32>}> %I3, <2 x i32> %V4, 3
+  ret <{<4 x i32>, <2 x i32>, i32, <2 x i32>}> %I4
+}
+
+define <{<4 x i32>, <2 x i32>, i32}> @test10(ptr %P, i1 %cond) {
+;   Entry
+;    /  \
+;   T    F
+;
+; OLDGVN-LABEL: @test10(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> poison, <4 x i32> [[V1]], 0
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32
+; OLDGVN-NEXT:    [[TMP2:%.*]] = trunc i128 [[TMP0]] to i64
+; OLDGVN-NEXT:    [[TMP3:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], <2 x i32> [[TMP3]], 1
+; OLDGVN-NEXT:    ret <{ <4 x i32>, <2 x i32>, i32 }> [[I2]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], i32 [[TMP1]], 2
+; OLDGVN-NEXT:    ret <{ <4 x i32>, <2 x i32>, i32 }> [[I3]]
+;
+; NEWGVN-LABEL: @test10(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32
+; NEWGVN-NEXT:    [[TMP2:%.*]] = trunc i128 [[TMP0]] to i64
+; NEWGVN-NEXT:    [[TMP3:%.*]] = bitcast i64 [[TMP2]] to <2 x i32>
+; NEWGVN-NEXT:    [[I1:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> poison, <4 x i32> [[V1]], 0
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    [[I2:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], <2 x i32> [[TMP3]], 1
+; NEWGVN-NEXT:    ret <{ <4 x i32>, <2 x i32>, i32 }> [[I2]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[I3:%.*]] = insertvalue <{ <4 x i32>, <2 x i32>, i32 }> [[I1]], i32 [[TMP1]], 2
+; NEWGVN-NEXT:    ret <{ <4 x i32>, <2 x i32>, i32 }> [[I3]]
+;
+Entry:
+  %V1 = load <4 x i32>, ptr %P, align 1
+  %I1 = insertvalue <{<4 x i32>, <2 x i32>, i32}> poison, <4 x i32> %V1, 0
+  br i1 %cond, label %T, label %F
+T:
+  %V2 = load <2 x i32>, ptr %P, align 1
+  %I2 = insertvalue <{<4 x i32>,<2 x i32>, i32}> %I1, <2 x i32> %V2, 1
+  ret <{<4 x i32>, <2 x i32>, i32}> %I2
+
+F:
+  %V3 = load i32, ptr %P, align 1
+  %I3 = insertvalue <{<4 x i32>, <2 x i32>, i32}> %I1, i32 %V3, 2
+  ret <{<4 x i32>, <2 x i32>, i32}> %I3
+}
+
+define <{<2 x i32>, i32, float}> @test11(ptr %P) {
+; GVN-LABEL: @test11(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; GVN-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; GVN-NEXT:    [[I1:%.*]] = insertvalue <{ <2 x i32>, i32, float }> poison, <2 x i32> [[V1]], 0
+; GVN-NEXT:    [[I2:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I1]], i32 [[TMP1]], 1
+; GVN-NEXT:    [[I3:%.*]] = insertvalue <{ <2 x i32>, i32, float }> [[I2]], float [[TMP2]], 2
+; GVN-NEXT:    ret <{ <2 x i32>, i32, float }> [[I3]]
+;
+Entry:
+  %V1 = load <2 x i32>, ptr %P, align 1
+  %V2 = load i32, ptr %P, align 1
+  %V3 = load float, ptr %P, align 1
+  %I1 = insertvalue <{<2 x i32>, i32, float}> poison, <2 x i32> %V1, 0
+  %I2 = insertvalue <{<2 x i32>, i32, float}> %I1, i32 %V2, 1
+  %I3 = insertvalue <{<2 x i32>, i32, float}> %I2, float %V3, 2
+  ret <{<2 x i32>, i32, float}> %I3
+}
+
+define i8 @test12(ptr %P, i32 %V) {
+; GVN-LABEL: @test12(
+; GVN-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[V1]] to i8
+; GVN-NEXT:    store i32 [[V:%.*]], ptr [[P]], align 1
+; GVN-NEXT:    [[TMP2:%.*]] = trunc i32 [[V]] to i8
+; GVN-NEXT:    [[V5:%.*]] = add i8 [[TMP1]], [[TMP1]]
+; GVN-NEXT:    [[V6:%.*]] = add i8 [[TMP2]], [[V5]]
+; GVN-NEXT:    ret i8 [[V6]]
+;
+  %V1 = load i32, ptr %P, align 1
+  %V2 = load i8, ptr %P, align 1
+  %V3 = trunc i32 %V1 to i8
+  store i32 %V, ptr %P, align 1
+  %V4 = load i8, ptr %P, align 1
+  %V5 = add i8 %V2, %V3
+  %V6 = add i8 %V4, %V5
+  ret i8 %V6
+}
+
+define float @test13(ptr %P, i1 %cond) {
+;   Entry
+;    /  \
+;   T    F
+;
+; GVN-LABEL: @test13(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    [[V2:%.*]] = load float, ptr [[P:%.*]], align 1
+; GVN-NEXT:    ret float [[V2]]
+; GVN:       F:
+; GVN-NEXT:    [[V3:%.*]] = load float, ptr [[P]], align 1
+; GVN-NEXT:    ret float [[V3]]
+;
+Entry:
+  %V1 = load i32, ptr %P, align 1
+  br i1 %cond, label %T, label %F
+T:
+  %V2 = load float, ptr %P, align 1
+  ret float %V2
+
+F:
+  %V3 = load float, ptr %P, align 1
+  ret float %V3
+}
+
+define i32 @test14(ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; GVN-LABEL: @test14(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    br label [[EXIT:%.*]]
+; GVN:       F:
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[PHI:%.*]] = phi i32 [ 100, [[T]] ], [ 200, [[F]] ]
+; GVN-NEXT:    [[V2:%.*]] = extractelement <2 x i32> [[V1]], i64 1
+; GVN-NEXT:    [[V4:%.*]] = add i32 [[TMP1]], [[V2]]
+; GVN-NEXT:    [[V5:%.*]] = add i32 [[V4]], [[PHI]]
+; GVN-NEXT:    ret i32 [[V5]]
+;
+Entry:
+  %V1 = load <2 x i32>, ptr %P, align 1
+  br i1 %Cond, label %T, label %F
+
+T:
+  br label %Exit
+
+F:
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [100, %T], [200, %F]
+  %V2 = extractelement <2 x i32> %V1, i64 1
+  %V3 = load i32, ptr %P, align 1
+  %V4 = add i32 %V3, %V2
+  %V5 = add i32 %V4, %Phi
+  ret i32 %V5
+}
+
+define void @test15(i1 %Cond) {
+; OLDGVN-LABEL: @test15(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br label [[LOOP_HEADER:%.*]]
+; OLDGVN:       Loop.Header:
+; OLDGVN-NEXT:    store i64 0, ptr null, align 8
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    ret void
+;
+; NEWGVN-LABEL: @test15(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br label [[LOOP_HEADER:%.*]]
+; NEWGVN:       Loop.Header:
+; NEWGVN-NEXT:    br label [[LOOP_LATCH:%.*]]
+; NEWGVN:       Loop.Latch:
+; NEWGVN-NEXT:    store i64 0, ptr null, align 8
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    ret void
+;
+Entry:
+  br label %Loop.Header
+
+Loop.Header:                                       ; preds = %Loop.Latch, %entry
+  %V1 = load ptr, ptr inttoptr (i64 16 to ptr), align 8
+  %V2 = load i64, ptr %V1, align 8
+  br label %Loop.Latch
+
+Loop.Latch:                                       ; preds = %Loop.Header
+  %V3 = load ptr, ptr inttoptr (i64 16 to ptr), align 8
+  %V4 = load i64, ptr %V3, align 8
+  store i64 0, ptr null, align 8
+  br i1 %Cond, label %Loop.Header, label %Exit
+
+Exit:
+  ret void
+}
+
+declare void @foo(ptr, i32) #0
+
+define i32 @test16(ptr %P) {
+; GVN-LABEL: @test16(
+; GVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[V2:%.*]] = extractelement <2 x i32> [[V1]], i64 1
+; GVN-NEXT:    [[V3:%.*]] = call i32 @foo(ptr [[P]], i32 [[V2]]) #[[ATTR0:[0-9]+]]
+; GVN-NEXT:    [[V4:%.*]] = load i32, ptr [[P]], align 1
+; GVN-NEXT:    [[V5:%.*]] = add i32 [[V3]], [[V4]]
+; GVN-NEXT:    ret i32 [[V5]]
+;
+  %V1 = load <2 x i32>, ptr %P, align 1
+  %V2 = extractelement <2 x i32> %V1, i64 1
+  %V3 = call i32 @foo(ptr %P, i32 %V2) #0
+  %V4 = load i32, ptr %P, align 1
+  %V5 = add i32 %V3, %V4
+  ret i32 %V5
+}
+
+attributes #0 = { willreturn }

>From 299b248c8bb03baddfbddef05be23e40abb9a8e2 Mon Sep 17 00:00:00 2001
From: Konstantina Mitropoulou <Konstantina.Mitropoulou at amd.com>
Date: Mon, 9 Oct 2023 23:59:29 -0700
Subject: [PATCH 3/3] [NewGVN][3/3] Load coercion for loads that can be
 replaced by a phi

In the following two examples, there are two cases where the load can be
replaced by a phi:

1. MemoryPhi: In Example 1, load %V is dependent on a MemoryPhi. This
indicates that there are two memory definitions in BB1 and BB2 for %V. As
a result, we replace the load with a phi.

Example 1:
```
Before load coercion
BB1:                        BB2:
 1 = MemoryDef(liveOnEntry)  2 = MemoryDef(liveOnEntry)
 store i32 100, ptr %P       store i32 500, ptr %P
 br label %BB3               br label %BB3
                      \     /
                     BB3:
		      3 = MemoryPhi({BB1,1},{BB2,2})
                      %V = load i32, ptr %P

After load coercion
 BB1:                       BB2:
  store i32 100, ptr %P      store i32 500, ptr %P
  br label %BB3              br label %BB3
                      \     /
                     BB3:
                      %V = phi i32 [ 100, %BB1 ], [ 500, %BB2 ]

```

2. Partial load elimination: In Example 2, %V1 and %V2 have live-on-entry
definitions and their memory locations overlap. By emitting a new load
%V2' in BB2, we can replace %V2 with a phi node.

Example 2:
```
Before load coercion
  BB1:                              BB2:
   %V1 = load <2 x i32>, ptr %P      br label %BB3
   br label %BB3                    /
               \              /
                 BB3:
                  %V2 = load i32, ptr %P

After load coercion
  BB1:                                BB2:
   %V1 = load <2 x i32>, ptr %P        %V2' = load i32, ptr %P
   %0 = bitcast <2 x i32> %V1 to i64  br label %BB3
   %1 = trunc i64 %0 to i32            /
   br label %BB3                      /
               \                /
                 BB3:
                  %V2 = phi i32 [ %1, %BB1], [ %V2', %BB2 ]
```

The code includes more cases like these. Please refer to the examples in the
code comments for more details.
---
 llvm/lib/Transforms/Scalar/NewGVN.cpp         |  408 +-
 .../load_coercion_replace_load_with_phi.ll    | 3787 +++++++++++++++++
 llvm/test/Transforms/NewGVN/pr35125.ll        |    2 +-
 3 files changed, 4194 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll

diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 1f4468b5e93e197..49d24628bd5a485 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -108,6 +108,7 @@
 #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PredicateInfo.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Transforms/Utils/VNCoercion.h"
 #include <algorithm>
 #include <cassert>
@@ -931,6 +932,8 @@ class NewGVN {
       Type *, Value *, LoadInst *, Instruction *, MemoryAccess *) const;
   bool performSymbolicLoadCoercionForLiveOnEntryDef(LoadInst *,
                                                     MemoryAccess *) const;
+  bool performSymbolicLoadCoercionForMemoryPhi(LoadInst *,
+                                               MemoryAccess *) const;
   // Code generation for load coercion. Replaces the load with the right
   // instruction or the right sequence of instructions.
   bool implementLoadCoercion();
@@ -945,6 +948,15 @@ class NewGVN {
   // further. For this reason, we run value numbering for all the uses of the
   // optimized load. If load coercion has failed, then we need to add the load
   // (and its uses) to the right congruence class.
+  // Emit the phi that replaces the load and update the SSA with the new
+  // phi.
+  Value *emitLoadCoercionPhi(LoadInst *, BasicBlock *,
+                             ArrayRef<std::pair<BasicBlock *, Instruction *>>);
+  // Check if the load can be replaced by a phi.
+  Value *tryReplaceLoadWithPhi(
+      LoadInst *, BasicBlock *,
+      SmallVectorImpl<std::pair<BasicBlock *, Instruction *>> &,
+      ArrayRef<BasicBlock *>);
   void updateUsesAfterLoadCoercionImpl(LoadInst *,
                                        SmallVectorImpl<Instruction *> &);
   void updateUsesAfterLoadCoercion(LoadInst *, Value *);
@@ -1648,6 +1660,137 @@ bool NewGVN::findLiveOnEntryDependency(LoadInst *CandidateLI,
   return IsLoadCoercionCandidate;
 }
 
+// Process load instructions that have MemoryPhi dependencies.
+bool NewGVN::performSymbolicLoadCoercionForMemoryPhi(
+    LoadInst *LI, MemoryAccess *DefiningAccess) const {
+  assert((!LI || LI->isSimple()) && "Not a simple load");
+  bool IsLoadCoercionCandidate = false;
+  if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
+    // If the candidate load is dominated by a call that never returns, then we
+    // do not replace the load with a phi node.
+    if (ICF->isDominatedByICFIFromSameBlock(LI))
+      return false;
+
+    // The MemoryPhi of Example 1 indicates that the load is dependent on the
+    // store (1) in Basic block T and store (2) in basic block F. Therefore,
+    // both of the store instructions should be added in LoadCoercion map.
+    //
+    // Example 1:
+    //     BB1:                        BB2:
+    //      1 = MemoryDef(liveOnEntry)  2 = MemoryDef(liveOnEntry)
+    //      store i32 100, ptr %P       store i32 500, ptr %P
+    //      br label %BB3               br label %BB3
+    //                             \    /
+    //                            BB3:
+    //                             3 = MemoryPhi({BB1,1},{BB2,2})
+    //                             %V = load i32, ptr %P
+    //
+    // In Example 2, the load of BB3 has two dependencies: the store in BB1 as
+    // the MemoryPhi indicates and the load in BB2 which is not included in
+    // MemoryPhi. To find this dependency, we check if it is possible to apply
+    // load coercion to any of the instructions that have live on entry
+    // definition. We restrict our search to the MemoryPhi predecessors and the
+    // instructions that dominate the MemoryPhi.
+    //
+    // Example 2:
+    //     BB1:                        BB2:
+    //      1 = MemoryDef(liveOnEntry)  0 = MemoryDef(liveOnEntry)
+    //      store i32 100, ptr %P       %V1 = load i32, ptr %P
+    //      br label %BB3               br label %BB3
+    //                             \    /
+    //                            BB3:
+    //                             2 = MemoryPhi({BB1,1},{BB2,liveOnEntry})
+    //                             %V2 = load i32, ptr %P
+    //
+    // Iterate over all the operands of the memory phi and check if any of its
+    // operands can optimize the current load.
+    SmallVector<std::pair<MemoryAccess *, BasicBlock *>, 1>
+        LiveOnEntryMemAccesses;
+    for (Use &Op : MemPhi->incoming_values()) {
+      // Bail out if one of the operands is not a memory use or definition.
+      // TODO: Add support for MemoryPhi operands.
+      if (!isa<MemoryUseOrDef>(&Op)) {
+        LoadCoercion.erase(LI);
+        return false;
+      }
+
+      MemoryUseOrDef *MemAccess = cast<MemoryUseOrDef>(&Op);
+      int Offset = -1;
+      Instruction *DepI = nullptr;
+      BasicBlock *IncomingBB = MemPhi->getIncomingBlock(Op);
+
+      // We collect the MemoryPhi operands that have live on entry definitions
+      // and we process them later only if it is possible to optimize LI with
+      // the MemoryDef operand. The search for the live on entry definitions is
+      // expensive and we need to do it only if it is necessary.
+      if (MSSA->isLiveOnEntryDef(MemAccess))
+        LiveOnEntryMemAccesses.push_back(std::make_pair(MemAccess, IncomingBB));
+      else if (isa<MemoryDef>(&Op)) {
+        // Process MemoryDef operands.
+        DepI = MemAccess->getMemoryInst();
+        Offset = -1;
+
+        if (!ReachableBlocks.count(DepI->getParent())) {
+          LoadCoercion.erase(LI);
+          return false;
+        }
+
+        if (DT->dominates(LI, DepI)) {
+          // In this case, there is a loop. For now, we bail-out load
+          // coercion.
+          LoadCoercion.erase(LI);
+          return false;
+        }
+
+        if (auto *DepS = dyn_cast<StoreInst>(DepI))
+          Offset = analyzeLoadFromClobberingStore(
+              LI->getType(), LI->getPointerOperand(), DepS, DL);
+        else if (auto *DepL = dyn_cast<LoadInst>(DepI))
+          Offset = analyzeLoadFromClobberingLoad(
+              LI->getType(), LI->getPointerOperand(), DepL, DL);
+        else if (auto *DepCall = dyn_cast<CallInst>(DepI)) {
+          // TODO: Improve call coverage.
+          if (AA->doesNotAccessMemory(DepCall) || AA->onlyReadsMemory(DepCall))
+            continue;
+          LoadCoercion.erase(LI);
+          return false;
+        } else {
+          LoadCoercion.erase(LI);
+          return false;
+        }
+        if (Offset >= 0)
+          IsLoadCoercionCandidate |=
+              tryAddLoadDepInsnIntoLoadCoercionMap(LI, DepI, IncomingBB);
+        else {
+          LoadCoercion.erase(LI);
+          return false;
+        }
+      }
+    }
+
+    if (IsLoadCoercionCandidate) {
+      // Process the operands with live on entry definitions.
+      for (auto P : LiveOnEntryMemAccesses) {
+        MemoryAccess *MemAccess = P.first;
+        int Offset;
+        for (const auto &U : MemAccess->uses()) {
+          Offset = -1;
+          auto *MemUse = dyn_cast<MemoryUse>(U.getUser());
+          if (MemUse == nullptr)
+            continue;
+          LoadInst *DependingLoad = dyn_cast<LoadInst>(MemUse->getMemoryInst());
+          if (!DependingLoad)
+            continue;
+          SmallVector<BasicBlock *, 1> IncomingBB;
+          IncomingBB.push_back(P.second);
+          findLiveOnEntryDependency(LI, DependingLoad, IncomingBB, true);
+        }
+      }
+    }
+  }
+  return IsLoadCoercionCandidate;
+}
+
 // Find load coercion opportunities between instructions with live on entry
 // definitions.
 bool NewGVN::performSymbolicLoadCoercionForLiveOnEntryDef(
@@ -1837,8 +1980,11 @@ const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I) const {
 
   // Check if we can apply load coercion.
   bool IsLoadCoercionCandidate = false;
-
-  if (MSSA->isLiveOnEntryDef(DefiningAccess) && EnableLoadCoercion)
+  if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
+    if (EnableLoadCoercion)
+      IsLoadCoercionCandidate =
+          performSymbolicLoadCoercionForMemoryPhi(LI, DefiningAccess);
+  } else if (MSSA->isLiveOnEntryDef(DefiningAccess) && EnableLoadCoercion)
     IsLoadCoercionCandidate =
         performSymbolicLoadCoercionForLiveOnEntryDef(LI, DefiningAccess);
   else if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
@@ -4215,6 +4361,146 @@ Value *NewGVN::getExtractedValue(LoadInst *LI, Instruction *DepI) {
   return NewValue;
 }
 
+// Create the phi node that replaces the load in load coercion.
+Value *NewGVN::emitLoadCoercionPhi(
+    LoadInst *LI, BasicBlock *InsertBB,
+    ArrayRef<std::pair<BasicBlock *, Instruction *>> PhiOperands) {
+  Value *NewValue = nullptr;
+  SmallVector<PHINode *, 8> NewPHIs;
+  SSAUpdater SSAUpdate(&NewPHIs);
+  SSAUpdate.Initialize(LI->getType(), LI->getName());
+
+  for (const auto &P : PhiOperands) {
+    BasicBlock *PredBB = P.first;
+    Instruction *DepI = P.second;
+
+    if (SSAUpdate.HasValueForBlock(PredBB))
+      continue;
+
+    // Get the incoming value for this block. This value is the value that we
+    // extract from the corresponding depending instruction.
+    NewValue = getExtractedValue(LI, DepI);
+    // Match the coerced value with the corresponding incoming block.
+    SSAUpdate.AddAvailableValue(PredBB, NewValue);
+  }
+
+  // Generate the phi node.
+  NewValue = SSAUpdate.GetValueInMiddleOfBlock(InsertBB);
+  // Run value numbering for the new phi node.
+  if (Instruction *I = dyn_cast<Instruction>(NewValue))
+    NewlyGeneratedInsns.push_back(I);
+
+  return NewValue;
+}
+
+// Check if we can remove the candidate load instruction and replace it with a
+// phi node. In Example 1, all the predecessors have a depending instruction.
+// Therefore, the load is replaced by a phi node whose incoming values are
+// extracted from each depending instruction.
+//
+// Example 1:
+// Before Load Coercion:
+//     BB1:                        BB2:
+//      store i32 100, ptr %P      store i32 500, ptr %P
+//      br label %BB3              br label %BB3
+//                           \    /
+//                         BB3:
+//                          %V = load i32, ptr %P
+//
+// After Load Coercion:
+//     BB1:                        BB2:
+//      store i32 100, ptr %P      store i32 500, ptr %P
+//      br label %BB3              br label %BB3
+//                           \    /
+//                         BB3:
+//                          %phi = phi i32 [ 100, %BB1], [ 500, %BB2 ]
+//
+// In example 2, there is only one depending instruction in BB1. We eliminate
+// the load of BB3 by adding an artificial dependency in BB2. This is done by
+// adding a new load (%V1) in BB2. Now, the load of BB3 has two dependencies.
+// Therefore, we can replace it with a phi node as it is shown below:
+//
+// Example 2:
+// Before Load Coercion:
+//     BB1:                        BB2:
+//      store i32 100, i32* %P      br label %BB3
+//      br label %BB3                /
+//                             \    /
+//                            BB3:
+//                             %V = load i32, i32* %P
+//
+// After Load Coercion:
+//     BB1:                        BB2:
+//      store i32 100, i32* %P      %V1 = load i32, i32* %P
+//      br label %BB3              br label %BB3
+//                             \    /
+//                            BB3:
+//                             %phi = phi i32 [ 100, %BB1], [ %V1, %BB2 ]
+//
+Value *NewGVN::tryReplaceLoadWithPhi(
+    LoadInst *LI, BasicBlock *InsertBB,
+    SmallVectorImpl<std::pair<BasicBlock *, Instruction *>> &PhiOperands,
+    ArrayRef<BasicBlock *> IncomingBlocksWithoutDep) {
+
+  // If we have found all the phi operands (Example 1), then we are ready to
+  // replace the load with a phi node.
+  if (PhiOperands.size() == pred_size(InsertBB))
+    return emitLoadCoercionPhi(LI, InsertBB, PhiOperands);
+
+  // Sanity checks before we do partial load elimination (Example 2).
+  // If there are more than one predecessors without a depending instruction,
+  // then we do not perform load coercion.
+  // TODO: Create a new common predecessor and emit a new load in the common
+  // predecessor.
+  if (IncomingBlocksWithoutDep.size() > 1)
+    return nullptr;
+
+  // Bail out if none of the incoming edges into LI's block is reachable.
+  if (llvm::all_of(IncomingBlocksWithoutDep, [this, LI](BasicBlock *BB) {
+        return !ReachableEdges.count({BB, LI->getParent()});
+      }))
+    return nullptr;
+
+  BasicBlock *IncomingBlock = IncomingBlocksWithoutDep.back();
+  // Do not add a new load in EHPad that does not allow non-phi instructions.
+  if (IncomingBlock->getTerminator()->isEHPad())
+    return nullptr;
+
+  // TODO: Add support for the case where IncomingBlock has more than one
+  // successor.
+  if (succ_size(IncomingBlock) != 1)
+    return nullptr;
+
+  // Generate a new load instruction in the incoming block.
+  Value *LIPtr = LI->getPointerOperand();
+  SmallVector<Instruction *, 8> NewInsts;
+  PHITransAddr Address(LIPtr, DL, AC);
+  LIPtr =
+      Address.translateWithInsertion(InsertBB, IncomingBlock, *DT, NewInsts);
+  if (!LIPtr)
+    return nullptr;
+  auto *NewDependingLoad = new LoadInst(
+      LI->getType(), LIPtr, LI->getName(), LI->isVolatile(), LI->getAlign(),
+      LI->getOrdering(), LI->getSyncScopeID(), IncomingBlock->getTerminator());
+  NewInsts.push_back(NewDependingLoad);
+
+  // Update the debug information of the new load.
+  NewDependingLoad->setDebugLoc(LI->getDebugLoc());
+
+  // Update MemorySSA with the new load instruction.
+  updateMemorySSA(LI, NewDependingLoad);
+
+  // Create the new phi operand.
+  PhiOperands.push_back(std::make_pair(IncomingBlock, NewDependingLoad));
+  for (auto *CurI : NewInsts) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(CurI))
+      NewLoadsInLoadCoercion.push_back(LI);
+    NewlyGeneratedInsns.push_back(CurI);
+  }
+  // Create the new phi node.
+  return emitLoadCoercionPhi(LI, InsertBB, PhiOperands);
+}
+
 void NewGVN::updateUsesAfterLoadCoercionImpl(
     LoadInst *LI, SmallVectorImpl<Instruction *> &LIUses) {
   // Run value numbering for the users of the candidate load instruction.
@@ -4350,6 +4636,124 @@ bool NewGVN::implementLoadCoercion() {
     if (DependingInsns.size() == 1 && DT->dominates(FirstDepI, LI) &&
         !isa<MemoryPhi>(DefiningAccess))
       NewValue = getExtractedValue(LI, FirstDepI);
+    else {
+      // Next, we check if we can replace LI with a phi node. To do this, we
+      // first have to find the operands of the phi node. In Example 1, it is
+      // straightforward that the operands of the new phi node is (100, %BB1)
+      // and (500, %BB2).
+      //
+      // Example 1:
+      // Before load coercion
+      //     BB1:                        BB2:
+      //      store i32 100, ptr %P       store i32 500, ptr %P
+      //      br label %BB3               br label %BB3
+      //                           \     /
+      //                            BB3:
+      //                             %V = load i32, ptr %P
+      //
+      // After load coercion
+      //     BB1:                        BB2:
+      //      store i32 100, ptr %P       store i32 500, ptr %P
+      //      br label %BB3               br label %BB3
+      //                           \     /
+      //                            BB3:
+      //                             %V = phi i32 [ 100, %BB1 ], [ 500, %BB2 ]
+      //
+      // However, this is not the case in Example 2. The operands of the new
+      // phi node should be (100, %BB2) and (%V1, %BB3). Hence, the incoming
+      // value might not be in an incoming block. But, the incoming value can
+      // also be in a basic block that dominates the incoming block.
+      //
+      // Example 2:
+      // Before load coercion
+      //                            BB1:
+      //                             %V1 = load i32, ptr %P
+      //                             br i1 %Cond, label %BB2, label %BB3
+      //                           /    \
+      //      BB2:                      BB3:
+      //       store i32 100, ptr %P     br label %BB3
+      //       br label %BB3             /
+      //                           \    /
+      //                          BB4:
+      //                           %V2 = load i32, ptr %P
+      //
+      // After load coercion
+      //                            BB1:
+      //                             %V1 = load i32, ptr %P
+      //                             br i1 %Cond, label %BB2, label %BB3
+      //                           /    \
+      //      BB2:                      BB3:
+      //       store i32 100, ptr %P     br label %BB3
+      //       br label %BB3             /
+      //                           \    /
+      //                          BB4:
+      //                           %V2 = phi i32 [ 100, %BB2 ], [ %V1, %BB3 ]
+      //
+      // In addition, we have to find which of the incoming blocks do not have
+      // depending instructions.
+      //
+      // Example 3:
+      // Before load coercion
+      //   BB1:                              BB2:
+      //    %V1 = load <2 x i32>, ptr %P      br label %BB3
+      //    br label %BB3                    /
+      //		     \              /
+      //                  BB3:
+      //                   %V2 = load i32, ptr %P
+      //
+      // After load coercion
+      //   BB1:                                BB2:
+      //    %V1 = load <2 x i32>, ptr %P        %V2' = load i32, ptr %P
+      //    %0 = bitcast <2 x i32> %V1 to i64  br label %BB3
+      //    %1 = trunc i64 %0 to i32            /
+      //    br label %BB3                      /
+      //		     \                /
+      //                  BB3:
+      //                   %V2 = phi i32 [ %1, %BB1], [ %V2', %BB2 ]
+      //
+      // In this case, the first operands of the phi node is (%1, %BB1). But,
+      // we have to create the second operand by emitting a new load in BB3.
+      // Hence, the second operand is (%V2', %BB2).
+      //
+      // Get the insertion point of the new phi node. For the first two
+      // examples, the insertion point is the beginning of the basic block of
+      // the MemoryPhi. In the third example, there is not a MemoryPhi.
+      // Therefore, the insertion point is the beginning of the parent block of
+      // the optimized load.
+      BasicBlock *InsertBB = isa<MemoryPhi>(DefiningAccess)
+                                 ? DefiningAccess->getBlock()
+                                 : LI->getParent();
+      // Keeps the incoming blocks of the memory phi that have a depending
+      // instruction.
+      SmallVector<std::pair<BasicBlock *, Instruction *>, 2>
+          IncomingBlocksWithDep;
+      // Keeps the incoming blocks of the memory phi that do not have a
+      // depending instruction.
+      SmallVector<BasicBlock *, 2> IncomingBlocksWithoutDep;
+      // Get the predecessors of the LI.
+      SmallVector<BasicBlock *, 2> LIPredBBs;
+      for (BasicBlock *PredBB : predecessors(InsertBB))
+        LIPredBBs.push_back(PredBB);
+      SmallPtrSet<Instruction *, 2> LIDependingInsns;
+      // Check which blocks have a depending instruction.
+      for (auto &P : DependingInsns) {
+        Instruction *CurrentDepI = P.first;
+        BasicBlock *CurrentDepIBB = P.second;
+        auto PredBBIt =
+            std::find(LIPredBBs.begin(), LIPredBBs.end(), CurrentDepIBB);
+        if (PredBBIt != LIPredBBs.end()) {
+          IncomingBlocksWithDep.push_back(
+              std::make_pair(CurrentDepIBB, CurrentDepI));
+          LIPredBBs.erase(PredBBIt);
+        }
+        LIDependingInsns.insert(P.first);
+      }
+      // Check which blocks do not have a depending instruction.
+      for (auto *CurPredBB : LIPredBBs)
+        IncomingBlocksWithoutDep.push_back(CurPredBB);
+      NewValue = tryReplaceLoadWithPhi(LI, InsertBB, IncomingBlocksWithDep,
+                                       IncomingBlocksWithoutDep);
+    }
     // If we could not eliminate the load, then we need run value numbering for
     // the load (the load does not have an expression up to this point) and its
     // uses.
diff --git a/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll b/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll
new file mode 100644
index 000000000000000..beb2c9d0e91c142
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/load_coercion_replace_load_with_phi.ll
@@ -0,0 +1,3787 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -p=gvn < %s | FileCheck %s -check-prefixes=GVN,OLDGVN
+; RUN: opt -S -p=newgvn < %s | FileCheck %s -check-prefixes=GVN,NEWGVN
+
+define i32 @test1(ptr %P, i32 %V1, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; GVN-LABEL: @test1(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    br label [[EXIT:%.*]]
+; GVN:       F:
+; GVN-NEXT:    store i32 13, ptr [[P]], align 1
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[V2:%.*]] = phi i32 [ 13, [[F]] ], [ [[V1]], [[T]] ]
+; GVN-NEXT:    ret i32 [[V2]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  store i32 %V1, ptr %P, align 1
+  br label %Exit
+
+F:
+  store i32 13, ptr %P, align 1
+  br label %Exit
+
+Exit:
+  %V2 = load i32, ptr %P, align 1
+  ret i32 %V2
+}
+
+define i8 @test2(ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test2(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 42, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[V4_PRE:%.*]] = load i8, ptr [[P1]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V4:%.*]] = phi i8 [ [[V4_PRE]], [[F]] ], [ 0, [[T]] ]
+; OLDGVN-NEXT:    ret i8 [[V4]]
+;
+; NEWGVN-LABEL: @test2(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i32 42, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[V41:%.*]] = load i8, ptr [[P1]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V4:%.*]] = phi i8 [ [[V41]], [[F]] ], [ 0, [[T]] ]
+; NEWGVN-NEXT:    ret i8 [[V4]]
+;
+Entry:
+  %P1 = getelementptr i8, ptr %P, i32 2
+  br i1 %Cond, label %T, label %F
+
+T:
+  store i32 42, ptr %P, align 1
+  br label %Exit
+
+F:
+  br label %Exit
+
+Exit:
+  %V4 = load i8, ptr %P1, align 1
+  ret i8 %V4
+}
+
+define i32 @test3(ptr %P, i1 %Cond1, i1 %Cond2) {
+;   Entry
+;   /   \
+;  T     F
+;  |    / \
+;  |   F1 F2
+;   \  |  |
+;    v v  v
+;     Exit
+;
+; OLDGVN-LABEL: @test3(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    br i1 [[COND2:%.*]], label [[F1:%.*]], label [[F2:%.*]]
+; OLDGVN:       F1:
+; OLDGVN-NEXT:    store i32 13, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       F2:
+; OLDGVN-NEXT:    [[V1_PRE:%.*]] = load i32, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V1:%.*]] = phi i32 [ 42, [[T]] ], [ 13, [[F1]] ], [ [[V1_PRE]], [[F2]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 1, [[T]] ], [ 2, [[F1]] ], [ 3, [[F2]] ]
+; OLDGVN-NEXT:    [[V2:%.*]] = add i32 [[PHI]], [[V1]]
+; OLDGVN-NEXT:    ret i32 [[V2]]
+;
+; NEWGVN-LABEL: @test3(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    br i1 [[COND2:%.*]], label [[F1:%.*]], label [[F2:%.*]]
+; NEWGVN:       F1:
+; NEWGVN-NEXT:    store i32 13, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       F2:
+; NEWGVN-NEXT:    [[V11:%.*]] = load i32, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V1:%.*]] = phi i32 [ 42, [[T]] ], [ 13, [[F1]] ], [ [[V11]], [[F2]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 1, [[T]] ], [ 2, [[F1]] ], [ 3, [[F2]] ]
+; NEWGVN-NEXT:    [[V2:%.*]] = add i32 [[PHI]], [[V1]]
+; NEWGVN-NEXT:    ret i32 [[V2]]
+;
+Entry:
+  br i1 %Cond1, label %T, label %F
+
+T:
+  store i32 42, ptr %P, align 1
+  br label %Exit
+
+F:
+  br i1 %Cond2, label %F1, label %F2
+
+F1:
+  store i32 13, ptr %P, align 1
+  br label %Exit
+
+F2:
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [1, %T], [2, %F1], [3, %F2]
+  %V1 = load i32, ptr %P, align 1
+  %V2 = add i32 %Phi, %V1
+  ret i32 %V2
+}
+
+define i32 @test4(ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test4(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    [[V2:%.*]] = extractelement <2 x i32> [[V1]], i64 1
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[V3_PRE:%.*]] = load i32, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V3:%.*]] = phi i32 [ [[V3_PRE]], [[F]] ], [ [[TMP1]], [[T]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 100, [[F]] ], [ [[V2]], [[T]] ]
+; OLDGVN-NEXT:    [[V4:%.*]] = add i32 [[V3]], [[PHI]]
+; OLDGVN-NEXT:    ret i32 [[V4]]
+;
+; NEWGVN-LABEL: @test4(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; NEWGVN-NEXT:    [[V2:%.*]] = extractelement <2 x i32> [[V1]], i64 1
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[V31:%.*]] = load i32, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V3:%.*]] = phi i32 [ [[V31]], [[F]] ], [ [[TMP1]], [[T]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 100, [[F]] ], [ [[V2]], [[T]] ]
+; NEWGVN-NEXT:    [[V4:%.*]] = add i32 [[V3]], [[PHI]]
+; NEWGVN-NEXT:    ret i32 [[V4]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  %V1 = load <2 x i32>, <2 x i32>* %P, align 1
+  %V2 = extractelement <2 x i32> %V1, i64 1
+  br label %Exit
+
+F:
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [100, %F], [%V2, %T]
+  %V3 = load i32, ptr %P, align 1
+  %V4 = add i32 %V3, %Phi
+  ret i32 %V4
+}
+
+define i32 @test5(ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test5(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    [[V4_PRE:%.*]] = load i32, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[V2:%.*]] = load <2 x i32>, ptr [[P]], align 1
+; OLDGVN-NEXT:    [[V3:%.*]] = extractelement <2 x i32> [[V2]], i64 1
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to i64
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V4:%.*]] = phi i32 [ [[V4_PRE]], [[T]] ], [ [[TMP1]], [[F]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[T]] ], [ [[V3]], [[F]] ]
+; OLDGVN-NEXT:    [[V5:%.*]] = add i32 [[V4]], [[PHI]]
+; OLDGVN-NEXT:    ret i32 [[V5]]
+;
+; NEWGVN-LABEL: @test5(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    [[V41:%.*]] = load i32, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[V2:%.*]] = load <2 x i32>, ptr [[P]], align 1
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to i64
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; NEWGVN-NEXT:    [[V3:%.*]] = extractelement <2 x i32> [[V2]], i64 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V4:%.*]] = phi i32 [ [[V41]], [[T]] ], [ [[TMP1]], [[F]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[T]] ], [ [[V3]], [[F]] ]
+; NEWGVN-NEXT:    [[V5:%.*]] = add i32 [[V4]], [[PHI]]
+; NEWGVN-NEXT:    ret i32 [[V5]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  %V1 = load <2 x i32>, <2 x i32>* %P, align 1
+  br label %Exit
+
+F:
+  %V2 = load <2 x i32>, <2 x i32>* %P, align 1
+  %V3 = extractelement <2 x i32> %V2, i64 1
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [0, %T], [%V3, %F]
+  %V4 = load i32, ptr %P, align 1
+  %V5 = add i32 %V4, %Phi
+  ret i32 %V5
+}
+
+define i32 @test6(ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test6(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[V2:%.*]] = load <2 x i32>, ptr [[P]], align 1
+; OLDGVN-NEXT:    [[V3:%.*]] = extractelement <2 x i32> [[V2]], i64 1
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to i64
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V4:%.*]] = phi i32 [ [[V1]], [[T]] ], [ [[TMP1]], [[F]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V1]], [[T]] ], [ [[V3]], [[F]] ]
+; OLDGVN-NEXT:    [[V5:%.*]] = add i32 [[PHI]], [[PHI]]
+; OLDGVN-NEXT:    [[V6:%.*]] = add i32 [[V5]], [[V4]]
+; OLDGVN-NEXT:    ret i32 [[V6]]
+;
+; NEWGVN-LABEL: @test6(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[V2:%.*]] = load <2 x i32>, ptr [[P]], align 1
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V2]] to i64
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; NEWGVN-NEXT:    [[V3:%.*]] = extractelement <2 x i32> [[V2]], i64 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V4:%.*]] = phi i32 [ [[V1]], [[T]] ], [ [[TMP1]], [[F]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V1]], [[T]] ], [ [[V3]], [[F]] ]
+; NEWGVN-NEXT:    [[V5:%.*]] = add i32 [[PHI]], [[PHI]]
+; NEWGVN-NEXT:    [[V6:%.*]] = add i32 [[V5]], [[V4]]
+; NEWGVN-NEXT:    ret i32 [[V6]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  %V1 = load i32, ptr %P, align 1
+  br label %Exit
+
+F:
+  %V2 = load <2 x i32>, <2 x i32>* %P, align 1
+  %V3 = extractelement <2 x i32> %V2, i64 1
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [%V1, %T], [%V3, %F]
+  %V4 = load i32, ptr %P, align 1
+  %V5 = add i32 %Phi, %Phi
+  %V6 = add i32 %V5, %V4
+  ret i32 %V6
+}
+
+define i32 @test7(ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; GVN-LABEL: @test7(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; GVN-NEXT:    br label [[EXIT:%.*]]
+; GVN:       F:
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V1]], [[T]] ], [ 0, [[F]] ]
+; GVN-NEXT:    [[V2:%.*]] = load <2 x i32>, ptr [[P]], align 1
+; GVN-NEXT:    [[V3:%.*]] = extractelement <2 x i32> [[V2]], i64 1
+; GVN-NEXT:    [[V5:%.*]] = add i32 [[PHI]], [[V3]]
+; GVN-NEXT:    ret i32 [[V5]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  %V1 = load i32, ptr %P, align 1
+  br label %Exit
+
+F:
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [%V1, %T], [0, %F]
+  %V2 = load <2 x i32>, <2 x i32>* %P, align 1
+  %V3 = extractelement <2 x i32> %V2, i64 1
+  %V5 = add i32 %Phi, %V3
+  ret i32 %V5
+}
+
+define i32 @test8(ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test8(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    store i32 13, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V3:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ 13, [[F]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ 100, [[F]] ]
+; OLDGVN-NEXT:    [[V4:%.*]] = add i32 [[PHI]], [[V3]]
+; OLDGVN-NEXT:    ret i32 [[V4]]
+;
+; NEWGVN-LABEL: @test8(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32
+; NEWGVN-NEXT:    [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    store i32 13, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V3:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ 13, [[F]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ 100, [[F]] ]
+; NEWGVN-NEXT:    [[V4:%.*]] = add i32 [[PHI]], [[V3]]
+; NEWGVN-NEXT:    ret i32 [[V4]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  %V1 = load <4 x i32>, <4 x i32>* %P, align 1
+  %V2 = extractelement <4 x i32> %V1, i64 2
+  br label %Exit
+
+F:
+  store i32 13, ptr %P, align 1
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [ %V2, %T ], [ 100, %F ]
+  %V3 = load i32, ptr %P, align 1
+  %V4 = add i32 %Phi, %V3
+  ret i32 %V4
+}
+
+define i32 @test9(ptr %P, i1 %Cond) {
+;   Entry
+;   /  |
+;  BB  |
+;   |  |
+;   v  v
+;   Exit
+;
+; GVN-LABEL: @test9(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    store i32 42, ptr [[P:%.*]], align 4
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
+; GVN:       BB:
+; GVN-NEXT:    store i32 13, ptr [[P]], align 4
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[V1:%.*]] = phi i32 [ 13, [[BB]] ], [ 42, [[ENTRY:%.*]] ]
+; GVN-NEXT:    ret i32 [[V1]]
+;
+Entry:
+  store i32 42, ptr %P, align 4
+  br i1 %Cond, label %BB, label %Exit
+
+BB:
+  store i32 13, ptr %P, align 4
+  br label %Exit
+
+Exit:
+  %V1 = load i32, ptr %P, align 4
+  ret i32 %V1
+}
+
+define i32 @test10(ptr %P, i1 %Cond) {
+;   Entry
+;   /  |
+;  BB  |
+;   |  |
+;   v  v
+;   Exit
+;
+; OLDGVN-LABEL: @test10(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
+; OLDGVN:       BB:
+; OLDGVN-NEXT:    store i32 13, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V3:%.*]] = phi i32 [ [[TMP1]], [[ENTRY:%.*]] ], [ 13, [[BB]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[ENTRY]] ], [ 100, [[BB]] ]
+; OLDGVN-NEXT:    [[V4:%.*]] = add i32 [[PHI]], [[V3]]
+; OLDGVN-NEXT:    ret i32 [[V4]]
+;
+; NEWGVN-LABEL: @test10(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32
+; NEWGVN-NEXT:    [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[BB:%.*]], label [[EXIT:%.*]]
+; NEWGVN:       BB:
+; NEWGVN-NEXT:    store i32 13, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V3:%.*]] = phi i32 [ [[TMP1]], [[ENTRY:%.*]] ], [ 13, [[BB]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[ENTRY]] ], [ 100, [[BB]] ]
+; NEWGVN-NEXT:    [[V4:%.*]] = add i32 [[PHI]], [[V3]]
+; NEWGVN-NEXT:    ret i32 [[V4]]
+;
+Entry:
+  %V1 = load <4 x i32>, <4 x i32>* %P, align 1
+  %V2 = extractelement <4 x i32> %V1, i64 2
+  br i1 %Cond, label %BB, label %Exit
+
+BB:
+  store i32 13, ptr %P, align 1
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [ %V2, %Entry ], [ 100, %BB ]
+  %V3 = load i32, ptr %P, align 1
+  %V4 = add i32 %Phi, %V3
+  ret i32 %V4
+}
+
+define i32 @test11(ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test11(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    store <4 x i32> <i32 13, i32 13, i32 13, i32 13>, ptr [[P]], align 1
+; OLDGVN-NEXT:    store i32 100, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V4:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ 100, [[F]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ 13, [[F]] ]
+; OLDGVN-NEXT:    [[V5:%.*]] = add i32 [[PHI]], [[V4]]
+; OLDGVN-NEXT:    ret i32 [[V5]]
+;
+; NEWGVN-LABEL: @test11(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32
+; NEWGVN-NEXT:    [[V2:%.*]] = extractelement <4 x i32> [[V1]], i64 2
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    store <4 x i32> <i32 13, i32 13, i32 13, i32 13>, ptr [[P]], align 1
+; NEWGVN-NEXT:    store i32 100, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V4:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ 100, [[F]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ 13, [[F]] ]
+; NEWGVN-NEXT:    [[V5:%.*]] = add i32 [[PHI]], [[V4]]
+; NEWGVN-NEXT:    ret i32 [[V5]]
+;
+Entry:
+  %V1 = load <4 x i32>, <4 x i32>* %P, align 1
+  %V2 = extractelement <4 x i32> %V1, i64 2
+  br i1 %Cond, label %T, label %F
+
+T:
+  br label %Exit
+
+F:
+  store <4 x i32> <i32 13, i32 13, i32 13, i32 13>, <4 x i32>* %P, align 1
+  %V3 = load i32, ptr %P, align 1
+  store i32 100, ptr %P, align 1
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [ %V2, %T ], [ %V3, %F ]
+  %V4 = load i32, ptr %P, align 1
+  %V5 = add i32 %Phi, %V4
+  ret i32 %V5
+}
+
+define i16 @test12(ptr %P, i32 %V, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; GVN-LABEL: @test12(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    store i16 13, ptr [[P]], align 1
+; GVN-NEXT:    br label [[EXIT:%.*]]
+; GVN:       F:
+; GVN-NEXT:    store i32 [[V:%.*]], ptr [[P]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = trunc i32 [[V]] to i16
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[V2:%.*]] = phi i16 [ [[TMP0]], [[F]] ], [ 13, [[T]] ]
+; GVN-NEXT:    ret i16 [[V2]]
+;
+Entry:
+  store i32 42, ptr %P, align 1
+  br i1 %Cond, label %T, label %F
+
+T:
+  store i16 13, ptr %P, align 1
+  br label %Exit
+
+F:
+  store i32 %V, ptr %P, align 1
+  br label %Exit
+
+Exit:
+  %V2 = load i16, ptr %P, align 1
+  ret i16 %V2
+}
+
+define i32 @test13(ptr %P1,  ptr %P2, i1 %cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test13(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 42, ptr [[P1:%.*]], align 4
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[V1_PRE:%.*]] = load i32, ptr [[P1]], align 4
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V1:%.*]] = phi i32 [ [[V1_PRE]], [[F]] ], [ 42, [[T]] ]
+; OLDGVN-NEXT:    [[V2:%.*]] = load i32, ptr [[P2:%.*]], align 4
+; OLDGVN-NEXT:    [[V3:%.*]] = add i32 [[V1]], [[V2]]
+; OLDGVN-NEXT:    ret i32 [[V3]]
+;
+; NEWGVN-LABEL: @test13(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i32 42, ptr [[P1:%.*]], align 4
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[V11:%.*]] = load i32, ptr [[P1]], align 4
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V1:%.*]] = phi i32 [ [[V11]], [[F]] ], [ 42, [[T]] ]
+; NEWGVN-NEXT:    [[V2:%.*]] = load i32, ptr [[P2:%.*]], align 4
+; NEWGVN-NEXT:    [[V3:%.*]] = add i32 [[V1]], [[V2]]
+; NEWGVN-NEXT:    ret i32 [[V3]]
+;
+Entry:
+  br i1 %cond, label %T, label %F
+
+T:
+  store i32 42, ptr %P1, align 4
+  br label %Exit
+
+F:
+  br label %Exit
+
+Exit:
+  %V1 = load i32, ptr %P1, align 4
+  %V2 = load i32, ptr %P2, align 4
+  %V3 = add i32 %V1, %V2
+  ret i32 %V3
+}
+
+define i8 @test14(i32 %V1, ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test14(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    [[TMP0:%.*]] = lshr i32 [[V1]], 16
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[GEP_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; OLDGVN-NEXT:    [[V2_PRE:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V2:%.*]] = phi i8 [ [[TMP1]], [[T]] ], [ [[V2_PRE]], [[F]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i8 [ 0, [[T]] ], [ 100, [[F]] ]
+; OLDGVN-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; OLDGVN-NEXT:    [[V3:%.*]] = add i8 [[PHI]], [[V2]]
+; OLDGVN-NEXT:    ret i8 [[V3]]
+;
+; NEWGVN-LABEL: @test14(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    [[TMP0:%.*]] = lshr i32 [[V1]], 16
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[GEP_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; NEWGVN-NEXT:    [[V21:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V2:%.*]] = phi i8 [ [[TMP1]], [[T]] ], [ [[V21]], [[F]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i8 [ 0, [[T]] ], [ 100, [[F]] ]
+; NEWGVN-NEXT:    [[V3:%.*]] = add i8 [[PHI]], [[V2]]
+; NEWGVN-NEXT:    ret i8 [[V3]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  store i32 %V1, ptr %P, align 1
+  br label %Exit
+
+F:
+  br label %Exit
+
+Exit:
+  %Phi = phi i8 [ 0, %T ], [ 100, %F ]
+  %Gep = getelementptr i8, ptr %P, i32 2
+  %V2 = load i8, ptr %Gep, align 1
+  %V3 = add i8 %Phi, %V2
+  ret i8 %V3
+}
+
+define i8 @test15(ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test15(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    [[V2:%.*]] = lshr i32 [[V1]], 16
+; OLDGVN-NEXT:    [[V3:%.*]] = trunc i32 [[V2]] to i8
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[GEP_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; OLDGVN-NEXT:    [[V4_PRE:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V4:%.*]] = phi i8 [ [[V3]], [[T]] ], [ [[V4_PRE]], [[F]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i8 [ [[V3]], [[T]] ], [ 100, [[F]] ]
+; OLDGVN-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; OLDGVN-NEXT:    [[V5:%.*]] = add i8 [[PHI]], [[V4]]
+; OLDGVN-NEXT:    ret i8 [[V5]]
+;
+; NEWGVN-LABEL: @test15(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    [[V1:%.*]] = load i32, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    [[TMP0:%.*]] = lshr i32 [[V1]], 16
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[GEP_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i32 2
+; NEWGVN-NEXT:    [[V41:%.*]] = load i8, ptr [[GEP_PHI_TRANS_INSERT]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V4:%.*]] = phi i8 [ [[TMP1]], [[T]] ], [ [[V41]], [[F]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i8 [ [[TMP1]], [[T]] ], [ 100, [[F]] ]
+; NEWGVN-NEXT:    [[V5:%.*]] = add i8 [[PHI]], [[V4]]
+; NEWGVN-NEXT:    ret i8 [[V5]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  %V1 = load i32, ptr %P, align 1
+  %V2 = lshr i32 %V1, 16
+  %V3 = trunc i32 %V2 to i8
+  br label %Exit
+
+F:
+  br label %Exit
+
+Exit:
+  %Phi = phi i8 [ %V3, %T ], [ 100, %F ]
+  %Gep = getelementptr i8, ptr %P, i32 2
+  %V4 = load i8, ptr %Gep, align 1
+  %V5 = add i8 %Phi, %V4
+  ret i8 %V5
+}
+
+define i32 @test16(ptr %P, i1 %cond1, i1 %cond2) {
+;   Entry
+;   /   \
+; BB1   BB2
+;   \   /
+;    BB3
+;   /   \
+; BB4   BB5
+;   \   /
+;    BB6
+;     |
+;   Exit
+;
+; OLDGVN-LABEL: @test16(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V1]] to i128
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i32
+; OLDGVN-NEXT:    br label [[BB3:%.*]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    [[V2:%.*]] = load <4 x i32>, ptr [[P]], align 16
+; OLDGVN-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[V2]] to i128
+; OLDGVN-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32
+; OLDGVN-NEXT:    br label [[BB3]]
+; OLDGVN:       BB3:
+; OLDGVN-NEXT:    [[V5:%.*]] = phi i32 [ [[TMP1]], [[BB1]] ], [ [[TMP3]], [[BB2]] ]
+; OLDGVN-NEXT:    [[PHI1:%.*]] = phi <4 x i32> [ [[V1]], [[BB1]] ], [ [[V2]], [[BB2]] ]
+; OLDGVN-NEXT:    br i1 [[COND2:%.*]], label [[BB4:%.*]], label [[BB5:%.*]]
+; OLDGVN:       BB4:
+; OLDGVN-NEXT:    [[V3:%.*]] = extractelement <4 x i32> [[PHI1]], i64 1
+; OLDGVN-NEXT:    br label [[BB6:%.*]]
+; OLDGVN:       BB5:
+; OLDGVN-NEXT:    br label [[BB6]]
+; OLDGVN:       BB6:
+; OLDGVN-NEXT:    [[PHI2:%.*]] = phi i32 [ [[V3]], [[BB4]] ], [ [[V5]], [[BB5]] ]
+; OLDGVN-NEXT:    [[V6:%.*]] = add i32 [[PHI2]], [[V5]]
+; OLDGVN-NEXT:    ret i32 [[V6]]
+;
+; NEWGVN-LABEL: @test16(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    [[V1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16
+; NEWGVN-NEXT:    br label [[BB3:%.*]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    [[V2:%.*]] = load <4 x i32>, ptr [[P]], align 16
+; NEWGVN-NEXT:    br label [[BB3]]
+; NEWGVN:       BB3:
+; NEWGVN-NEXT:    [[PHI1:%.*]] = phi <4 x i32> [ [[V1]], [[BB1]] ], [ [[V2]], [[BB2]] ]
+; NEWGVN-NEXT:    br i1 [[COND2:%.*]], label [[BB4:%.*]], label [[BB5:%.*]]
+; NEWGVN:       BB4:
+; NEWGVN-NEXT:    [[V3:%.*]] = extractelement <4 x i32> [[PHI1]], i64 1
+; NEWGVN-NEXT:    [[V51:%.*]] = load i32, ptr [[P]], align 4
+; NEWGVN-NEXT:    br label [[BB6:%.*]]
+; NEWGVN:       BB5:
+; NEWGVN-NEXT:    [[V4:%.*]] = load i32, ptr [[P]], align 4
+; NEWGVN-NEXT:    br label [[BB6]]
+; NEWGVN:       BB6:
+; NEWGVN-NEXT:    [[V5:%.*]] = phi i32 [ [[V51]], [[BB4]] ], [ [[V4]], [[BB5]] ]
+; NEWGVN-NEXT:    [[PHI2:%.*]] = phi i32 [ [[V3]], [[BB4]] ], [ [[V4]], [[BB5]] ]
+; NEWGVN-NEXT:    [[V6:%.*]] = add i32 [[PHI2]], [[V5]]
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    ret i32 [[V6]]
+;
+Entry:
+  br i1 %cond1, label %BB1, label %BB2
+
+BB1:
+  %V1 = load <4 x i32>, ptr %P
+  br label %BB3
+
+BB2:
+  %V2 = load <4 x i32>, ptr %P
+  br label %BB3
+
+BB3:
+  %Phi1 = phi <4 x i32> [ %V1, %BB1 ], [ %V2, %BB2 ]
+  br i1 %cond2, label %BB4, label %BB5
+
+BB4:
+  %V3 = extractelement <4 x i32> %Phi1, i64 1
+  br label %BB6
+
+BB5:
+  %V4 = load i32, ptr %P
+  br label %BB6
+
+BB6:
+  %Phi2 = phi i32 [ %V3, %BB4 ], [ %V4, %BB5 ]
+  %V5 = load i32, ptr %P
+  %V6 = add i32 %Phi2, %V5
+  br label %Exit
+
+Exit:
+  ret i32 %V6
+}
+
+define i32 @test17(ptr %P1, ptr %P2, i1 %cond1, i1 %cond2) {
+;   Entry
+;   /   \
+;  T     F
+;  |    / \
+;   \  F1 F2
+;    \ | /
+;    Exit
+;
+; GVN-LABEL: @test17(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    store i32 42, ptr [[P2:%.*]], align 4
+; GVN-NEXT:    br label [[EXIT:%.*]]
+; GVN:       F:
+; GVN-NEXT:    br i1 [[COND2:%.*]], label [[F1:%.*]], label [[F2:%.*]]
+; GVN:       F1:
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       F2:
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[V3:%.*]] = load i32, ptr [[P2]], align 4
+; GVN-NEXT:    ret i32 [[V3]]
+;
+Entry:
+  %V1 = load <2 x i32>, <2 x i32>* %P1
+  br i1 %cond1, label %T, label %F
+
+T:
+  store i32 42, ptr %P2
+  br label %Exit
+
+F:
+  br i1 %cond2, label %F1, label %F2
+
+F1:
+  %V2 = load i32, ptr %P2
+  br label %Exit
+
+F2:
+  br label %Exit
+
+Exit:
+  %V3 = load i32, ptr %P2
+  ret i32 %V3
+}
+
+define i32 @test18(ptr %P1, ptr %P2, i1 %cond1, i1 %cond2) {
+;   Entry
+;   /   \
+;  T     F
+;  |    / \
+;  |  F1 F2
+;   \ /  |
+;    BB  |
+;     \  |
+;     Exit
+;
+; OLDGVN-LABEL: @test18(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 42, ptr [[P2:%.*]], align 4
+; OLDGVN-NEXT:    br label [[BB:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    br i1 [[COND2:%.*]], label [[F1:%.*]], label [[F2:%.*]]
+; OLDGVN:       F1:
+; OLDGVN-NEXT:    [[V3_PRE:%.*]] = load i32, ptr [[P2]], align 4
+; OLDGVN-NEXT:    br label [[BB]]
+; OLDGVN:       F2:
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       BB:
+; OLDGVN-NEXT:    [[V3:%.*]] = phi i32 [ [[V3_PRE]], [[F1]] ], [ 42, [[T]] ]
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V3]], [[BB]] ], [ 100, [[F2]] ]
+; OLDGVN-NEXT:    ret i32 [[PHI]]
+;
+; NEWGVN-LABEL: @test18(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i32 42, ptr [[P2:%.*]], align 4
+; NEWGVN-NEXT:    br label [[BB:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    br i1 [[COND2:%.*]], label [[F1:%.*]], label [[F2:%.*]]
+; NEWGVN:       F1:
+; NEWGVN-NEXT:    [[V31:%.*]] = load i32, ptr [[P2]], align 4
+; NEWGVN-NEXT:    br label [[BB]]
+; NEWGVN:       F2:
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       BB:
+; NEWGVN-NEXT:    [[V3:%.*]] = phi i32 [ [[V31]], [[F1]] ], [ 42, [[T]] ]
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V3]], [[BB]] ], [ 100, [[F2]] ]
+; NEWGVN-NEXT:    ret i32 [[PHI]]
+;
+Entry:
+  %V1 = load <2 x i32>, ptr %P1
+  br i1 %cond1, label %T, label %F
+
+T:
+  store i32 42, ptr %P2
+  br label %BB
+
+F:
+  br i1 %cond2, label %F1, label %F2
+
+F1:
+  %V2 = load i32, ptr %P2
+  br label %BB
+
+F2:
+  br label %Exit
+
+BB:
+  %V3 = load i32, ptr %P2
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [ %V3, %BB ], [ 100, %F2 ]
+  ret i32 %Phi
+}
+
+define i32 @test19(i32 *%P, i1 %Cond1, i1 %Cond2) {
+;   Entry
+;    /  \
+;   T1  F1
+;    \  /
+;    BB1
+;    /  \
+;   T2  F2
+;    \  /
+;    BB2
+;     |
+;     v
+;    Exit
+;
+; OLDGVN-LABEL: @test19(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[T1:%.*]], label [[F1:%.*]]
+; OLDGVN:       T1:
+; OLDGVN-NEXT:    br label [[BB1:%.*]]
+; OLDGVN:       F1:
+; OLDGVN-NEXT:    br label [[BB1]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    [[PHI1:%.*]] = phi i32 [ 5, [[T1]] ], [ 7, [[F1]] ]
+; OLDGVN-NEXT:    [[LD1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    [[M1:%.*]] = mul i32 [[LD1]], 5
+; OLDGVN-NEXT:    [[M2:%.*]] = mul i32 [[LD1]], 7
+; OLDGVN-NEXT:    [[A1:%.*]] = add i32 [[M1]], [[M2]]
+; OLDGVN-NEXT:    br i1 [[COND2:%.*]], label [[T2:%.*]], label [[F2:%.*]]
+; OLDGVN:       T2:
+; OLDGVN-NEXT:    br label [[BB2:%.*]]
+; OLDGVN:       F2:
+; OLDGVN-NEXT:    br label [[BB2]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    [[M3_PRE_PHI:%.*]] = phi i32 [ [[M2]], [[F2]] ], [ [[M1]], [[T2]] ]
+; OLDGVN-NEXT:    [[PHI2:%.*]] = phi i32 [ 5, [[T2]] ], [ 7, [[F2]] ]
+; OLDGVN-NEXT:    [[A2:%.*]] = add i32 [[A1]], [[PHI1]]
+; OLDGVN-NEXT:    [[A3:%.*]] = add i32 [[M3_PRE_PHI]], [[A2]]
+; OLDGVN-NEXT:    ret i32 [[A3]]
+;
+; NEWGVN-LABEL: @test19(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[T1:%.*]], label [[F1:%.*]]
+; NEWGVN:       T1:
+; NEWGVN-NEXT:    br label [[BB1:%.*]]
+; NEWGVN:       F1:
+; NEWGVN-NEXT:    br label [[BB1]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    [[PHI1:%.*]] = phi i32 [ 5, [[T1]] ], [ 7, [[F1]] ]
+; NEWGVN-NEXT:    [[LD1:%.*]] = load i32, ptr [[P:%.*]], align 4
+; NEWGVN-NEXT:    [[M1:%.*]] = mul i32 [[LD1]], 5
+; NEWGVN-NEXT:    [[M2:%.*]] = mul i32 [[LD1]], 7
+; NEWGVN-NEXT:    [[A1:%.*]] = add i32 [[M1]], [[M2]]
+; NEWGVN-NEXT:    br i1 [[COND2:%.*]], label [[T2:%.*]], label [[F2:%.*]]
+; NEWGVN:       T2:
+; NEWGVN-NEXT:    br label [[BB2:%.*]]
+; NEWGVN:       F2:
+; NEWGVN-NEXT:    br label [[BB2]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    [[PHI2:%.*]] = phi i32 [ 5, [[T2]] ], [ 7, [[F2]] ]
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[M3:%.*]] = mul i32 [[LD1]], [[PHI2]]
+; NEWGVN-NEXT:    [[A2:%.*]] = add i32 [[A1]], [[PHI1]]
+; NEWGVN-NEXT:    [[A3:%.*]] = add i32 [[M3]], [[A2]]
+; NEWGVN-NEXT:    ret i32 [[A3]]
+;
+Entry:
+  br i1 %Cond1, label %T1, label %F1
+
+T1:
+  br label %BB1
+
+F1:
+  br label %BB1
+
+BB1:
+  %Phi1 = phi i32 [ 5, %T1 ], [ 7, %F1 ]
+  %Ld1 = load i32, i32 *%P, align 4
+  %M1 = mul i32 %Ld1, 5
+  %M2 = mul i32 %Ld1, 7
+  %A1 = add i32 %M1, %M2
+  br i1 %Cond2, label %T2, label %F2
+
+T2:
+  br label %BB2
+
+F2:
+  br label %BB2
+
+BB2:
+  %Phi2 = phi i32 [ 5, %T2 ], [ 7, %F2 ]
+  br label %Exit
+
+Exit:
+  %Ld2 = load i32, i32 *%P, align 4
+  %M3 = mul i32 %Ld2, %Phi2
+  %A2 = add i32 %A1, %Phi1
+  %A3 = add i32 %M3, %A2
+  ret i32 %A3
+}
+
+define i32 @test20(ptr %P, i32 %V1, i1 %Cond1, i1 %Cond2) {
+;     Entry
+;     /   \
+;    T     F
+;   / \    |
+; BB1 BB2  |
+;    \ |  /
+;    v v v
+;     Exit
+;
+; GVN-LABEL: @test20(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    store i32 [[V1:%.*]], ptr [[P:%.*]], align 4
+; GVN-NEXT:    br i1 [[COND2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; GVN:       F:
+; GVN-NEXT:    store i32 13, ptr [[P]], align 4
+; GVN-NEXT:    br label [[EXIT:%.*]]
+; GVN:       BB1:
+; GVN-NEXT:    [[V2:%.*]] = add i32 [[V1]], 100
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       BB2:
+; GVN-NEXT:    [[V3:%.*]] = add i32 [[V1]], 13
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[V4:%.*]] = phi i32 [ 13, [[F]] ], [ [[V1]], [[BB1]] ], [ [[V1]], [[BB2]] ]
+; GVN-NEXT:    [[PHI:%.*]] = phi i32 [ 10, [[F]] ], [ [[V2]], [[BB1]] ], [ [[V3]], [[BB2]] ]
+; GVN-NEXT:    [[V5:%.*]] = add i32 [[PHI]], [[V4]]
+; GVN-NEXT:    ret i32 [[V5]]
+;
+Entry:
+  br i1 %Cond1, label %T, label %F
+
+T:
+  store i32 %V1, ptr %P, align 4
+  br i1 %Cond2, label %BB1, label %BB2
+
+F:
+  store i32 13, ptr %P, align 4
+  br label %Exit
+
+BB1:
+  %V2 = add i32 %V1, 100
+  br label %Exit
+
+BB2:
+  %V3 = add i32 %V1, 13
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [ 10, %F ], [ %V2, %BB1 ], [ %V3, %BB2 ]
+  %V4 = load i32, ptr %P, align 4
+  %V5 = add i32 %Phi, %V4
+  ret i32 %V5
+}
+
+define i32 @test21(ptr %P1, ptr %P2, i32 %V1) {
+;  Entry
+;   / |
+;  T  |
+;   \ |
+;    F
+;    |
+;    BB
+;    |
+;    v
+;   Exit
+;
+; OLDGVN-LABEL: @test21(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    [[COND:%.*]] = icmp eq i32 [[V1:%.*]], 0
+; OLDGVN-NEXT:    [[V5_PRE:%.*]] = load i32, ptr [[P1:%.*]], align 4
+; OLDGVN-NEXT:    br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 [[V5_PRE]], ptr [[P2:%.*]], align 4
+; OLDGVN-NEXT:    br label [[F]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V5_PRE]], [[T]] ], [ [[V1]], [[ENTRY:%.*]] ]
+; OLDGVN-NEXT:    [[V3:%.*]] = add i32 [[PHI]], 100
+; OLDGVN-NEXT:    [[V4:%.*]] = add i32 [[V3]], [[V1]]
+; OLDGVN-NEXT:    [[V6:%.*]] = add i32 [[V5_PRE]], [[V4]]
+; OLDGVN-NEXT:    ret i32 [[V6]]
+;
+; NEWGVN-LABEL: @test21(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    [[COND:%.*]] = icmp eq i32 [[V1:%.*]], 0
+; NEWGVN-NEXT:    br i1 [[COND]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    [[V2:%.*]] = load i32, ptr [[P1:%.*]], align 4
+; NEWGVN-NEXT:    store i32 [[V2]], ptr [[P2:%.*]], align 4
+; NEWGVN-NEXT:    br label [[F]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ [[V1]], [[ENTRY:%.*]] ]
+; NEWGVN-NEXT:    [[V3:%.*]] = add i32 [[PHI]], 100
+; NEWGVN-NEXT:    br label [[BB:%.*]]
+; NEWGVN:       BB:
+; NEWGVN-NEXT:    [[V4:%.*]] = add i32 [[V3]], [[V1]]
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V5:%.*]] = load i32, ptr [[P1]], align 4
+; NEWGVN-NEXT:    [[V6:%.*]] = add i32 [[V5]], [[V4]]
+; NEWGVN-NEXT:    ret i32 [[V6]]
+;
+Entry:
+  %Cond = icmp eq i32 %V1, 0
+  br i1 %Cond, label %T, label %F
+
+T:
+  %V2 = load i32, ptr %P1, align 4
+  store i32 %V2, ptr %P2, align 4
+  br label %F
+
+F:
+  %Phi = phi i32 [ %V2, %T ], [ %V1, %Entry ]
+  %V3 = add i32 %Phi, 100
+  br label %BB
+
+BB:
+  %V4 = add i32 %V3, %V1
+  br label %Exit
+
+Exit:
+  %V5 = load i32, ptr %P1, align 4
+  %V6 = add i32 %V5, %V4
+  ret i32 %V6
+}
+
+define i32 @test22(ptr %P, i32 %V1, i1 %Cond1)  {
+;   Entry
+;    /  \
+;  BB1  BB2
+;    \  /
+;     BB3
+;    /  |
+;  BB4  |
+;    \  |
+;     v v
+;     Exit
+;
+; OLDGVN-LABEL: @test22(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    store i32 5, ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    br label [[BB3:%.*]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    store i32 5, ptr [[P]], align 4
+; OLDGVN-NEXT:    br label [[BB3]]
+; OLDGVN:       BB3:
+; OLDGVN-NEXT:    [[PHI1:%.*]] = phi i32 [ 10, [[BB1]] ], [ 5, [[BB2]] ]
+; OLDGVN-NEXT:    [[COND2:%.*]] = icmp ne i32 [[V1:%.*]], 0
+; OLDGVN-NEXT:    br i1 [[COND2]], label [[BB4:%.*]], label [[EXIT:%.*]]
+; OLDGVN:       BB4:
+; OLDGVN-NEXT:    [[V6:%.*]] = add nsw i32 [[PHI1]], 5
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[PHI2:%.*]] = phi i32 [ [[PHI1]], [[BB3]] ], [ [[V6]], [[BB4]] ]
+; OLDGVN-NEXT:    ret i32 [[PHI2]]
+;
+; NEWGVN-LABEL: @test22(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    store i32 5, ptr [[P:%.*]], align 4
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    br label [[BB3:%.*]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    br label [[BB3]]
+; NEWGVN:       BB3:
+; NEWGVN-NEXT:    [[PHI1:%.*]] = phi i32 [ 10, [[BB1]] ], [ 5, [[BB2]] ]
+; NEWGVN-NEXT:    [[COND2:%.*]] = icmp ne i32 [[V1:%.*]], 0
+; NEWGVN-NEXT:    br i1 [[COND2]], label [[BB4:%.*]], label [[EXIT:%.*]]
+; NEWGVN:       BB4:
+; NEWGVN-NEXT:    [[V6:%.*]] = add nsw i32 [[PHI1]], 5
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[PHI2:%.*]] = phi i32 [ [[PHI1]], [[BB3]] ], [ [[V6]], [[BB4]] ]
+; NEWGVN-NEXT:    ret i32 [[PHI2]]
+;
+Entry:
+  store i32 5, ptr %P, align 4
+  br i1 %Cond1, label %BB1, label %BB2
+
+BB1:
+  %V3 = load i32, ptr %P, align 4
+  %V4 = add nsw i32 5, %V3
+  br label %BB3
+
+BB2:
+  store i32 5, ptr %P, align 4
+  br label %BB3
+
+BB3:
+  %Phi1 = phi i32 [ %V4, %BB1 ], [ 5, %BB2 ]
+  %Cond2 = icmp ne i32 %V1, 0
+  br i1 %Cond2, label %BB4, label %Exit
+
+BB4:
+  %V5 = load i32, ptr %P, align 4
+  %V6 = add nsw i32 %Phi1, %V5
+  br label %Exit
+
+Exit:
+  %Phi2 = phi i32 [ %Phi1, %BB3 ], [ %V6, %BB4 ]
+  ret i32 %Phi2
+}
+
+; Diamond CFG. The i32 loads in T and F (overlapping the <2 x i32> / <4 x i32>
+; stores) and the i32 load in Exit are all eliminated: both GVNs forward the
+; stored vectors via bitcast+trunc and merge the coerced values with a phi in
+; Exit. NEWGVN additionally drops the dead %V6 add that OLDGVN keeps.
+define i32 @test23(ptr %P, <2 x i32> %V1, <4 x i32> %V2, i1 %Cond)  {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test23(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store <2 x i32> [[V1:%.*]], ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    store <4 x i32> [[V2:%.*]], ptr [[P]], align 4
+; OLDGVN-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[V2]] to i128
+; OLDGVN-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V5:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ [[TMP3]], [[F]] ]
+; OLDGVN-NEXT:    [[V6:%.*]] = add i32 [[V5]], [[V5]]
+; OLDGVN-NEXT:    ret i32 [[V5]]
+;
+; NEWGVN-LABEL: @test23(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store <2 x i32> [[V1:%.*]], ptr [[P:%.*]], align 4
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    store <4 x i32> [[V2:%.*]], ptr [[P]], align 4
+; NEWGVN-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[V2]] to i128
+; NEWGVN-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V5:%.*]] = phi i32 [ [[TMP1]], [[T]] ], [ [[TMP3]], [[F]] ]
+; NEWGVN-NEXT:    ret i32 [[V5]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  store <2 x i32> %V1, <2 x i32>* %P, align 4
+  %V3 = load i32, ptr %P, align 4
+  br label %Exit
+
+F:
+  store <4 x i32> %V2, <4 x i32>* %P, align 4
+  %V4 = load i32, ptr %P, align 4
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [ %V3, %T ], [ %V4, %F ]
+  %V5 = load i32, ptr %P, align 4
+  %V6 = add i32 %Phi, %V5
+  ret i32 %V5
+}
+
+declare i32 @foo1(ptr %P, i32 %V) #0
+
+; The call to @foo1 in F may clobber %P, so no value can be forwarded on that
+; path. OLDGVN PREs the load into F ([[V2_PRE]]); NEWGVN likewise materializes
+; a load in F ([[V21]]). Both merge it with the stored constant 42 from T.
+define i32 @test24(ptr %P, i32 %V, i1 %Cond) {
+; OLDGVN-LABEL: @test24(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[V1:%.*]] = call i32 @foo1(ptr [[P]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]]
+; OLDGVN-NEXT:    [[V2_PRE:%.*]] = load i32, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V2:%.*]] = phi i32 [ 42, [[T]] ], [ [[V2_PRE]], [[F]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 10, [[T]] ], [ [[V1]], [[F]] ]
+; OLDGVN-NEXT:    [[V3:%.*]] = add i32 [[PHI]], [[V2]]
+; OLDGVN-NEXT:    ret i32 [[V3]]
+;
+; NEWGVN-LABEL: @test24(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[V1:%.*]] = call i32 @foo1(ptr [[P]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]]
+; NEWGVN-NEXT:    [[V21:%.*]] = load i32, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V2:%.*]] = phi i32 [ 42, [[T]] ], [ [[V21]], [[F]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 10, [[T]] ], [ [[V1]], [[F]] ]
+; NEWGVN-NEXT:    [[V3:%.*]] = add i32 [[PHI]], [[V2]]
+; NEWGVN-NEXT:    ret i32 [[V3]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  store i32 42, ptr %P, align 1
+  br label %Exit
+
+F:
+  %V1 = call i32 @foo1(ptr %P, i32 %V) #0
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [ 10, %T ], [ %V1, %F ]
+  %V2 = load i32, ptr %P, align 1
+  %V3 =  add i32 %Phi, %V2
+  ret i32 %V3
+}
+
+; Two <2 x i32> stores reach Exit. Both GVNs (shared GVN prefix) replace the
+; full-width vector load %V3 with a phi of the stored vectors, and the
+; narrower i32 load %V5 with a phi of bitcast+trunc coercions of those stores.
+define i32 @test25(ptr %P, <2 x i32> %V1, <2 x i32> %V2, i1 %cond)  {
+; GVN-LABEL: @test25(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    store <2 x i32> [[V1:%.*]], ptr [[P:%.*]], align 1
+; GVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; GVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; GVN-NEXT:    br label [[EXIT:%.*]]
+; GVN:       F:
+; GVN-NEXT:    store <2 x i32> [[V2:%.*]], ptr [[P]], align 1
+; GVN-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[V2]] to i64
+; GVN-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[V5:%.*]] = phi i32 [ [[TMP3]], [[F]] ], [ [[TMP1]], [[T]] ]
+; GVN-NEXT:    [[V3:%.*]] = phi <2 x i32> [ [[V2]], [[F]] ], [ [[V1]], [[T]] ]
+; GVN-NEXT:    [[V4:%.*]] = extractelement <2 x i32> [[V3]], i64 1
+; GVN-NEXT:    [[V6:%.*]] = add i32 [[V4]], [[V5]]
+; GVN-NEXT:    ret i32 [[V6]]
+;
+Entry:
+  br i1 %cond, label %T, label %F
+
+T:
+  store <2 x i32> %V1, ptr %P, align 1
+  br label %Exit
+
+F:
+  store <2 x i32> %V2, ptr %P, align 1
+  br label %Exit
+
+Exit:
+  %V3 = load <2 x i32>, ptr %P, align 1
+  %V4 = extractelement <2 x i32> %V3, i64 1
+  %V5 = load i32, ptr %P, align 1
+  %V6 = add i32 %V4, %V5
+  ret i32 %V6
+}
+
+; Load after a self-looping block: %V in Exit is replaced by a phi of the two
+; reaching stored constants (13 on the Loop edge, 42 on the Entry edge).
+define i32 @test26(ptr %P, i1 %Cond1, i1 %Cond2) {
+;   Entry    __
+;    |  \   /  |
+;    |   v v   |
+;    |   Loop  |
+;    |   /  \__|
+;    v  v
+;    Exit
+;
+; GVN-LABEL: @test26(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; GVN-NEXT:    br i1 [[COND1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]]
+; GVN:       Loop:
+; GVN-NEXT:    store i32 13, ptr [[P]], align 1
+; GVN-NEXT:    br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[V:%.*]] = phi i32 [ 13, [[LOOP]] ], [ 42, [[ENTRY:%.*]] ]
+; GVN-NEXT:    ret i32 [[V]]
+;
+Entry:
+  store i32 42, ptr %P, align 1
+  br i1 %Cond1, label %Loop, label %Exit
+
+Loop:
+  store i32 13, ptr %P, align 1
+  br i1 %Cond2, label %Loop, label %Exit
+
+Exit:
+  %V = load i32, ptr %P, align 1
+  ret i32 %V
+}
+
+; The i32 load %V3 in the loop is coerced from the <2 x i32> load in Entry
+; (bitcast to i64, trunc to i32), hoisting all memory traffic out of the loop;
+; the Exit phi then mixes the extracted element %V2 with the coerced value.
+; OLDGVN and NEWGVN differ only in the ordering of the Entry-block coercions.
+define i32 @test27(ptr %P, i1 %Cond1, i1 %Cond2) {
+;   Entry    __
+;    |  \   /  |
+;    |   v v   |
+;    |   Loop  |
+;    |   /  \__|
+;    v  v
+;    Exit
+;
+; OLDGVN-LABEL: @test27(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    [[V2:%.*]] = extractelement <2 x i32> [[V1]], i64 1
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]]
+; OLDGVN:       Loop:
+; OLDGVN-NEXT:    br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[ENTRY:%.*]] ], [ [[TMP1]], [[LOOP]] ]
+; OLDGVN-NEXT:    ret i32 [[PHI]]
+;
+; NEWGVN-LABEL: @test27(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[V1]] to i64
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
+; NEWGVN-NEXT:    [[V2:%.*]] = extractelement <2 x i32> [[V1]], i64 1
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]]
+; NEWGVN:       Loop:
+; NEWGVN-NEXT:    br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[ENTRY:%.*]] ], [ [[TMP1]], [[LOOP]] ]
+; NEWGVN-NEXT:    ret i32 [[PHI]]
+;
+Entry:
+  %V1 = load <2 x i32>, <2 x i32>* %P, align 1
+  %V2 = extractelement <2 x i32> %V1, i64 1
+  br i1 %Cond1, label %Loop, label %Exit
+
+Loop:
+  %V3 = load i32, ptr %P, align 1
+  br i1 %Cond2, label %Loop, label %Exit
+
+Exit:
+  %Phi = phi i32 [ %V2, %Entry ], [ %V3, %Loop ]
+  ret i32 %Phi
+}
+
+; Only the BB path stores a known value. OLDGVN splits the Loop->Exit critical
+; edge (Loop.Exit_crit_edge) to PRE the load and phi it with 42; NEWGVN does
+; not split edges here and leaves the load in Exit unchanged.
+define i32 @test28(ptr %P, i32 %V, i1 %cond1, i1 %cond2) {
+;   Entry   __
+;    /  \   v |
+;   BB   Loop |
+;    \  /   \_|
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test28(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB:%.*]], label [[LOOP:%.*]]
+; OLDGVN:       BB:
+; OLDGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       Loop:
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[V1:%.*]], [[LOOP]] ]
+; OLDGVN-NEXT:    [[V1]] = add i32 [[V:%.*]], [[PHI]]
+; OLDGVN-NEXT:    br i1 [[COND2:%.*]], label [[LOOP]], label [[LOOP_EXIT_CRIT_EDGE:%.*]]
+; OLDGVN:       Loop.Exit_crit_edge:
+; OLDGVN-NEXT:    [[V2_PRE:%.*]] = load i32, ptr [[P]], align 4
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V2:%.*]] = phi i32 [ [[V2_PRE]], [[LOOP_EXIT_CRIT_EDGE]] ], [ 42, [[BB]] ]
+; OLDGVN-NEXT:    ret i32 [[V2]]
+;
+; NEWGVN-LABEL: @test28(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB:%.*]], label [[LOOP:%.*]]
+; NEWGVN:       BB:
+; NEWGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 4
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       Loop:
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 100, [[ENTRY:%.*]] ], [ [[V1:%.*]], [[LOOP]] ]
+; NEWGVN-NEXT:    [[V1]] = add i32 [[V:%.*]], [[PHI]]
+; NEWGVN-NEXT:    br i1 [[COND2:%.*]], label [[LOOP]], label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V2:%.*]] = load i32, ptr [[P]], align 4
+; NEWGVN-NEXT:    ret i32 [[V2]]
+;
+Entry:
+  br i1 %cond1, label %BB, label %Loop
+
+BB:
+  store i32 42, ptr %P, align 4
+  br label %Exit
+
+Loop:
+  %phi = phi i32 [ 100, %Entry ], [ %V1, %Loop ]
+  %V1 = add i32 %V, %phi
+  br i1 %cond2, label %Loop, label %Exit
+
+Exit:
+  %V2 = load i32, ptr %P, align 4
+  ret i32 %V2
+}
+
+; Load through a phi of pointers (null from T, undef from F). OLDGVN PREs a
+; load from null into F and phis it with the undef stored in T; NEWGVN instead
+; collapses the phi-pointer load to a single load from null in Loop.Latch.
+define i32 @test29(i1 %Cond, i64 %TC) {
+;    Entry
+;      |
+;    Loop<-----+
+;     / \      |
+;    T   F     |
+;     \ /      |
+;  Loop.Latch--+
+;      |
+;      v
+;     Exit
+;
+; OLDGVN-LABEL: @test29(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br label [[LOOP:%.*]]
+; OLDGVN:       Loop:
+; OLDGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP_LATCH:%.*]] ]
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 undef, ptr null, align 4
+; OLDGVN-NEXT:    br label [[LOOP_LATCH]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    store i32 undef, ptr undef, align 4
+; OLDGVN-NEXT:    [[V0_PRE:%.*]] = load i32, ptr null, align 4
+; OLDGVN-NEXT:    br label [[LOOP_LATCH]]
+; OLDGVN:       Loop.Latch:
+; OLDGVN-NEXT:    [[V0:%.*]] = phi i32 [ [[V0_PRE]], [[F]] ], [ undef, [[T]] ]
+; OLDGVN-NEXT:    [[V1:%.*]] = xor i32 [[V0]], -1
+; OLDGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; OLDGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; OLDGVN-NEXT:    br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    ret i32 [[V1]]
+;
+; NEWGVN-LABEL: @test29(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br label [[LOOP:%.*]]
+; NEWGVN:       Loop:
+; NEWGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP_LATCH:%.*]] ]
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i32 undef, ptr null, align 4
+; NEWGVN-NEXT:    br label [[LOOP_LATCH]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    store i32 undef, ptr undef, align 4
+; NEWGVN-NEXT:    br label [[LOOP_LATCH]]
+; NEWGVN:       Loop.Latch:
+; NEWGVN-NEXT:    [[V0:%.*]] = load i32, ptr null, align 4
+; NEWGVN-NEXT:    [[V1:%.*]] = xor i32 [[V0]], -1
+; NEWGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; NEWGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; NEWGVN-NEXT:    br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    ret i32 [[V1]]
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 1, %Entry ], [ %Index.inc, %Loop.Latch ]
+  br i1 %Cond, label %T, label %F
+
+T:
+  store i32 undef, ptr null, align 4
+  br label %Loop.Latch
+
+F:
+  store i32 undef, ptr undef, align 4
+  br label %Loop.Latch
+
+Loop.Latch:
+  %Phi = phi ptr [ null, %T ], [ undef, %F ]
+  %V0 = load i32, ptr %Phi, align 4
+  %V1 = xor i32 %V0, -1
+  %Index.inc = add i64 %Index, 1
+  %Cond2 = icmp ne i64 %Index.inc, %TC
+  br i1 %Cond2, label %Loop, label %Exit
+
+Exit:
+  ret i32 %V1
+}
+
+; Fully redundant load in a loop body: %V2 duplicates %V1 with no intervening
+; write, so both GVNs delete it and the add becomes V1 + V1.
+define i64 @test30(ptr %P, i64 %TC) {
+;  Entry _
+;    |  / |
+;    | v  |
+;  Loop   |
+;    | \__|
+;    |
+;    v
+;   Exit
+;
+; GVN-LABEL: @test30(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br label [[LOOP:%.*]]
+; GVN:       Loop:
+; GVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP]] ]
+; GVN-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
+; GVN-NEXT:    [[V1:%.*]] = load i64, ptr [[P1]], align 4
+; GVN-NEXT:    [[V3:%.*]] = add i64 [[V1]], [[V1]]
+; GVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; GVN-NEXT:    [[COND:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; GVN-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; GVN:       Exit:
+; GVN-NEXT:    ret i64 [[V3]]
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 1, %Entry ], [ %Index.inc, %Loop ]
+  %P1 = getelementptr i64, ptr %P, i64 %Index
+  %V1 = load i64, ptr %P1, align 4
+  %V2 = load i64, ptr %P1, align 4
+  %V3 = add i64 %V1, %V2
+  %Index.inc = add i64 %Index, 1
+  %Cond = icmp ne i64 %Index.inc, %TC
+  br i1 %Cond, label %Loop, label %Exit
+
+Exit:
+  ret i64 %V3
+}
+
+; %V2 in the latch is redundant with %V1 on the T path. Both GVNs materialize
+; a load on the F path (OLDGVN names it V2.pre, NEWGVN names it V21) and
+; replace %V2 with a phi of the two loads.
+define i64 @test31(ptr %P, i64 %TC, i1 %Cond1) {
+;    Entry
+;      |
+;    Loop<-----+
+;     / \      |
+;    T   F     |
+;     \ /      |
+;  Loop.Latch--+
+;      |
+;      v
+;     Exit
+;
+; OLDGVN-LABEL: @test31(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br label [[LOOP:%.*]]
+; OLDGVN:       Loop:
+; OLDGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP_LATCH:%.*]] ]
+; OLDGVN-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    [[V1:%.*]] = load i64, ptr [[P1]], align 4
+; OLDGVN-NEXT:    br label [[LOOP_LATCH]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[V2_PRE:%.*]] = load i64, ptr [[P1]], align 4
+; OLDGVN-NEXT:    br label [[LOOP_LATCH]]
+; OLDGVN:       Loop.Latch:
+; OLDGVN-NEXT:    [[V2:%.*]] = phi i64 [ [[V1]], [[T]] ], [ [[V2_PRE]], [[F]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i64 [ [[V1]], [[T]] ], [ 100, [[F]] ]
+; OLDGVN-NEXT:    [[V3:%.*]] = add i64 [[PHI]], [[V2]]
+; OLDGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; OLDGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; OLDGVN-NEXT:    br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    ret i64 [[V3]]
+;
+; NEWGVN-LABEL: @test31(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br label [[LOOP:%.*]]
+; NEWGVN:       Loop:
+; NEWGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP_LATCH:%.*]] ]
+; NEWGVN-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    [[V1:%.*]] = load i64, ptr [[P1]], align 4
+; NEWGVN-NEXT:    br label [[LOOP_LATCH]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[V21:%.*]] = load i64, ptr [[P1]], align 4
+; NEWGVN-NEXT:    br label [[LOOP_LATCH]]
+; NEWGVN:       Loop.Latch:
+; NEWGVN-NEXT:    [[V2:%.*]] = phi i64 [ [[V1]], [[T]] ], [ [[V21]], [[F]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i64 [ [[V1]], [[T]] ], [ 100, [[F]] ]
+; NEWGVN-NEXT:    [[V3:%.*]] = add i64 [[PHI]], [[V2]]
+; NEWGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; NEWGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; NEWGVN-NEXT:    br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    ret i64 [[V3]]
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 1, %Entry ], [ %Index.inc, %Loop.Latch ]
+  %P1 = getelementptr i64, ptr %P, i64 %Index
+  br i1 %Cond1, label %T, label %F
+
+T:
+  %V1 = load i64, ptr %P1, align 4
+  br label %Loop.Latch
+
+F:
+  br label %Loop.Latch
+
+Loop.Latch:
+  %Phi = phi i64 [ %V1, %T ], [ 100, %F ]
+  %V2 = load i64, ptr %P1, align 4
+  %V3 = add i64 %Phi, %V2
+  %Index.inc = add i64 %Index, 1
+  %Cond2 = icmp ne i64 %Index.inc, %TC
+  br i1 %Cond2, label %Loop, label %Exit
+
+Exit:
+  ret i64 %V3
+}
+
+; i64 load in BB after a <4 x i64> store inside the loop: the load is replaced
+; by bitcast-to-i256 + trunc of the stored vector. Both GVNs place the
+; coercion inside the loop; NEWGVN emits it right after the store, OLDGVN just
+; before the backedge branch, and NEWGVN also keeps the (now empty) BB block.
+define i64 @test32(ptr %P, i64 %TC) {
+;  Entry _
+;    |  / |
+;    | v  |
+;  Loop   |
+;    | \__|
+;    |
+;    BB
+;    |
+;    v
+;   Exit
+;
+; OLDGVN-LABEL: @test32(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br label [[LOOP:%.*]]
+; OLDGVN:       Loop:
+; OLDGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP]] ]
+; OLDGVN-NEXT:    [[I1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
+; OLDGVN-NEXT:    [[I2:%.*]] = insertelement <4 x i64> [[I1]], i64 [[INDEX]], i32 1
+; OLDGVN-NEXT:    [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2
+; OLDGVN-NEXT:    [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3
+; OLDGVN-NEXT:    store <4 x i64> [[I4]], ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; OLDGVN-NEXT:    [[COND:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64
+; OLDGVN-NEXT:    br i1 [[COND]], label [[LOOP]], label [[BB:%.*]]
+; OLDGVN:       BB:
+; OLDGVN-NEXT:    [[V5:%.*]] = add i64 [[TMP1]], [[TC]]
+; OLDGVN-NEXT:    ret i64 [[V5]]
+;
+; NEWGVN-LABEL: @test32(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br label [[LOOP:%.*]]
+; NEWGVN:       Loop:
+; NEWGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP]] ]
+; NEWGVN-NEXT:    [[I1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
+; NEWGVN-NEXT:    [[I2:%.*]] = insertelement <4 x i64> [[I1]], i64 [[INDEX]], i32 1
+; NEWGVN-NEXT:    [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2
+; NEWGVN-NEXT:    [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3
+; NEWGVN-NEXT:    store <4 x i64> [[I4]], ptr [[P:%.*]], align 4
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64
+; NEWGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; NEWGVN-NEXT:    [[COND:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; NEWGVN-NEXT:    br i1 [[COND]], label [[LOOP]], label [[BB:%.*]]
+; NEWGVN:       BB:
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V5:%.*]] = add i64 [[TMP1]], [[TC]]
+; NEWGVN-NEXT:    ret i64 [[V5]]
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 1, %Entry ], [ %Index.inc, %Loop ]
+  %I1 = insertelement <4 x i64> poison, i64 %Index, i32 0
+  %I2 = insertelement <4 x i64> %I1, i64 %Index, i32 1
+  %I3 = insertelement <4 x i64> %I2, i64 100, i32 2
+  %I4 = insertelement <4 x i64> %I3, i64 1000, i32 3
+  store <4 x i64> %I4, <4 x i64>* %P, align 4
+  %Index.inc = add i64 %Index, 1
+  %Cond = icmp ne i64 %Index.inc, %TC
+  br i1 %Cond, label %Loop, label %BB
+
+BB:
+  %V4 = load i64, ptr %P, align 4
+  br label %Exit
+
+Exit:
+  %V5 = add i64 %V4, %Index.inc
+  ret i64 %V5
+}
+
+; Two differently-typed loads (i64 in BB2, <2 x i64> in Exit) are both coerced
+; from the <4 x i64> stored in the loop: the store is bitcast to i256, then
+; trunc'd to i64 and to i128 (re-bitcast to <2 x i64>). OLDGVN and NEWGVN
+; differ only in the order the coercion instructions appear in the loop.
+define i64 @test33(ptr %P, i64 %TC, i1 %Cond1) {
+;  Entry _
+;    |  / |
+;    | v  |
+;  Loop   |
+;    | \__|
+;    |
+;   BB1
+;  /  \
+; BB2 BB3
+;  \  /
+;   vv
+;  Exit
+;
+; OLDGVN-LABEL: @test33(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br label [[LOOP:%.*]]
+; OLDGVN:       Loop:
+; OLDGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP]] ]
+; OLDGVN-NEXT:    [[I1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
+; OLDGVN-NEXT:    [[I2:%.*]] = insertelement <4 x i64> [[I1]], i64 [[INDEX]], i32 1
+; OLDGVN-NEXT:    [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2
+; OLDGVN-NEXT:    [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3
+; OLDGVN-NEXT:    store <4 x i64> [[I4]], ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; OLDGVN-NEXT:    [[COND:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64
+; OLDGVN-NEXT:    [[TMP2:%.*]] = trunc i256 [[TMP0]] to i128
+; OLDGVN-NEXT:    [[TMP3:%.*]] = bitcast i128 [[TMP2]] to <2 x i64>
+; OLDGVN-NEXT:    br i1 [[COND]], label [[LOOP]], label [[BB1:%.*]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       BB3:
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i64 [ [[TMP1]], [[BB2]] ], [ 100, [[BB3]] ]
+; OLDGVN-NEXT:    [[V6:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1
+; OLDGVN-NEXT:    [[V7:%.*]] = add i64 [[PHI]], [[V6]]
+; OLDGVN-NEXT:    ret i64 [[V7]]
+;
+; NEWGVN-LABEL: @test33(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br label [[LOOP:%.*]]
+; NEWGVN:       Loop:
+; NEWGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP]] ]
+; NEWGVN-NEXT:    [[I1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0
+; NEWGVN-NEXT:    [[I2:%.*]] = insertelement <4 x i64> [[I1]], i64 [[INDEX]], i32 1
+; NEWGVN-NEXT:    [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2
+; NEWGVN-NEXT:    [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3
+; NEWGVN-NEXT:    store <4 x i64> [[I4]], ptr [[P:%.*]], align 4
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128
+; NEWGVN-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
+; NEWGVN-NEXT:    [[TMP3:%.*]] = trunc i256 [[TMP0]] to i64
+; NEWGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; NEWGVN-NEXT:    [[COND:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; NEWGVN-NEXT:    br i1 [[COND]], label [[LOOP]], label [[BB1:%.*]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       BB3:
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i64 [ [[TMP3]], [[BB2]] ], [ 100, [[BB3]] ]
+; NEWGVN-NEXT:    [[V6:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1
+; NEWGVN-NEXT:    [[V7:%.*]] = add i64 [[PHI]], [[V6]]
+; NEWGVN-NEXT:    ret i64 [[V7]]
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 1, %Entry ], [ %Index.inc, %Loop ]
+  %I1 = insertelement <4 x i64> poison, i64 %Index, i32 0
+  %I2 = insertelement <4 x i64> %I1, i64 %Index, i32 1
+  %I3 = insertelement <4 x i64> %I2, i64 100, i32 2
+  %I4 = insertelement <4 x i64> %I3, i64 1000, i32 3
+  store <4 x i64> %I4, <4 x i64>* %P, align 4
+  %Index.inc = add i64 %Index, 1
+  %Cond = icmp ne i64 %Index.inc, %TC
+  br i1 %Cond, label %Loop, label %BB1
+
+BB1:
+  br i1 %Cond1, label %BB2, label %BB3
+
+BB2:
+  %V4 = load i64, ptr %P, align 4
+  br label %Exit
+
+BB3:
+  br label %Exit
+
+Exit:
+  %Phi = phi i64 [ %V4, %BB2 ], [ 100, %BB3 ]
+  %V5 = load <2 x i64>, <2 x i64>* %P, align 4
+  %V6 = extractelement <2 x i64> %V5, i64 1
+  %V7 = add i64 %Phi, %V6
+  ret i64 %V7
+}
+
+; Same coercions as test33 (trunc/bitcast of the i256-bitcast stored vector),
+; but here the coerced i64 feeds the induction phi of a second loop (Loop2)
+; and the coerced <2 x i64> feeds the extractelement inside Loop2.
+define i64 @test34(ptr %P, i64 %TC1, i1 %Cond, i64 %TC2) {
+;  Entry _
+;    |  / |
+;    | v  |
+;  Loop1  |
+;    | \__|
+;    |
+;   BB1
+;   / \
+; BB2 BB3
+;   \ /  _
+;    |  / |
+;    | v  |
+;  Loop2  |
+;    | \__|
+;    |
+;    v
+;   Exit
+;
+; OLDGVN-LABEL: @test34(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br label [[LOOP1:%.*]]
+; OLDGVN:       Loop1:
+; OLDGVN-NEXT:    [[INDEX1:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX1_INC:%.*]], [[LOOP1]] ]
+; OLDGVN-NEXT:    [[I1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX1]], i32 0
+; OLDGVN-NEXT:    [[I2:%.*]] = insertelement <4 x i64> [[I1]], i64 [[INDEX1]], i32 1
+; OLDGVN-NEXT:    [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2
+; OLDGVN-NEXT:    [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3
+; OLDGVN-NEXT:    store <4 x i64> [[I4]], ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    [[INDEX1_INC]] = add i64 [[INDEX1]], 1
+; OLDGVN-NEXT:    [[COND1:%.*]] = icmp ne i64 [[INDEX1_INC]], [[TC1:%.*]]
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64
+; OLDGVN-NEXT:    [[TMP2:%.*]] = trunc i256 [[TMP0]] to i128
+; OLDGVN-NEXT:    [[TMP3:%.*]] = bitcast i128 [[TMP2]] to <2 x i64>
+; OLDGVN-NEXT:    br i1 [[COND1]], label [[LOOP1]], label [[BB1:%.*]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    br label [[LOOP2:%.*]]
+; OLDGVN:       BB3:
+; OLDGVN-NEXT:    br label [[LOOP2]]
+; OLDGVN:       Loop2:
+; OLDGVN-NEXT:    [[INDEX2:%.*]] = phi i64 [ [[TMP1]], [[BB2]] ], [ 1, [[BB3]] ], [ [[INDEX2_INC:%.*]], [[LOOP2]] ]
+; OLDGVN-NEXT:    [[V6:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1
+; OLDGVN-NEXT:    [[V7:%.*]] = add i64 [[V6]], [[INDEX2]]
+; OLDGVN-NEXT:    [[INDEX2_INC]] = add i64 [[INDEX2]], 1
+; OLDGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX2_INC]], [[TC2:%.*]]
+; OLDGVN-NEXT:    br i1 [[COND2]], label [[LOOP2]], label [[EXIT:%.*]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    ret i64 [[V7]]
+;
+; NEWGVN-LABEL: @test34(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br label [[LOOP1:%.*]]
+; NEWGVN:       Loop1:
+; NEWGVN-NEXT:    [[INDEX1:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX1_INC:%.*]], [[LOOP1]] ]
+; NEWGVN-NEXT:    [[I1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX1]], i32 0
+; NEWGVN-NEXT:    [[I2:%.*]] = insertelement <4 x i64> [[I1]], i64 [[INDEX1]], i32 1
+; NEWGVN-NEXT:    [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2
+; NEWGVN-NEXT:    [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3
+; NEWGVN-NEXT:    store <4 x i64> [[I4]], ptr [[P:%.*]], align 4
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128
+; NEWGVN-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
+; NEWGVN-NEXT:    [[TMP3:%.*]] = trunc i256 [[TMP0]] to i64
+; NEWGVN-NEXT:    [[INDEX1_INC]] = add i64 [[INDEX1]], 1
+; NEWGVN-NEXT:    [[COND1:%.*]] = icmp ne i64 [[INDEX1_INC]], [[TC1:%.*]]
+; NEWGVN-NEXT:    br i1 [[COND1]], label [[LOOP1]], label [[BB1:%.*]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    br label [[LOOP2:%.*]]
+; NEWGVN:       BB3:
+; NEWGVN-NEXT:    br label [[LOOP2]]
+; NEWGVN:       Loop2:
+; NEWGVN-NEXT:    [[INDEX2:%.*]] = phi i64 [ [[TMP3]], [[BB2]] ], [ 1, [[BB3]] ], [ [[INDEX2_INC:%.*]], [[LOOP2]] ]
+; NEWGVN-NEXT:    [[V6:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1
+; NEWGVN-NEXT:    [[V7:%.*]] = add i64 [[V6]], [[INDEX2]]
+; NEWGVN-NEXT:    [[INDEX2_INC]] = add i64 [[INDEX2]], 1
+; NEWGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX2_INC]], [[TC2:%.*]]
+; NEWGVN-NEXT:    br i1 [[COND2]], label [[LOOP2]], label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    ret i64 [[V7]]
+;
+Entry:
+  br label %Loop1
+
+Loop1:
+  %Index1 = phi i64 [ 1, %Entry ], [ %Index1.inc, %Loop1 ]
+  %I1 = insertelement <4 x i64> poison, i64 %Index1, i32 0
+  %I2 = insertelement <4 x i64> %I1, i64 %Index1, i32 1
+  %I3 = insertelement <4 x i64> %I2, i64 100, i32 2
+  %I4 = insertelement <4 x i64> %I3, i64 1000, i32 3
+  store <4 x i64> %I4, <4 x i64>* %P, align 4
+  %Index1.inc = add i64 %Index1, 1
+  %Cond1 = icmp ne i64 %Index1.inc, %TC1
+  br i1 %Cond1, label %Loop1, label %BB1
+
+BB1:
+  br i1 %Cond, label %BB2, label %BB3
+
+BB2:
+  %V4 = load i64, ptr %P, align 4
+  br label %Loop2
+
+BB3:
+  br label %Loop2
+
+Loop2:
+  %Index2 = phi i64 [ %V4, %BB2 ], [ 1, %BB3 ], [ %Index2.inc, %Loop2 ]
+  %V5 = load <2 x i64>, <2 x i64>* %P, align 4
+  %V6 = extractelement <2 x i64> %V5, i64 1
+  %V7 = add i64 %V6, %Index2
+  %Index2.inc = add i64 %Index2, 1
+  %Cond2 = icmp ne i64 %Index2.inc, %TC2
+  br i1 %Cond2, label %Loop2, label %Exit
+
+Exit:
+  ret i64 %V7
+}
+
+; Like test34 plus an extra i64 load %V7 inside Loop2. OLDGVN also coerces
+; %V7 (an additional trunc of the i128 in Loop2), while NEWGVN keeps %V7 as a
+; real load in Loop2 — the remaining gap this patch series documents.
+define i64 @test35(ptr %P, i64 %TC1, i1 %Cond1, i64 %TC2) {
+;  Entry _
+;    |  / |
+;    | v  |
+;  Loop1  |
+;    | \__|
+;    |
+;   BB1
+;   / \
+; BB2 BB3
+;   \ /  _
+;    |  / |
+;    | v  |
+;  Loop2  |
+;    | \__|
+;    |
+;    v
+;   Exit
+;
+; OLDGVN-LABEL: @test35(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br label [[LOOP1:%.*]]
+; OLDGVN:       Loop1:
+; OLDGVN-NEXT:    [[INDEX1:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX1_INC:%.*]], [[LOOP1]] ]
+; OLDGVN-NEXT:    [[I1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX1]], i32 0
+; OLDGVN-NEXT:    [[I2:%.*]] = insertelement <4 x i64> [[I1]], i64 [[INDEX1]], i32 1
+; OLDGVN-NEXT:    [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2
+; OLDGVN-NEXT:    [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3
+; OLDGVN-NEXT:    store <4 x i64> [[I4]], ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    [[INDEX1_INC]] = add i64 [[INDEX1]], 1
+; OLDGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX1_INC]], [[TC1:%.*]]
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64
+; OLDGVN-NEXT:    [[TMP2:%.*]] = trunc i256 [[TMP0]] to i128
+; OLDGVN-NEXT:    [[TMP3:%.*]] = bitcast i128 [[TMP2]] to <2 x i64>
+; OLDGVN-NEXT:    br i1 [[COND2]], label [[LOOP1]], label [[BB1:%.*]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    br label [[LOOP2:%.*]]
+; OLDGVN:       BB3:
+; OLDGVN-NEXT:    br label [[LOOP2]]
+; OLDGVN:       Loop2:
+; OLDGVN-NEXT:    [[INDEX2:%.*]] = phi i64 [ [[TMP1]], [[BB2]] ], [ 1, [[BB3]] ], [ [[INDEX2_INC:%.*]], [[LOOP2]] ]
+; OLDGVN-NEXT:    [[V6:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1
+; OLDGVN-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP2]] to i64
+; OLDGVN-NEXT:    [[V8:%.*]] = add i64 [[V6]], [[INDEX2]]
+; OLDGVN-NEXT:    [[V9:%.*]] = add i64 [[V8]], [[TMP4]]
+; OLDGVN-NEXT:    [[INDEX2_INC]] = add i64 [[INDEX2]], 1
+; OLDGVN-NEXT:    [[COND3:%.*]] = icmp ne i64 [[INDEX2_INC]], [[TC2:%.*]]
+; OLDGVN-NEXT:    br i1 [[COND3]], label [[LOOP2]], label [[EXIT:%.*]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    ret i64 [[V9]]
+;
+; NEWGVN-LABEL: @test35(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br label [[LOOP1:%.*]]
+; NEWGVN:       Loop1:
+; NEWGVN-NEXT:    [[INDEX1:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX1_INC:%.*]], [[LOOP1]] ]
+; NEWGVN-NEXT:    [[I1:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX1]], i32 0
+; NEWGVN-NEXT:    [[I2:%.*]] = insertelement <4 x i64> [[I1]], i64 [[INDEX1]], i32 1
+; NEWGVN-NEXT:    [[I3:%.*]] = insertelement <4 x i64> [[I2]], i64 100, i32 2
+; NEWGVN-NEXT:    [[I4:%.*]] = insertelement <4 x i64> [[I3]], i64 1000, i32 3
+; NEWGVN-NEXT:    store <4 x i64> [[I4]], ptr [[P:%.*]], align 4
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[I4]] to i256
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128
+; NEWGVN-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
+; NEWGVN-NEXT:    [[TMP3:%.*]] = trunc i256 [[TMP0]] to i64
+; NEWGVN-NEXT:    [[INDEX1_INC]] = add i64 [[INDEX1]], 1
+; NEWGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX1_INC]], [[TC1:%.*]]
+; NEWGVN-NEXT:    br i1 [[COND2]], label [[LOOP1]], label [[BB1:%.*]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    br label [[LOOP2:%.*]]
+; NEWGVN:       BB3:
+; NEWGVN-NEXT:    br label [[LOOP2]]
+; NEWGVN:       Loop2:
+; NEWGVN-NEXT:    [[INDEX2:%.*]] = phi i64 [ [[TMP3]], [[BB2]] ], [ 1, [[BB3]] ], [ [[INDEX2_INC:%.*]], [[LOOP2]] ]
+; NEWGVN-NEXT:    [[V6:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1
+; NEWGVN-NEXT:    [[V7:%.*]] = load i64, ptr [[P]], align 1
+; NEWGVN-NEXT:    [[V8:%.*]] = add i64 [[V6]], [[INDEX2]]
+; NEWGVN-NEXT:    [[V9:%.*]] = add i64 [[V8]], [[V7]]
+; NEWGVN-NEXT:    [[INDEX2_INC]] = add i64 [[INDEX2]], 1
+; NEWGVN-NEXT:    [[COND3:%.*]] = icmp ne i64 [[INDEX2_INC]], [[TC2:%.*]]
+; NEWGVN-NEXT:    br i1 [[COND3]], label [[LOOP2]], label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    ret i64 [[V9]]
+;
+Entry:
+  br label %Loop1
+
+Loop1:
+  %Index1 = phi i64 [ 1, %Entry ], [ %Index1.inc, %Loop1 ]
+  %I1 = insertelement <4 x i64> poison, i64 %Index1, i32 0
+  %I2 = insertelement <4 x i64> %I1, i64 %Index1, i32 1
+  %I3 = insertelement <4 x i64> %I2, i64 100, i32 2
+  %I4 = insertelement <4 x i64> %I3, i64 1000, i32 3
+  store <4 x i64> %I4, <4 x i64>* %P, align 4
+  %Index1.inc = add i64 %Index1, 1
+  %Cond2 = icmp ne i64 %Index1.inc, %TC1
+  br i1 %Cond2, label %Loop1, label %BB1
+
+BB1:
+  br i1 %Cond1, label %BB2, label %BB3
+
+BB2:
+  %V4 = load i64, ptr %P, align 4
+  br label %Loop2
+
+BB3:
+  br label %Loop2
+
+Loop2:
+  %Index2 = phi i64 [ %V4, %BB2 ], [ 1, %BB3 ], [ %Index2.inc, %Loop2 ]
+  %V5 = load <2 x i64>, <2 x i64>* %P, align 4
+  %V6 = extractelement <2 x i64> %V5, i64 1
+  %V7 = load i64, ptr %P, align 1
+  %V8 = add i64 %V6, %Index2
+  %V9 = add i64 %V8, %V7
+  %Index2.inc = add i64 %Index2, 1
+  %Cond3 = icmp ne i64 %Index2.inc, %TC2
+  br i1 %Cond3, label %Loop2, label %Exit
+
+Exit:
+  ret i64 %V9
+}
+
+define i64 @test36(ptr %P, i64 %TC, i1 %Cond1) {
+; The i64 load %V2 (BB2) overlaps the <4 x i64> store in Loop: both GVNs coerce
+; it to a bitcast-to-i256 + trunc of the stored vector %I1, and %Phi picks
+; between that value and the constant 100. NEWGVN additionally keeps the loop
+; header load of %V1 (no PRE out of the loop, unlike OLDGVN's V1_PRE).
+;    Entry
+;      |
+;    Loop<-----+
+;     / \      |
+;   BB1 BB2    |
+;     \ /      |
+;     BB3      |
+;      |       |
+;  Loop.Latch--+
+;      |
+;      v
+;     Exit
+;
+; OLDGVN-LABEL: @test36(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    [[V1_PRE:%.*]] = load <4 x i64>, ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    br label [[LOOP:%.*]]
+; OLDGVN:       Loop:
+; OLDGVN-NEXT:    [[V1:%.*]] = phi <4 x i64> [ [[V1_PRE]], [[ENTRY:%.*]] ], [ [[I1:%.*]], [[BB3:%.*]] ]
+; OLDGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[INDEX_INC:%.*]], [[BB3]] ]
+; OLDGVN-NEXT:    [[I1]] = insertelement <4 x i64> [[V1]], i64 [[INDEX]], i32 1
+; OLDGVN-NEXT:    store <4 x i64> [[I1]], ptr [[P]], align 4
+; OLDGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[I1]] to i256
+; OLDGVN-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    br label [[BB3]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    br label [[BB3]]
+; OLDGVN:       BB3:
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i64 [ 100, [[BB1]] ], [ [[TMP1]], [[BB2]] ]
+; OLDGVN-NEXT:    [[V3:%.*]] = add i64 [[PHI]], [[INDEX]]
+; OLDGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; OLDGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; OLDGVN-NEXT:    br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    ret i64 [[V3]]
+;
+; NEWGVN-LABEL: @test36(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br label [[LOOP:%.*]]
+; NEWGVN:       Loop:
+; NEWGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP_LATCH:%.*]] ]
+; NEWGVN-NEXT:    [[V1:%.*]] = load <4 x i64>, ptr [[P:%.*]], align 4
+; NEWGVN-NEXT:    [[I1:%.*]] = insertelement <4 x i64> [[V1]], i64 [[INDEX]], i32 1
+; NEWGVN-NEXT:    store <4 x i64> [[I1]], ptr [[P]], align 4
+; NEWGVN-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[I1]] to i256
+; NEWGVN-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i64
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    br label [[BB3:%.*]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    br label [[BB3]]
+; NEWGVN:       BB3:
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i64 [ 100, [[BB1]] ], [ [[TMP1]], [[BB2]] ]
+; NEWGVN-NEXT:    [[V3:%.*]] = add i64 [[PHI]], [[INDEX]]
+; NEWGVN-NEXT:    br label [[LOOP_LATCH]]
+; NEWGVN:       Loop.Latch:
+; NEWGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; NEWGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; NEWGVN-NEXT:    br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    ret i64 [[V3]]
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 1, %Entry ], [ %Index.inc, %Loop.Latch ]
+  %V1 = load <4 x i64>, <4 x i64>* %P, align 4
+  %I1 = insertelement <4 x i64> %V1, i64 %Index, i32 1
+  store <4 x i64> %I1, <4 x i64>* %P, align 4
+  br i1 %Cond1, label %BB1, label %BB2
+
+BB1:
+  br label %BB3
+
+BB2:
+  %V2 = load i64, ptr %P, align 4
+  br label %BB3
+
+BB3:
+  %Phi = phi i64 [ 100, %BB1 ], [ %V2, %BB2 ]
+  %V3 = add i64 %Phi, %Index
+  br label %Loop.Latch
+
+Loop.Latch:
+  %Index.inc = add i64 %Index, 1
+  %Cond2 = icmp ne i64 %Index.inc, %TC
+  br i1 %Cond2, label %Loop, label %Exit
+
+Exit:
+  ret i64 %V3
+}
+
+define i64 @test37(i1 %Cond, ptr %V1) {
+; Pointer-typed load coercion across a diamond: on the T path %V2 forwards the
+; stored %V1 directly; on the F path a load is materialized, and %V2 becomes a
+; phi of the two. Both GVNs produce the same shape (NEWGVN names the F-side
+; load [[V21]] instead of OLDGVN's PRE-style [[V2_PRE]]).
+; OLDGVN-LABEL: @test37(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store ptr [[V1:%.*]], ptr inttoptr (i64 16 to ptr), align 8
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[V2_PRE:%.*]] = load ptr, ptr inttoptr (i64 16 to ptr), align 8
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V2:%.*]] = phi ptr [ [[V2_PRE]], [[F]] ], [ [[V1]], [[T]] ]
+; OLDGVN-NEXT:    [[V3:%.*]] = load i64, ptr [[V2]], align 4
+; OLDGVN-NEXT:    ret i64 [[V3]]
+;
+; NEWGVN-LABEL: @test37(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store ptr [[V1:%.*]], ptr inttoptr (i64 16 to ptr), align 8
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[V21:%.*]] = load ptr, ptr inttoptr (i64 16 to ptr), align 8
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V2:%.*]] = phi ptr [ [[V21]], [[F]] ], [ [[V1]], [[T]] ]
+; NEWGVN-NEXT:    [[V3:%.*]] = load i64, ptr [[V2]], align 4
+; NEWGVN-NEXT:    ret i64 [[V3]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  store ptr %V1, ptr inttoptr (i64 16 to ptr), align 8
+  br label %Exit
+
+F:
+  br label %Exit
+
+Exit:
+  %V2 = load ptr, ptr inttoptr (i64 16 to ptr), align 8
+  %V3 = load i64, ptr %V2
+  ret i64 %V3
+}
+
+define i32 @test38(ptr %P, i1 %cond1, i1 %cond2) {
+; The BB3 load %V3 is dominated by "store i32 42" (BB1), so %V4 folds to the
+; constant 142; the BB4 path keeps %V2's load and add. Both GVNs collapse the
+; final phi to [ 142, ... ], [ %V5, ... ]; NEWGVN preserves the BB3/BB4 blocks.
+;   Entry
+;    /  \
+;  BB1  BB2
+;   |    |
+;  BB3  BB4
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test38(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    [[V2:%.*]] = load i32, ptr [[P]], align 4
+; OLDGVN-NEXT:    [[V5:%.*]] = add i32 [[V2]], 100
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 142, [[BB1]] ], [ [[V5]], [[BB2]] ]
+; OLDGVN-NEXT:    ret i32 [[PHI]]
+;
+; NEWGVN-LABEL: @test38(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 4
+; NEWGVN-NEXT:    br label [[BB3:%.*]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    [[V2:%.*]] = load i32, ptr [[P]], align 4
+; NEWGVN-NEXT:    br label [[BB4:%.*]]
+; NEWGVN:       BB3:
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       BB4:
+; NEWGVN-NEXT:    [[V5:%.*]] = add i32 [[V2]], 100
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 142, [[BB3]] ], [ [[V5]], [[BB4]] ]
+; NEWGVN-NEXT:    ret i32 [[PHI]]
+;
+Entry:
+  %V1 = load <2 x i32>, <2 x i32>* %P, align 4
+  br i1 %cond1, label %BB1, label %BB2
+
+BB1:
+  store i32 42, ptr %P, align 4
+  br label %BB3
+
+BB2:
+  %V2 = load i32, ptr %P, align 4
+  br label %BB4
+
+BB3:
+  %V3 = load i32, ptr %P, align 4
+  %V4 = add i32 %V3, 100
+  br label %Exit
+
+BB4:
+  %V5 = add i32 %V2, 100
+  br label %Exit
+
+Exit:
+  %phi = phi i32 [ %V4, %BB3 ], [ %V5, %BB4 ]
+  ret i32 %phi
+}
+
+; Tests with phi-of-ops optimization.
+
+define i32 @test39(ptr %P1, ptr %P2, i1 %cond1, i1 %cond2, i1 %cond3, i1 %cond4) {
+; Phi-of-ops interaction: %V1 forwards the stored 87/97, and %V2 = %Phi + %V1
+; becomes [[PHIOFOPS]] = phi [ 139 (= 42+97), BB2 ], [ 86 (= -1+87), BB1 ].
+; NEWGVN also forwards the BB5 store to Exit1's load; the Exit2 load remains.
+;      Entry
+;      /   \
+;    BB1   BB2
+;      \   /
+;       BB3
+;       / \
+;      BB4 |
+;      / \ |
+;    BB5  ||
+;    /  \ ||
+; Exit1 Exit2
+;
+; OLDGVN-LABEL: @test39(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    store i32 87, ptr [[P2:%.*]], align 4
+; OLDGVN-NEXT:    br label [[BB3:%.*]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    store i32 97, ptr [[P2]], align 4
+; OLDGVN-NEXT:    br label [[BB3]]
+; OLDGVN:       BB3:
+; OLDGVN-NEXT:    [[V1:%.*]] = phi i32 [ 87, [[BB1]] ], [ 97, [[BB2]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ -1, [[BB1]] ], [ 42, [[BB2]] ]
+; OLDGVN-NEXT:    br i1 [[COND2:%.*]], label [[BB4:%.*]], label [[EXIT2:%.*]]
+; OLDGVN:       BB4:
+; OLDGVN-NEXT:    [[V2:%.*]] = add i32 [[PHI]], [[V1]]
+; OLDGVN-NEXT:    store i32 [[V2]], ptr [[P2]], align 4
+; OLDGVN-NEXT:    br i1 [[COND3:%.*]], label [[BB5:%.*]], label [[EXIT2]]
+; OLDGVN:       BB5:
+; OLDGVN-NEXT:    store i32 [[V2]], ptr [[P1:%.*]], align 4
+; OLDGVN-NEXT:    br i1 [[COND4:%.*]], label [[EXIT1:%.*]], label [[BB5_EXIT2_CRIT_EDGE:%.*]]
+; OLDGVN:       BB5.Exit2_crit_edge:
+; OLDGVN-NEXT:    [[V4_PRE:%.*]] = load i32, ptr [[P2]], align 4
+; OLDGVN-NEXT:    br label [[EXIT2]]
+; OLDGVN:       Exit1:
+; OLDGVN-NEXT:    ret i32 [[V2]]
+; OLDGVN:       Exit2:
+; OLDGVN-NEXT:    [[V4:%.*]] = phi i32 [ [[V4_PRE]], [[BB5_EXIT2_CRIT_EDGE]] ], [ [[V2]], [[BB4]] ], [ [[V1]], [[BB3]] ]
+; OLDGVN-NEXT:    ret i32 [[V4]]
+;
+; NEWGVN-LABEL: @test39(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    store i32 87, ptr [[P2:%.*]], align 4
+; NEWGVN-NEXT:    br label [[BB3:%.*]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    store i32 97, ptr [[P2]], align 4
+; NEWGVN-NEXT:    br label [[BB3]]
+; NEWGVN:       BB3:
+; NEWGVN-NEXT:    [[PHIOFOPS:%.*]] = phi i32 [ 139, [[BB2]] ], [ 86, [[BB1]] ]
+; NEWGVN-NEXT:    [[V1:%.*]] = phi i32 [ 87, [[BB1]] ], [ 97, [[BB2]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ -1, [[BB1]] ], [ 42, [[BB2]] ]
+; NEWGVN-NEXT:    br i1 [[COND2:%.*]], label [[BB4:%.*]], label [[EXIT2:%.*]]
+; NEWGVN:       BB4:
+; NEWGVN-NEXT:    store i32 [[PHIOFOPS]], ptr [[P2]], align 4
+; NEWGVN-NEXT:    br i1 [[COND3:%.*]], label [[BB5:%.*]], label [[EXIT2]]
+; NEWGVN:       BB5:
+; NEWGVN-NEXT:    store i32 [[PHIOFOPS]], ptr [[P1:%.*]], align 4
+; NEWGVN-NEXT:    br i1 [[COND4:%.*]], label [[EXIT1:%.*]], label [[EXIT2]]
+; NEWGVN:       Exit1:
+; NEWGVN-NEXT:    ret i32 [[PHIOFOPS]]
+; NEWGVN:       Exit2:
+; NEWGVN-NEXT:    [[V4:%.*]] = load i32, ptr [[P2]], align 4
+; NEWGVN-NEXT:    ret i32 [[V4]]
+;
+Entry:
+  br i1 %cond1, label %BB1, label %BB2
+
+BB1:
+  store i32 87, ptr %P2
+  br label %BB3
+
+BB2:
+  store i32 97, ptr %P2
+  br label %BB3
+
+BB3:
+  %Phi = phi i32 [-1, %BB1], [42, %BB2]
+  %V1 = load i32, ptr %P2
+  br i1 %cond2, label %BB4, label %Exit2
+
+BB4:
+  %V2 = add i32 %Phi, %V1
+  store i32 %V2, ptr %P2
+  br i1 %cond3, label %BB5, label %Exit2
+
+BB5:
+  store i32 %V2, ptr %P1
+  br i1 %cond4, label %Exit1, label %Exit2
+
+Exit1:
+  %V3 = load i32, ptr %P1
+  ret i32 %V3
+
+Exit2:
+  %V4 = load i32, ptr %P2
+  ret i32 %V4
+}
+
+define i8 @test40(ptr %P, i1 %Cond) {
+; Mixed-type coercion: the i8 load %V2 reads the low byte of "store i32 42"
+; (-> 42) or of "store float 1.0" (-> 0). NEWGVN then folds %V3 = %V2 + %Phi
+; into a phi-of-ops [ 0 (= 0+0), F ], [ 84 (= 42+42), T ].
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test40(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    store float 1.000000e+00, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V2:%.*]] = phi i8 [ 0, [[F]] ], [ 42, [[T]] ]
+; OLDGVN-NEXT:    [[V3:%.*]] = add i8 [[V2]], [[V2]]
+; OLDGVN-NEXT:    ret i8 [[V3]]
+;
+; NEWGVN-LABEL: @test40(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    store float 1.000000e+00, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[PHIOFOPS:%.*]] = phi i8 [ 0, [[F]] ], [ 84, [[T]] ]
+; NEWGVN-NEXT:    [[V2:%.*]] = phi i8 [ 0, [[F]] ], [ 42, [[T]] ]
+; NEWGVN-NEXT:    ret i8 [[PHIOFOPS]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  store i32 42, ptr %P, align 1
+  br label %Exit
+
+F:
+  store float 1.0, ptr %P, align 1
+  br label %Exit
+
+Exit:
+  %Phi = phi i8 [ 0, %F ], [ 42, %T ]
+  %V2 = load i8, ptr %P, align 1
+  %V3 = add i8 %V2, %Phi
+  ret i8 %V3
+}
+
+define i32 @test41(ptr %P, i1 %Cond1, i1 %Cond2, i1 %Cond3) {
+; Nested-loop case: the latch load %V2 forwards 42 (outer store) or 13 (inner
+; store). NEWGVN folds %V3 into phi-of-ops [ 242 (= 42+200), Outer.Loop ],
+; [ 113 (= 13+100), Inner.Loop ] and returns it from Exit.
+;       Entry
+;         |
+;  +--Outer.Loop<-------+
+;  |      |     _       |
+;  |      |    / |      |
+;  |      |   v  |      |
+;  |  Inner.Loop |      |
+;  |      |    \_|      |
+;  |      |             |
+;  +->Outer.Loop.Latch--+
+;         |
+;        Exit
+;
+; OLDGVN-LABEL: @test41(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br label [[OUTER_LOOP:%.*]]
+; OLDGVN:       Outer.Loop:
+; OLDGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[INNER_LOOP:%.*]], label [[OUTER_LOOP_LATCH:%.*]]
+; OLDGVN:       Inner.Loop:
+; OLDGVN-NEXT:    store i32 13, ptr [[P]], align 1
+; OLDGVN-NEXT:    br i1 [[COND2:%.*]], label [[INNER_LOOP]], label [[OUTER_LOOP_LATCH]]
+; OLDGVN:       Outer.Loop.Latch:
+; OLDGVN-NEXT:    [[V2:%.*]] = phi i32 [ 13, [[INNER_LOOP]] ], [ 42, [[OUTER_LOOP]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 100, [[INNER_LOOP]] ], [ 200, [[OUTER_LOOP]] ]
+; OLDGVN-NEXT:    [[V3:%.*]] = add i32 [[V2]], [[PHI]]
+; OLDGVN-NEXT:    br i1 [[COND3:%.*]], label [[OUTER_LOOP]], label [[EXIT:%.*]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    ret i32 [[V3]]
+;
+; NEWGVN-LABEL: @test41(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br label [[OUTER_LOOP:%.*]]
+; NEWGVN:       Outer.Loop:
+; NEWGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[INNER_LOOP:%.*]], label [[OUTER_LOOP_LATCH:%.*]]
+; NEWGVN:       Inner.Loop:
+; NEWGVN-NEXT:    store i32 13, ptr [[P]], align 1
+; NEWGVN-NEXT:    br i1 [[COND2:%.*]], label [[INNER_LOOP]], label [[OUTER_LOOP_LATCH]]
+; NEWGVN:       Outer.Loop.Latch:
+; NEWGVN-NEXT:    [[PHIOFOPS:%.*]] = phi i32 [ 242, [[OUTER_LOOP]] ], [ 113, [[INNER_LOOP]] ]
+; NEWGVN-NEXT:    [[V2:%.*]] = phi i32 [ 13, [[INNER_LOOP]] ], [ 42, [[OUTER_LOOP]] ]
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 100, [[INNER_LOOP]] ], [ 200, [[OUTER_LOOP]] ]
+; NEWGVN-NEXT:    br i1 [[COND3:%.*]], label [[OUTER_LOOP]], label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    ret i32 [[PHIOFOPS]]
+;
+Entry:
+  br label %Outer.Loop
+
+Outer.Loop:
+  store i32 42, ptr %P, align 1
+  br i1 %Cond1, label %Inner.Loop, label %Outer.Loop.Latch
+
+Inner.Loop:
+  store i32 13, ptr %P, align 1
+  br i1 %Cond2, label %Inner.Loop, label %Outer.Loop.Latch
+
+Outer.Loop.Latch:
+  %Phi = phi i32 [ 100, %Inner.Loop ], [ 200, %Outer.Loop ]
+  %V2 = load i32, ptr %P, align 1
+  %V3 = add i32 %V2, %Phi
+  br i1 %Cond3, label %Outer.Loop, label %Exit
+
+Exit:
+  ret i32 %V3
+}
+
+define i32 @test42(ptr %P, i1 %Cond) {
+; Vector-splat store feeding both a partial load (%V1/%V2 extract -> 13) and a
+; scalar load %V3 (13 via splat, 100 via the F-side store). NEWGVN reduces the
+; whole function to phi-of-ops [ 200 (= 100+100), F ], [ 26 (= 13+13), T ].
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test42(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    store <4 x i32> <i32 13, i32 13, i32 13, i32 13>, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    store i32 100, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V3:%.*]] = phi i32 [ 13, [[T]] ], [ 100, [[F]] ]
+; OLDGVN-NEXT:    [[V4:%.*]] = add i32 [[V3]], [[V3]]
+; OLDGVN-NEXT:    ret i32 [[V4]]
+;
+; NEWGVN-LABEL: @test42(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    store <4 x i32> <i32 13, i32 13, i32 13, i32 13>, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    store i32 100, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[PHIOFOPS:%.*]] = phi i32 [ 200, [[F]] ], [ 26, [[T]] ]
+; NEWGVN-NEXT:    ret i32 [[PHIOFOPS]]
+;
+Entry:
+  store <4 x i32> <i32 13, i32 13, i32 13, i32 13>, <4 x i32>* %P, align 1
+  br i1 %Cond, label %T, label %F
+
+T:
+  %V1 = load <4 x i32>, <4 x i32>* %P, align 1
+  %V2 = extractelement <4 x i32> %V1, i64 2
+  br label %Exit
+
+F:
+  store i32 100, ptr %P, align 1
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [ %V2, %T ], [ 100, %F ]
+  %V3 = load i32, ptr %P, align 1
+  %V4 = add i32 %Phi, %V3
+  ret i32 %V4
+}
+
+define i32 @test43(ptr %P, i1 %Cond1, i1 %Cond2) {
+; Loads coerced from the <4 x i32> <13,14,15,16> store: %V2 -> 15 (elt 2),
+; %V3 -> 13 or 100, %V6 -> 14 (elt 1). NEWGVN turns %V4 into phi-of-ops
+; [ 200 (= 100+100), BB2 ], [ 28 (= 15+13), BB1 ].
+;     Entry--+
+;      |     |
+;     BB0    |
+;    /  \    |
+;  BB1  BB2  |
+;    \  /    |
+;     vv     |
+;      |     |
+;     BB3   BB4
+;       \   /
+;       Exit
+;        |
+;
+; OLDGVN-LABEL: @test43(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    store <4 x i32> <i32 13, i32 14, i32 15, i32 16>, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB0:%.*]], label [[BB4:%.*]]
+; OLDGVN:       BB0:
+; OLDGVN-NEXT:    br i1 [[COND2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    br label [[BB3:%.*]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    store i32 100, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[BB3]]
+; OLDGVN:       BB3:
+; OLDGVN-NEXT:    [[V3:%.*]] = phi i32 [ 13, [[BB1]] ], [ 100, [[BB2]] ]
+; OLDGVN-NEXT:    [[PHI1:%.*]] = phi i32 [ 15, [[BB1]] ], [ 100, [[BB2]] ]
+; OLDGVN-NEXT:    [[V4:%.*]] = add i32 [[PHI1]], [[V3]]
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       BB4:
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[PHI2:%.*]] = phi i32 [ [[V4]], [[BB3]] ], [ 14, [[BB4]] ]
+; OLDGVN-NEXT:    [[V7:%.*]] = add i32 [[PHI2]], 10
+; OLDGVN-NEXT:    ret i32 [[V7]]
+;
+; NEWGVN-LABEL: @test43(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    store <4 x i32> <i32 13, i32 14, i32 15, i32 16>, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB0:%.*]], label [[BB4:%.*]]
+; NEWGVN:       BB0:
+; NEWGVN-NEXT:    br i1 [[COND2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    br label [[BB3:%.*]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    store i32 100, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[BB3]]
+; NEWGVN:       BB3:
+; NEWGVN-NEXT:    [[PHIOFOPS:%.*]] = phi i32 [ 200, [[BB2]] ], [ 28, [[BB1]] ]
+; NEWGVN-NEXT:    [[V3:%.*]] = phi i32 [ 13, [[BB1]] ], [ 100, [[BB2]] ]
+; NEWGVN-NEXT:    [[PHI1:%.*]] = phi i32 [ 15, [[BB1]] ], [ 100, [[BB2]] ]
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       BB4:
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[PHI2:%.*]] = phi i32 [ [[PHIOFOPS]], [[BB3]] ], [ 14, [[BB4]] ]
+; NEWGVN-NEXT:    [[V7:%.*]] = add i32 [[PHI2]], 10
+; NEWGVN-NEXT:    ret i32 [[V7]]
+;
+Entry:
+  store <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32>* %P, align 1
+  br i1 %Cond1, label %BB0, label %BB4
+
+BB0:
+  br i1 %Cond2, label %BB1, label %BB2
+
+BB1:
+  %V1 = load <4 x i32>, <4 x i32>* %P, align 1
+  %V2 = extractelement <4 x i32> %V1, i64 2
+  br label %BB3
+
+BB2:
+  store i32 100, ptr %P, align 1
+  br label %BB3
+
+BB3:
+  %Phi1 = phi i32 [ %V2, %BB1 ], [ 100, %BB2 ]
+  %V3 = load i32, ptr %P, align 1
+  %V4 = add i32 %Phi1, %V3
+  br label %Exit
+
+BB4:
+  %V5 = load <4 x i32>, <4 x i32>* %P, align 1
+  %V6 = extractelement <4 x i32> %V5, i64 1
+  br label %Exit
+
+Exit:
+  %Phi2 = phi i32 [ %V4, %BB3 ], [ %V6, %BB4]
+  %V7 = add i32 %Phi2, 10
+  ret i32 %V7
+}
+
+define i32 @test44(ptr %P, i1 %Cond1, i1 %Cond2) {
+; Like test43, but the coerced values also feed a comparison: %V7 simplifies to
+; 15 on the BB4 path, so NEWGVN turns %cond3 (icmp eq %Phi2, 15) into an i1
+; phi-of-ops [ true, BB4 ], [ false, BB3 ] used directly by the branch in BB7.
+;     Entry--+
+;      |     |
+;     BB0    |
+;    /  \    |
+;  BB1  BB2  |
+;    \  /    |
+;     vv     |
+;      |     |
+;     BB3   BB4
+;       \   /
+;        BB5
+;         |
+;        BB6
+;         |
+;        BB7
+;       /   \
+;   Exit1  Exit2
+;
+; OLDGVN-LABEL: @test44(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    store <4 x i32> <i32 13, i32 14, i32 15, i32 16>, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB0:%.*]], label [[BB4:%.*]]
+; OLDGVN:       BB0:
+; OLDGVN-NEXT:    br i1 [[COND2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    br label [[BB3:%.*]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    store i32 100, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[BB3]]
+; OLDGVN:       BB3:
+; OLDGVN-NEXT:    [[V3:%.*]] = phi i32 [ 13, [[BB1]] ], [ 100, [[BB2]] ]
+; OLDGVN-NEXT:    [[PHI1:%.*]] = phi i32 [ 15, [[BB1]] ], [ 100, [[BB2]] ]
+; OLDGVN-NEXT:    br label [[BB5:%.*]]
+; OLDGVN:       BB4:
+; OLDGVN-NEXT:    br label [[BB5]]
+; OLDGVN:       BB5:
+; OLDGVN-NEXT:    [[PHI2:%.*]] = phi i32 [ [[V3]], [[BB3]] ], [ 15, [[BB4]] ]
+; OLDGVN-NEXT:    [[V8:%.*]] = add i32 [[PHI2]], 10
+; OLDGVN-NEXT:    [[V9:%.*]] = add i32 [[PHI2]], 100
+; OLDGVN-NEXT:    [[COND3:%.*]] = icmp eq i32 [[PHI2]], 15
+; OLDGVN-NEXT:    br i1 [[COND3]], label [[EXIT1:%.*]], label [[EXIT2:%.*]]
+; OLDGVN:       Exit1:
+; OLDGVN-NEXT:    ret i32 [[V8]]
+; OLDGVN:       Exit2:
+; OLDGVN-NEXT:    ret i32 [[V9]]
+;
+; NEWGVN-LABEL: @test44(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    store <4 x i32> <i32 13, i32 14, i32 15, i32 16>, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB0:%.*]], label [[BB4:%.*]]
+; NEWGVN:       BB0:
+; NEWGVN-NEXT:    br i1 [[COND2:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    br label [[BB3:%.*]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    store i32 100, ptr [[P]], align 1
+; NEWGVN-NEXT:    br label [[BB3]]
+; NEWGVN:       BB3:
+; NEWGVN-NEXT:    [[V3:%.*]] = phi i32 [ 13, [[BB1]] ], [ 100, [[BB2]] ]
+; NEWGVN-NEXT:    br label [[BB5:%.*]]
+; NEWGVN:       BB4:
+; NEWGVN-NEXT:    br label [[BB5]]
+; NEWGVN:       BB5:
+; NEWGVN-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB4]] ], [ false, [[BB3]] ]
+; NEWGVN-NEXT:    [[PHI2:%.*]] = phi i32 [ [[V3]], [[BB3]] ], [ 15, [[BB4]] ]
+; NEWGVN-NEXT:    [[V8:%.*]] = add i32 [[PHI2]], 10
+; NEWGVN-NEXT:    br label [[BB6:%.*]]
+; NEWGVN:       BB6:
+; NEWGVN-NEXT:    [[V9:%.*]] = add i32 [[PHI2]], 100
+; NEWGVN-NEXT:    br label [[BB7:%.*]]
+; NEWGVN:       BB7:
+; NEWGVN-NEXT:    br i1 [[PHIOFOPS]], label [[EXIT1:%.*]], label [[EXIT2:%.*]]
+; NEWGVN:       Exit1:
+; NEWGVN-NEXT:    ret i32 [[V8]]
+; NEWGVN:       Exit2:
+; NEWGVN-NEXT:    ret i32 [[V9]]
+;
+Entry:
+  store <4 x i32> <i32 13, i32 14, i32 15, i32 16>, <4 x i32>* %P, align 1
+  br i1 %Cond1, label %BB0, label %BB4
+
+BB0:
+  br i1 %Cond2, label %BB1, label %BB2
+
+BB1:
+  %V1 = load <4 x i32>, <4 x i32>* %P, align 1
+  %V2 = extractelement <4 x i32> %V1, i64 2
+  br label %BB3
+
+BB2:
+  store i32 100, ptr %P, align 1
+  br label %BB3
+
+BB3:
+  %Phi1 = phi i32 [ %V2, %BB1 ], [ 100, %BB2 ]
+  %V3 = load i32, ptr %P, align 1
+  br label %BB5
+
+BB4:
+  %V5 = load <4 x i32>, <4 x i32>* %P, align 1
+  %V6 = extractelement <4 x i32> %V5, i64 1
+  %V7 = add i32 %V6, 1
+  br label %BB5
+
+BB5:
+  %Phi2 = phi i32 [ %V3, %BB3 ], [ %V7, %BB4]
+  %V8 = add i32 %Phi2, 10
+  br label %BB6
+
+BB6:
+  %V9 = add i32 %Phi2, 100
+  br label %BB7
+
+BB7:
+  %cond3 = icmp eq i32 %Phi2, 15
+  br i1 %cond3, label %Exit1, label %Exit2
+
+Exit1:
+  ret i32 %V8
+
+Exit2:
+  ret i32 %V9
+}
+
+; Negative tests.
+
+define i32 @test45(ptr %P, i1 %cond1, i1 %cond2) {
+; Negative test: NEWGVN keeps the BB3 load in place (the BB1->BB3 edge is
+; critical and is not split), whereas OLDGVN splits the edge and PREs the
+; load into BB1.BB3_crit_edge.
+;   Entry
+;    / \
+;  BB1 BB2
+;   | \ |
+;   |  BB3
+;   | /
+;  Exit
+;
+; OLDGVN-LABEL: @test45(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; OLDGVN:       BB1:
+; OLDGVN-NEXT:    br i1 [[COND2:%.*]], label [[BB1_BB3_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
+; OLDGVN:       BB1.BB3_crit_edge:
+; OLDGVN-NEXT:    [[V1_PRE:%.*]] = load i32, ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    br label [[BB3:%.*]]
+; OLDGVN:       BB2:
+; OLDGVN-NEXT:    store i32 0, ptr [[P]], align 4
+; OLDGVN-NEXT:    br label [[BB3]]
+; OLDGVN:       BB3:
+; OLDGVN-NEXT:    [[V1:%.*]] = phi i32 [ [[V1_PRE]], [[BB1_BB3_CRIT_EDGE]] ], [ 0, [[BB2]] ]
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V2:%.*]] = phi i32 [ 0, [[BB1]] ], [ [[V1]], [[BB3]] ]
+; OLDGVN-NEXT:    ret i32 [[V2]]
+;
+; NEWGVN-LABEL: @test45(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; NEWGVN:       BB1:
+; NEWGVN-NEXT:    br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[EXIT:%.*]]
+; NEWGVN:       BB2:
+; NEWGVN-NEXT:    store i32 0, ptr [[P:%.*]], align 4
+; NEWGVN-NEXT:    br label [[BB3]]
+; NEWGVN:       BB3:
+; NEWGVN-NEXT:    [[V1:%.*]] = load i32, ptr [[P]], align 4
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V2:%.*]] = phi i32 [ 0, [[BB1]] ], [ [[V1]], [[BB3]] ]
+; NEWGVN-NEXT:    ret i32 [[V2]]
+;
+Entry:
+  br i1 %cond1, label %BB1, label %BB2
+
+BB1:
+  br i1 %cond2, label %BB3, label %Exit
+
+BB2:
+  store i32 0, ptr %P
+  br label %BB3
+
+BB3:
+  %V1 = load i32, ptr %P
+  br label %Exit
+
+Exit:
+  %V2 = phi i32 [ 0, %BB1 ], [ %V1, %BB3 ]
+  ret i32 %V2
+}
+
+define i32 @test46(ptr %P, i1 %Cond1, i1 %Cond2) {
+; Negative test: only the T predecessor stores to %P, so the Exit load cannot
+; be replaced by a phi. Both passes leave the IR unchanged (shared GVN prefix).
+;   Entry
+;   /   \
+;  T     F
+;  |    / \
+;  |   F1 F2
+;   \  |  |
+;    v v  v
+;     Exit
+;
+; GVN-LABEL: @test46(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; GVN-NEXT:    br label [[EXIT:%.*]]
+; GVN:       F:
+; GVN-NEXT:    br i1 [[COND2:%.*]], label [[F1:%.*]], label [[F2:%.*]]
+; GVN:       F1:
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       F2:
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[PHI:%.*]] = phi i32 [ 1, [[T]] ], [ 2, [[F1]] ], [ 3, [[F2]] ]
+; GVN-NEXT:    [[V1:%.*]] = load i32, ptr [[P]], align 1
+; GVN-NEXT:    [[V2:%.*]] = add i32 [[PHI]], [[V1]]
+; GVN-NEXT:    ret i32 [[V2]]
+;
+Entry:
+  br i1 %Cond1, label %T, label %F
+
+T:
+  store i32 42, ptr %P, align 1
+  br label %Exit
+
+F:
+  br i1 %Cond2, label %F1, label %F2
+
+F1:
+  br label %Exit
+
+F2:
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [1, %T], [2, %F1], [3, %F2]
+  %V1 = load i32, ptr %P, align 1
+  %V2 = add i32 %Phi, %V1
+  ret i32 %V2
+}
+
+; TODO: Add support for the case where MemoryPhi's definitions are from different pointers.
+define i32 @test47(ptr %P1,  ptr %P2, i1 %cond) {
+; The MemoryPhi at Exit merges stores to two different pointers, which this
+; patch does not yet handle (see TODO above): NEWGVN keeps both Exit loads,
+; while OLDGVN PREs a load into each arm and phis the results.
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; OLDGVN-LABEL: @test47(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 42, ptr [[P1:%.*]], align 4
+; OLDGVN-NEXT:    [[V2_PRE:%.*]] = load i32, ptr [[P2:%.*]], align 4
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    store i32 13, ptr [[P2]], align 4
+; OLDGVN-NEXT:    [[V1_PRE:%.*]] = load i32, ptr [[P1]], align 4
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V2:%.*]] = phi i32 [ 13, [[F]] ], [ [[V2_PRE]], [[T]] ]
+; OLDGVN-NEXT:    [[V1:%.*]] = phi i32 [ [[V1_PRE]], [[F]] ], [ 42, [[T]] ]
+; OLDGVN-NEXT:    [[V3:%.*]] = add i32 [[V1]], [[V2]]
+; OLDGVN-NEXT:    ret i32 [[V3]]
+;
+; NEWGVN-LABEL: @test47(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i32 42, ptr [[P1:%.*]], align 4
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    store i32 13, ptr [[P2:%.*]], align 4
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[V1:%.*]] = load i32, ptr [[P1]], align 4
+; NEWGVN-NEXT:    [[V2:%.*]] = load i32, ptr [[P2]], align 4
+; NEWGVN-NEXT:    [[V3:%.*]] = add i32 [[V1]], [[V2]]
+; NEWGVN-NEXT:    ret i32 [[V3]]
+;
+Entry:
+  br i1 %cond, label %T, label %F
+
+T:
+  store i32 42, ptr %P1, align 4
+  br label %Exit
+
+F:
+  store i32 13, ptr %P2, align 4
+  br label %Exit
+
+Exit:
+  %V1 = load i32, ptr %P1, align 4
+  %V2 = load i32, ptr %P2, align 4
+  %V3 = add i32 %V1, %V2
+  ret i32 %V3
+}
+
+; TODO: Add extra analysis to find load coercion opportunities deeper in the CFG.
+define i32 @test48(ptr %P, i1 %cond1, i1 %cond2) {
+; Deeper-CFG case not yet handled (see TODO above): the redundant loads %V1
+; and %V2 are removed, but the Exit load %V3 remains; both passes agree
+; (shared GVN prefix).
+;   Entry
+;    /  \
+;   T    F
+;   |   / \
+;   |  F1 F2
+;    \ /  /
+;     v  v
+;     Exit
+;
+; GVN-LABEL: @test48(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    store i32 42, ptr [[P:%.*]], align 4
+; GVN-NEXT:    br label [[EXIT:%.*]]
+; GVN:       F:
+; GVN-NEXT:    br i1 [[COND2:%.*]], label [[F1:%.*]], label [[F2:%.*]]
+; GVN:       F1:
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       F2:
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[V3:%.*]] = load i32, ptr [[P]], align 4
+; GVN-NEXT:    ret i32 [[V3]]
+;
+Entry:
+  %V1 = load <2 x i32>, <2 x i32>* %P, align 4
+  br i1 %cond1, label %T, label %F
+
+T:
+  store i32 42, ptr %P, align 4
+  br label %Exit
+
+F:
+  br i1 %cond2, label %F1, label %F2
+
+F1:
+  %V2 = load i32, ptr %P, align 4
+  br label %Exit
+
+F2:
+  br label %Exit
+
+Exit:
+  %V3 = load i32, ptr %P, align 4
+  ret i32 %V3
+}
+
+define void @test49(ptr %P, float %V1) {
+; Negative test: the load and store go through a loop-varying GEP, so nothing
+; can be coerced; both passes leave the loop body untouched (shared GVN prefix).
+;  Entry _
+;    |  / |
+;    | v  |
+;  Loop   |
+;    | \__|
+;    |
+;    v
+;   Exit
+;
+; GVN-LABEL: @test49(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br label [[LOOP:%.*]]
+; GVN:       Loop:
+; GVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP]] ]
+; GVN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 [[INDEX]]
+; GVN-NEXT:    [[V2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; GVN-NEXT:    [[V3:%.*]] = fdiv fast float [[V2]], [[V1:%.*]]
+; GVN-NEXT:    store float [[V3]], ptr [[ARRAYIDX]], align 4
+; GVN-NEXT:    [[INDEX_INC]] = add nuw nsw i64 [[INDEX]], 1
+; GVN-NEXT:    [[COND:%.*]] = icmp eq i64 [[INDEX_INC]], 1024
+; GVN-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
+; GVN:       Exit:
+; GVN-NEXT:    ret void
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 0, %Entry ], [ %Index.inc, %Loop ]
+  %Arrayidx = getelementptr inbounds float, ptr %P, i64 %Index
+  %V2 = load float, ptr %Arrayidx, align 4
+  %V3 = fdiv fast float %V2, %V1
+  store float %V3, ptr %Arrayidx, align 4
+  %Index.inc = add nuw nsw i64 %Index, 1
+  %Cond = icmp eq i64 %Index.inc, 1024
+  br i1 %Cond, label %Exit, label %Loop
+
+Exit:
+  ret void
+}
+
+define i64 @test50(ptr %P, i64 %TC) {
+; Negative test: the store goes through %P1 (a loop-varying GEP of %P) while
+; the load reads %P itself, so the load cannot be forwarded; both passes leave
+; it in place (shared GVN prefix).
+;  Entry _
+;    |  / |
+;    | v  |
+;  Loop   |
+;    | \__|
+;    |
+;    v
+;   Exit
+;
+; GVN-LABEL: @test50(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br label [[LOOP:%.*]]
+; GVN:       Loop:
+; GVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP]] ]
+; GVN-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
+; GVN-NEXT:    store i64 [[INDEX]], ptr [[P1]], align 4
+; GVN-NEXT:    [[V1:%.*]] = load i64, ptr [[P]], align 4
+; GVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; GVN-NEXT:    [[COND:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; GVN-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; GVN:       Exit:
+; GVN-NEXT:    ret i64 [[V1]]
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 1, %Entry ], [ %Index.inc, %Loop ]
+  %P1 = getelementptr i64, ptr %P, i64 %Index
+  store i64 %Index, ptr %P1, align 4
+  %V1 = load i64, ptr %P, align 4
+  %Index.inc = add i64 %Index, 1
+  %Cond = icmp ne i64 %Index.inc, %TC
+  br i1 %Cond, label %Loop, label %Exit
+
+Exit:
+  ret i64 %V1
+}
+
+define i64 @test51(ptr %P, i64 %TC, i1 %Cond1) {
+; Negative test: only the T arm stores (through a loop-varying GEP of %P), so
+; the latch load of %P is not replaceable; both passes keep it (shared GVN
+; prefix).
+;    Entry
+;      |
+;    Loop<-----+
+;     / \      |
+;    T   F     |
+;     \ /      |
+;  Loop.Latch--+
+;      |
+;      v
+;     Exit
+;
+; GVN-LABEL: @test51(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br label [[LOOP:%.*]]
+; GVN:       Loop:
+; GVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP_LATCH:%.*]] ]
+; GVN-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
+; GVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    store i64 [[INDEX]], ptr [[P1]], align 4
+; GVN-NEXT:    br label [[LOOP_LATCH]]
+; GVN:       F:
+; GVN-NEXT:    br label [[LOOP_LATCH]]
+; GVN:       Loop.Latch:
+; GVN-NEXT:    [[PHI:%.*]] = phi i64 [ 100, [[T]] ], [ 50, [[F]] ]
+; GVN-NEXT:    [[V1:%.*]] = load i64, ptr [[P]], align 4
+; GVN-NEXT:    [[V2:%.*]] = add i64 [[V1]], [[PHI]]
+; GVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; GVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; GVN-NEXT:    br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]]
+; GVN:       Exit:
+; GVN-NEXT:    ret i64 [[V2]]
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 1, %Entry ], [ %Index.inc, %Loop.Latch ]
+  %P1 = getelementptr i64, ptr %P, i64 %Index
+  br i1 %Cond1, label %T, label %F
+
+T:
+  store i64 %Index, ptr %P1, align 4
+  br label %Loop.Latch
+
+F:
+  br label %Loop.Latch
+
+Loop.Latch:
+  %phi = phi i64 [ 100, %T ], [ 50, %F ]
+  %V1 = load i64, ptr %P, align 4
+  %V2 = add i64 %V1, %phi
+  %Index.inc = add i64 %Index, 1
+  %Cond2 = icmp ne i64 %Index.inc, %TC
+  br i1 %Cond2, label %Loop, label %Exit
+
+Exit:
+  ret i64 %V2
+}
+
+; TODO: Add loop load coercion.
+define i64 @test52(ptr %P, i64 %TC, i1 %Cond1) {
+;    Entry
+;      |
+;    Loop<-----+
+;     / \      |
+;    T   F     |
+;     \ /      |
+;  Loop.Latch--+
+;      |
+;      v
+;     Exit
+;
+; OLDGVN-LABEL: @test52(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br label [[LOOP:%.*]]
+; OLDGVN:       Loop:
+; OLDGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP_LATCH:%.*]] ]
+; OLDGVN-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
+; OLDGVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i64 [[INDEX]], ptr [[P1]], align 4
+; OLDGVN-NEXT:    br label [[LOOP_LATCH]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[V1_PRE:%.*]] = load i64, ptr [[P1]], align 4
+; OLDGVN-NEXT:    br label [[LOOP_LATCH]]
+; OLDGVN:       Loop.Latch:
+; OLDGVN-NEXT:    [[V1:%.*]] = phi i64 [ [[INDEX]], [[T]] ], [ [[V1_PRE]], [[F]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i64 [ 100, [[T]] ], [ 50, [[F]] ]
+; OLDGVN-NEXT:    [[V2:%.*]] = add i64 [[V1]], [[PHI]]
+; OLDGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; OLDGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; OLDGVN-NEXT:    br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    ret i64 [[V2]]
+;
+; NEWGVN-LABEL: @test52(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br label [[LOOP:%.*]]
+; NEWGVN:       Loop:
+; NEWGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP_LATCH:%.*]] ]
+; NEWGVN-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
+; NEWGVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i64 [[INDEX]], ptr [[P1]], align 4
+; NEWGVN-NEXT:    br label [[LOOP_LATCH]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    br label [[LOOP_LATCH]]
+; NEWGVN:       Loop.Latch:
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i64 [ 100, [[T]] ], [ 50, [[F]] ]
+; NEWGVN-NEXT:    [[V1:%.*]] = load i64, ptr [[P1]], align 4
+; NEWGVN-NEXT:    [[V2:%.*]] = add i64 [[V1]], [[PHI]]
+; NEWGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; NEWGVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; NEWGVN-NEXT:    br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    ret i64 [[V2]]
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 1, %Entry ], [ %Index.inc, %Loop.Latch ]
+  %P1 = getelementptr i64, ptr %P, i64 %Index
+  br i1 %Cond1, label %T, label %F
+
+T:
+  store i64 %Index, ptr %P1, align 4
+  br label %Loop.Latch
+
+F:
+  br label %Loop.Latch
+
+Loop.Latch:
+  %Phi = phi i64 [ 100, %T ], [ 50, %F ]
+  %V1 = load i64, ptr %P1, align 4
+  %V2 = add i64 %V1, %Phi
+  %Index.inc = add i64 %Index, 1
+  %Cond2 = icmp ne i64 %Index.inc, %TC
+  br i1 %Cond2, label %Loop, label %Exit
+
+Exit:
+  ret i64 %V2
+}
+
+define i64 @test53(ptr %P, i64 %TC, i1 %Cond1) {
+;    Entry
+;      |
+;    Loop<-----+
+;     / \      |
+;    T   F     |
+;     \ /      |
+;  Loop.Latch--+
+;      |
+;      v
+;     Exit
+;
+; GVN-LABEL: @test53(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br label [[LOOP:%.*]]
+; GVN:       Loop:
+; GVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[V0:%.*]], [[LOOP_LATCH:%.*]] ]
+; GVN-NEXT:    [[V0]] = add i64 [[INDEX]], 1
+; GVN-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[V0]]
+; GVN-NEXT:    [[V1:%.*]] = load i64, ptr [[P1]], align 4
+; GVN-NEXT:    br i1 [[COND1:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    br label [[LOOP_LATCH]]
+; GVN:       F:
+; GVN-NEXT:    br label [[LOOP_LATCH]]
+; GVN:       Loop.Latch:
+; GVN-NEXT:    [[PHI:%.*]] = phi i64 [ 500, [[T]] ], [ 100, [[F]] ]
+; GVN-NEXT:    [[P2:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX]]
+; GVN-NEXT:    [[V2:%.*]] = load i64, ptr [[P2]], align 4
+; GVN-NEXT:    [[V3:%.*]] = add i64 [[PHI]], [[V2]]
+; GVN-NEXT:    [[V4:%.*]] = add i64 [[V3]], [[V1]]
+; GVN-NEXT:    [[COND2:%.*]] = icmp ne i64 [[V0]], [[TC:%.*]]
+; GVN-NEXT:    br i1 [[COND2]], label [[LOOP]], label [[EXIT:%.*]]
+; GVN:       Exit:
+; GVN-NEXT:    ret i64 [[V4]]
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 1, %Entry ], [ %Index.inc, %Loop.Latch ]
+  %V0 = add i64 %Index, 1
+  %P1 = getelementptr i64, ptr %P, i64 %V0
+  %V1 = load i64, ptr %P1, align 4
+  br i1 %Cond1, label %T, label %F
+
+T:
+  br label %Loop.Latch
+
+F:
+  br label %Loop.Latch
+
+Loop.Latch:
+  %Phi = phi i64 [ 500, %T ], [ 100, %F ]
+  %P2 = getelementptr i64, ptr %P, i64 %Index
+  %V2 = load i64, ptr %P2, align 4
+  %V3 = add i64 %Phi, %V2
+  %V4 = add i64 %V3, %V1
+  %Index.inc = add i64 %Index, 1
+  %Cond2 = icmp ne i64 %Index.inc, %TC
+  br i1 %Cond2, label %Loop, label %Exit
+
+Exit:
+  ret i64 %V4
+}
+
+declare void @foo2() #1
+
+define i32 @test54(ptr %P, i1 %Cond) {
+;   Entry
+;    /  \
+;   T    F
+;    \  /
+;     vv
+;    Exit
+;
+; GVN-LABEL: @test54(
+; GVN-NEXT:  Entry:
+; GVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; GVN:       T:
+; GVN-NEXT:    [[V1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 4
+; GVN-NEXT:    [[V2:%.*]] = extractelement <2 x i32> [[V1]], i64 1
+; GVN-NEXT:    br label [[EXIT:%.*]]
+; GVN:       F:
+; GVN-NEXT:    br label [[EXIT]]
+; GVN:       Exit:
+; GVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ 100, [[F]] ]
+; GVN-NEXT:    call void @foo2() #[[ATTR1:[0-9]+]]
+; GVN-NEXT:    [[V3:%.*]] = load i32, ptr [[P]], align 8
+; GVN-NEXT:    [[V4:%.*]] = add i32 [[V3]], [[PHI]]
+; GVN-NEXT:    ret i32 [[V4]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  %V1 = load <2 x i32>, <2 x i32>* %P, align 4
+  %V2 = extractelement <2 x i32> %V1, i64 1
+  br label %Exit
+
+F:
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [ %V2, %T ], [ 100, %F ]
+  call void @foo2() #1
+  %V3 = load i32, ptr %P, align 8
+  %V4 = add i32 %V3, %Phi
+  ret i32 %V4
+}
+
+define i32 @test55(ptr %P, i32 %V1) {
+; OLDGVN-LABEL: @test55(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 true, label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 100, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    store i32 [[V1:%.*]], ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    ret i32 300
+;
+; NEWGVN-LABEL: @test55(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 true, label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i32 100, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    store i8 poison, ptr null, align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    ret i32 300
+;
+Entry:
+  br i1 true, label %T, label %F
+
+T:
+  store i32 100, ptr %P, align 1
+  br label %Exit
+
+F:
+  store i32 %V1, ptr %P, align 1
+  br label %Exit
+
+Exit:
+  %phi = phi i32 [ 200, %T ], [ 500, %F ]
+  %V2 = load i32, ptr %P, align 1
+  %V3 = add i32 %V2, %phi
+  ret i32 %V3
+}
+
+define i32 @test56(ptr %P, i32 %V1) {
+; OLDGVN-LABEL: @test56(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 true, label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    [[V2:%.*]] = load i32, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[V3:%.*]] = load i32, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V4:%.*]] = phi i32 [ [[V2]], [[T]] ], [ undef, [[F]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ poison, [[F]] ]
+; OLDGVN-NEXT:    [[V5:%.*]] = add i32 [[V4]], [[PHI]]
+; OLDGVN-NEXT:    ret i32 [[V5]]
+;
+; NEWGVN-LABEL: @test56(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 true, label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    [[V2:%.*]] = load i32, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    store i8 poison, ptr null, align 1
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ [[V2]], [[T]] ], [ poison, [[F]] ]
+; NEWGVN-NEXT:    [[V5:%.*]] = add i32 [[PHI]], [[PHI]]
+; NEWGVN-NEXT:    ret i32 [[V5]]
+;
+Entry:
+  br i1 true, label %T, label %F
+
+T:
+  %V2 = load i32, ptr %P, align 1
+  br label %Exit
+
+F:
+  %V3 = load i32, ptr %P, align 1
+  br label %Exit
+
+Exit:
+  %phi = phi i32 [ %V2, %T ], [ %V3, %F ]
+  %V4 = load i32, ptr %P, align 1
+  %V5 = add i32 %V4, %phi
+  ret i32 %V5
+}
+
+define i64 @test57(ptr %P, i64 %TC) {
+;  Entry _
+;    |  / |
+;    | v  |
+;  Loop   |
+;    | \__|
+;    |
+;    v
+;   Exit
+;
+; OLDGVN-LABEL: @test57(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    [[V1_PRE:%.*]] = load <2 x i64>, ptr [[P:%.*]], align 4
+; OLDGVN-NEXT:    br label [[LOOP:%.*]]
+; OLDGVN:       Loop:
+; OLDGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP]] ]
+; OLDGVN-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P]], i64 [[INDEX]]
+; OLDGVN-NEXT:    [[V4:%.*]] = extractelement <2 x i64> [[V1_PRE]], i64 1
+; OLDGVN-NEXT:    [[V2:%.*]] = load i64, ptr [[P1]], align 4
+; OLDGVN-NEXT:    [[V3:%.*]] = add i64 [[V4]], [[V2]]
+; OLDGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; OLDGVN-NEXT:    [[COND:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; OLDGVN-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    ret i64 [[V3]]
+;
+; NEWGVN-LABEL: @test57(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br label [[LOOP:%.*]]
+; NEWGVN:       Loop:
+; NEWGVN-NEXT:    [[INDEX:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[INDEX_INC:%.*]], [[LOOP]] ]
+; NEWGVN-NEXT:    [[P1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[INDEX]]
+; NEWGVN-NEXT:    [[V1:%.*]] = load <2 x i64>, ptr [[P]], align 4
+; NEWGVN-NEXT:    [[V4:%.*]] = extractelement <2 x i64> [[V1]], i64 1
+; NEWGVN-NEXT:    [[V2:%.*]] = load i64, ptr [[P1]], align 4
+; NEWGVN-NEXT:    [[V3:%.*]] = add i64 [[V4]], [[V2]]
+; NEWGVN-NEXT:    [[INDEX_INC]] = add i64 [[INDEX]], 1
+; NEWGVN-NEXT:    [[COND:%.*]] = icmp ne i64 [[INDEX_INC]], [[TC:%.*]]
+; NEWGVN-NEXT:    br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    ret i64 [[V3]]
+;
+Entry:
+  br label %Loop
+
+Loop:
+  %Index = phi i64 [ 1, %Entry ], [ %Index.inc, %Loop ]
+  %P1 = getelementptr i64, ptr %P, i64 %Index
+  %V1 = load <2 x i64>, ptr %P, align 4
+  %V4 = extractelement <2 x i64> %V1, i64 1
+  %V2 = load i64, ptr %P1, align 4
+  %V3 = add i64 %V4, %V2
+  %Index.inc = add i64 %Index, 1
+  %Cond = icmp ne i64 %Index.inc, %TC
+  br i1 %Cond, label %Loop, label %Exit
+
+Exit:
+  ret i64 %V3
+}
+
+; TODO: Add better support for call instructions.
+define i32 @test58(ptr %P, i1 %Cond) {
+; OLDGVN-LABEL: @test58(
+; OLDGVN-NEXT:  Entry:
+; OLDGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; OLDGVN:       T:
+; OLDGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; OLDGVN-NEXT:    br label [[EXIT:%.*]]
+; OLDGVN:       F:
+; OLDGVN-NEXT:    [[ALLOCA:%.*]] = alloca [256 x i32], align 4
+; OLDGVN-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ALLOCA]], i8 0, i64 1024, i1 false)
+; OLDGVN-NEXT:    [[V2_PRE:%.*]] = load i32, ptr [[P]], align 1
+; OLDGVN-NEXT:    br label [[EXIT]]
+; OLDGVN:       Exit:
+; OLDGVN-NEXT:    [[V2:%.*]] = phi i32 [ 42, [[T]] ], [ [[V2_PRE]], [[F]] ]
+; OLDGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 10, [[T]] ], [ 0, [[F]] ]
+; OLDGVN-NEXT:    [[V3:%.*]] = add i32 [[PHI]], [[V2]]
+; OLDGVN-NEXT:    ret i32 [[V3]]
+;
+; NEWGVN-LABEL: @test58(
+; NEWGVN-NEXT:  Entry:
+; NEWGVN-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; NEWGVN:       T:
+; NEWGVN-NEXT:    store i32 42, ptr [[P:%.*]], align 1
+; NEWGVN-NEXT:    br label [[EXIT:%.*]]
+; NEWGVN:       F:
+; NEWGVN-NEXT:    [[ALLOCA:%.*]] = alloca [256 x i32], align 4
+; NEWGVN-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ALLOCA]], i8 0, i64 1024, i1 false)
+; NEWGVN-NEXT:    br label [[EXIT]]
+; NEWGVN:       Exit:
+; NEWGVN-NEXT:    [[PHI:%.*]] = phi i32 [ 10, [[T]] ], [ 0, [[F]] ]
+; NEWGVN-NEXT:    [[V2:%.*]] = load i32, ptr [[P]], align 1
+; NEWGVN-NEXT:    [[V3:%.*]] = add i32 [[PHI]], [[V2]]
+; NEWGVN-NEXT:    ret i32 [[V3]]
+;
+Entry:
+  br i1 %Cond, label %T, label %F
+
+T:
+  store i32 42, ptr %P, align 1
+  br label %Exit
+
+F:
+  %Alloca = alloca [256 x i32], align 4
+  call void @llvm.memset.p0.i64(ptr align 4 %Alloca, i8 0, i64 1024, i1 false)
+  %Gep = getelementptr inbounds [256 x i32], ptr %Alloca, i32 0, i32 0
+  %V1 = load i32, ptr %Gep
+  br label %Exit
+
+Exit:
+  %Phi = phi i32 [ 10, %T ], [ %V1, %F ]
+  %V2 = load i32, ptr %P, align 1
+  %V3 =  add i32 %Phi, %V2
+  ret i32 %V3
+}
+
+attributes #0 = { readonly }
+attributes #1 = { readnone noreturn }
+
+declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
diff --git a/llvm/test/Transforms/NewGVN/pr35125.ll b/llvm/test/Transforms/NewGVN/pr35125.ll
index 9a96594e3446db1..0041a1754469900 100644
--- a/llvm/test/Transforms/NewGVN/pr35125.ll
+++ b/llvm/test/Transforms/NewGVN/pr35125.ll
@@ -29,7 +29,7 @@ define i32 @main() #0 {
 ; CHECK-NEXT:    store i32 [[TMP1]], ptr @a, align 4
 ; CHECK-NEXT:    br label [[IF_END6]]
 ; CHECK:       if.end6:
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @a, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[TMP1]], [[LOR_END]] ], [ [[TMP0]], [[IF_END]] ]
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr @.str, i32 [[TMP2]])
 ; CHECK-NEXT:    ret i32 0
 ;



More information about the llvm-commits mailing list