[llvm] 42f8b91 - Revert "[DSE] Add first version of MemorySSA-backed DSE (Bottom up walk)."

Kadir Cetinkaya via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 11 06:34:58 PST 2020


Author: Kadir Cetinkaya
Date: 2020-02-11T15:34:48+01:00
New Revision: 42f8b915eb72364cc5e84adf58a2c2d4947e8b10

URL: https://github.com/llvm/llvm-project/commit/42f8b915eb72364cc5e84adf58a2c2d4947e8b10
DIFF: https://github.com/llvm/llvm-project/commit/42f8b915eb72364cc5e84adf58a2c2d4947e8b10.diff

LOG: Revert "[DSE] Add first version of MemorySSA-backed DSE (Bottom up walk)."

This reverts commit d0c4d4fe0929098a98d9fb20c5e5e19d71341517.

Revert "[DSE,MSSA] Move more passing test cases from todo to simple.ll."

This reverts commit 02266e64bb6dacf76f3aa510df4b59f66d834b1f.

Revert "[DSE,MSSA] Adjust mda-with-dbg-values.ll to MSSA backed DSE."

This reverts commit 74f03e4ff0c1f11c29102329af95f7d9782426dc.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
    llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll

Removed: 
    llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e039d97139db..f56a4c5f8920 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -29,10 +29,7 @@
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
-#include "llvm/Analysis/MemorySSA.h"
-#include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/OrderedBasicBlock.h"
-#include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
@@ -43,7 +40,6 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/InstIterator.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -91,20 +87,11 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging",
   cl::init(true), cl::Hidden,
   cl::desc("Enable partial store merging in DSE"));
 
+// Temporary dummy option for tests.
 static cl::opt<bool>
     EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden,
                     cl::desc("Use the new MemorySSA-backed DSE."));
 
-static cl::opt<unsigned>
-    MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden,
-                       cl::desc("The number of memory instructions to scan for "
-                                "dead store elimination (default = 100)"));
-
-static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
-    "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
-    cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
-             "other stores per basic block (default = 5000)"));
-
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
@@ -1367,490 +1354,22 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
   return MadeChange;
 }
 
-namespace {
-//=============================================================================
-// MemorySSA backed dead store elimination.
-//
-// The code below implements dead store elimination using MemorySSA. It uses
-// the following general approach: given a MemoryDef, walk upwards to find
-// clobbering MemoryDefs that may be killed by the starting def. Then check
-// that there are no uses that may read the location of the original MemoryDef
-// in between both MemoryDefs. A bit more concretely:
-//
-// For all MemoryDefs StartDef:
-// 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking
-//    upwards.
-// 2. Check that there are no reads between DomAccess and the StartDef by
-//    checking all uses starting at DomAccess and walking until we see StartDef.
-// 3. For each found DomDef, check that:
-//   1. There are no barrier instructions between DomDef and StartDef (like
-//       throws or stores with ordering constraints).
-//   2. StartDef is executed whenever DomDef is executed.
-//   3. StartDef completely overwrites DomDef.
-// 4. Erase DomDef from the function and MemorySSA.
-
-// Returns true if \p M is an intrisnic that does not read or write memory.
-bool isNoopIntrinsic(MemoryUseOrDef *M) {
-  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(M->getMemoryInst())) {
-    switch (II->getIntrinsicID()) {
-    case Intrinsic::lifetime_start:
-    case Intrinsic::lifetime_end:
-    case Intrinsic::invariant_end:
-    case Intrinsic::launder_invariant_group:
-    case Intrinsic::assume:
-      return true;
-    case Intrinsic::dbg_addr:
-    case Intrinsic::dbg_declare:
-    case Intrinsic::dbg_label:
-    case Intrinsic::dbg_value:
-      llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
-    default:
-      return false;
-    }
-  }
-  return false;
-}
-
-// Check if we can ignore \p D for DSE.
-bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
-  Instruction *DI = D->getMemoryInst();
-  // Calls that only access inaccessible memory cannot read or write any memory
-  // locations we consider for elimination.
-  if (auto CS = CallSite(DI))
-    if (CS.onlyAccessesInaccessibleMemory())
-      return true;
-
-  // We can eliminate stores to locations not visible to the caller across
-  // throwing instructions.
-  if (DI->mayThrow() && !DefVisibleToCaller)
-    return true;
-
-  // We can remove the dead stores, irrespective of the fence and its ordering
-  // (release/acquire/seq_cst). Fences only constraints the ordering of
-  // already visible stores, it does not make a store visible to other
-  // threads. So, skipping over a fence does not change a store from being
-  // dead.
-  if (isa<FenceInst>(DI))
-    return true;
-
-  // Skip intrinsics that do not really read or modify memory.
-  if (isNoopIntrinsic(D))
-    return true;
-
-  return false;
-}
-
-struct DSEState {
-  Function &F;
-  AliasAnalysis &AA;
-  MemorySSA &MSSA;
-  DominatorTree &DT;
-  PostDominatorTree &PDT;
-  const TargetLibraryInfo &TLI;
-
-  // All MemoryDefs that potentially could kill other MemDefs.
-  SmallVector<MemoryDef *, 64> MemDefs;
-  // Any that should be skipped as they are already deleted
-  SmallPtrSet<MemoryAccess *, 4> SkipStores;
-  // Keep track of all of the objects that are invisible to the caller until the
-  // function returns.
-  SmallPtrSet<const Value *, 16> InvisibleToCaller;
-  // Keep track of blocks with throwing instructions not modeled in MemorySSA.
-  SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
-
-  DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
-           PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
-      : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {}
-
-  static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
-                      DominatorTree &DT, PostDominatorTree &PDT,
-                      const TargetLibraryInfo &TLI) {
-    DSEState State(F, AA, MSSA, DT, PDT, TLI);
-    // Collect blocks with throwing instructions not modeled in MemorySSA and
-    // alloc-like objects.
-    for (Instruction &I : instructions(F)) {
-      if (I.mayThrow() && !MSSA.getMemoryAccess(&I))
-        State.ThrowingBlocks.insert(I.getParent());
-
-      auto *MD = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(&I));
-      if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
-          hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I))
-        State.MemDefs.push_back(MD);
-
-      // Track alloca and alloca-like objects. Here we care about objects not
-      // visible to the caller during function execution. Alloca objects are
-      // invalid in the caller, for alloca-like objects we ensure that they are
-      // not captured throughout the function.
-      if (isa<AllocaInst>(&I) ||
-          (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true)))
-        State.InvisibleToCaller.insert(&I);
-    }
-    // Treat byval or inalloca arguments the same as Allocas, stores to them are
-    // dead at the end of the function.
-    for (Argument &AI : F.args())
-      if (AI.hasByValOrInAllocaAttr())
-        State.InvisibleToCaller.insert(&AI);
-    return State;
-  }
-
-  Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const {
-    if (!I->mayWriteToMemory())
-      return None;
-
-    if (auto *MTI = dyn_cast<AnyMemIntrinsic>(I))
-      return {MemoryLocation::getForDest(MTI)};
-
-    if (auto CS = CallSite(I)) {
-      if (Function *F = CS.getCalledFunction()) {
-        StringRef FnName = F->getName();
-        if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy))
-          return {MemoryLocation(CS.getArgument(0))};
-        if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy))
-          return {MemoryLocation(CS.getArgument(0))};
-        if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat))
-          return {MemoryLocation(CS.getArgument(0))};
-        if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat))
-          return {MemoryLocation(CS.getArgument(0))};
-      }
-      return None;
-    }
-
-    return MemoryLocation::getOrNone(I);
-  }
-
-  /// Returns true if \p Use completely overwrites \p DefLoc.
-  bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const {
-    // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
-    // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
-    // MemoryDef.
-    if (!UseInst->mayWriteToMemory())
-      return false;
-
-    if (auto CS = CallSite(UseInst))
-      if (CS.onlyAccessesInaccessibleMemory())
-        return false;
-
-    ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc);
-    // If necessary, perform additional analysis.
-    if (isModSet(MR))
-      MR = AA.callCapturesBefore(UseInst, DefLoc, &DT);
-
-    Optional<MemoryLocation> UseLoc = getLocForWriteEx(UseInst);
-    return isModSet(MR) && isMustSet(MR) &&
-           UseLoc->Size.getValue() >= DefLoc.Size.getValue();
-  }
-
-  /// Returns true if \p Use may read from \p DefLoc.
-  bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const {
-    if (!UseInst->mayReadFromMemory())
-      return false;
-
-    if (auto CS = CallSite(UseInst))
-      if (CS.onlyAccessesInaccessibleMemory())
-        return false;
-
-    ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc);
-    // If necessary, perform additional analysis.
-    if (isRefSet(MR))
-      MR = AA.callCapturesBefore(UseInst, DefLoc, &DT);
-    return isRefSet(MR);
-  }
-
-  // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no
-  // read access in between or return None otherwise. The returned value may not
-  // (completely) overwrite \p DefLoc. Currently we bail out when we encounter
-  // any of the following
-  //  * An aliasing MemoryUse (read).
-  //  * A MemoryPHI.
-  Optional<MemoryAccess *> getDomMemoryDef(MemoryDef *KillingDef,
-                                           MemoryAccess *Current,
-                                           MemoryLocation DefLoc,
-                                           bool DefVisibleToCaller,
-                                           int &ScanLimit) const {
-    MemoryDef *DomDef;
-    MemoryAccess *StartDef = Current;
-    bool StepAgain;
-    LLVM_DEBUG(dbgs() << "  trying to get dominating access for " << *Current
-                      << "\n");
-    // Find the next clobbering Mod access for DefLoc, starting at Current.
-    do {
-      StepAgain = false;
-      // Reached TOP.
-      if (MSSA.isLiveOnEntryDef(Current))
-        return None;
-
-      MemoryUseOrDef *CurrentUD = dyn_cast<MemoryUseOrDef>(Current);
-      if (!CurrentUD)
-        return None;
-
-      // Look for access that clobber DefLoc.
-      MemoryAccess *DomAccess =
-          MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(
-              CurrentUD->getDefiningAccess(), DefLoc);
-      DomDef = dyn_cast<MemoryDef>(DomAccess);
-      if (!DomDef || MSSA.isLiveOnEntryDef(DomDef))
-        return None;
-
-      // Check if we can skip DomDef for DSE. We also require the KillingDef
-      // execute whenever DomDef executes and use post-dominance to ensure that.
-      if (canSkipDef(DomDef, DefVisibleToCaller) ||
-          !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock())) {
-        StepAgain = true;
-        Current = DomDef;
-      }
-
-    } while (StepAgain);
-
-    LLVM_DEBUG(dbgs() << "  Checking for reads of " << *DomDef << " ("
-                      << *DomDef->getMemoryInst() << ")\n");
-
-    SmallSetVector<MemoryAccess *, 32> WorkList;
-    auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
-      for (Use &U : Acc->uses())
-        WorkList.insert(cast<MemoryAccess>(U.getUser()));
-    };
-    PushMemUses(DomDef);
-
-    // Check if DomDef may be read.
-    for (unsigned I = 0; I < WorkList.size(); I++) {
-      MemoryAccess *UseAccess = WorkList[I];
-
-      LLVM_DEBUG(dbgs() << "   Checking use " << *UseAccess);
-      if (--ScanLimit == 0) {
-        LLVM_DEBUG(dbgs() << "  ...  hit scan limit\n");
-        return None;
-      }
-
-      // Bail out on MemoryPhis for now.
-      if (isa<MemoryPhi>(UseAccess)) {
-        LLVM_DEBUG(dbgs() << "  ...  hit MemoryPhi\n");
-        return None;
-      }
-
-      Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
-      LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
-
-      if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess))) {
-        PushMemUses(UseAccess);
-        continue;
-      }
-
-      // Uses which may read the original MemoryDef mean we cannot eliminate the
-      // original MD. Stop walk.
-      if (isReadClobber(DefLoc, UseInst)) {
-        LLVM_DEBUG(dbgs() << "  ... found read clobber\n");
-        return None;
-      }
-
-      if (StartDef == UseAccess)
-        continue;
-
-      // Check all uses for MemoryDefs, except for defs completely overwriting
-      // the original location. Otherwise we have to check uses of *all*
-      // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
-      // miss cases like the following
-      //   1 = Def(LoE) ; <----- DomDef stores [0,1]
-      //   2 = Def(1)   ; (2, 1) = NoAlias,   stores [2,3]
-      //   Use(2)       ; MayAlias 2 *and* 1, loads [0, 3].
-      //                  (The Use points to the *first* Def it may alias)
-      //   3 = Def(1)   ; <---- Current  (3, 2) = NoAlias, (3,1) = MayAlias,
-      //                  stores [0,1]
-      if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
-        if (!isCompleteOverwrite(DefLoc, UseInst))
-          PushMemUses(UseDef);
-      }
-    }
-
-    // No aliasing MemoryUses of DomDef found, DomDef is potentially dead.
-    return {DomDef};
-  }
-
-  // Delete dead memory defs
-  void deleteDeadInstruction(Instruction *SI) {
-    MemorySSAUpdater Updater(&MSSA);
-    SmallVector<Instruction *, 32> NowDeadInsts;
-    NowDeadInsts.push_back(SI);
-    --NumFastOther;
-
-    while (!NowDeadInsts.empty()) {
-      Instruction *DeadInst = NowDeadInsts.pop_back_val();
-      ++NumFastOther;
-
-      // Try to preserve debug information attached to the dead instruction.
-      salvageDebugInfo(*DeadInst);
-
-      // Remove the Instruction from MSSA.
-      if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
-        Updater.removeMemoryAccess(MA);
-        if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
-          SkipStores.insert(MD);
-        }
-      }
-
-      // Remove its operands
-      for (Use &O : DeadInst->operands())
-        if (Instruction *OpI = dyn_cast<Instruction>(O)) {
-          O = nullptr;
-          if (isInstructionTriviallyDead(OpI, &TLI))
-            NowDeadInsts.push_back(OpI);
-        }
-
-      DeadInst->eraseFromParent();
-    }
-  }
-
-  // Check for any extra throws between SI and NI that block DSE.  This only
-  // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may
-  // throw are handled during the walk from one def to the next.
-  bool mayThrowBetween(Instruction *SI, Instruction *NI,
-                       const Value *SILocUnd) const {
-    // First see if we can ignore it by using the fact that SI is an
-    // alloca/alloca like object that is not visible to the caller during
-    // execution of the function.
-    if (SILocUnd && InvisibleToCaller.count(SILocUnd))
-      return false;
-
-    if (SI->getParent() == NI->getParent())
-      return ThrowingBlocks.find(SI->getParent()) != ThrowingBlocks.end();
-    return !ThrowingBlocks.empty();
-  }
-
-  // Check if \p NI acts as a DSE barrier for \p SI. The following instructions
-  // act as barriers:
-  //  * A memory instruction that may throw and \p SI accesses a non-stack
-  //  object.
-  //  * Atomic stores stronger that monotonic.
-  bool isDSEBarrier(Instruction *SI, MemoryLocation &SILoc,
-                    const Value *SILocUnd, Instruction *NI,
-                    MemoryLocation &NILoc) const {
-    // If NI may throw it acts as a barrier, unless we are to an alloca/alloca
-    // like object that does not escape.
-    if (NI->mayThrow() && !InvisibleToCaller.count(SILocUnd))
-      return true;
-
-    if (NI->isAtomic()) {
-      if (auto *NSI = dyn_cast<StoreInst>(NI)) {
-        if (isStrongerThanMonotonic(NSI->getOrdering()))
-          return true;
-      } else
-        llvm_unreachable(
-            "Other instructions should be modeled/skipped in MemorySSA");
-    }
-
-    return false;
-  }
-};
-
-bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
-                                  MemorySSA &MSSA, DominatorTree &DT,
-                                  PostDominatorTree &PDT,
-                                  const TargetLibraryInfo &TLI) {
-  const DataLayout &DL = F.getParent()->getDataLayout();
-  bool MadeChange = false;
-
-  DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
-  // For each store:
-  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
-    MemoryDef *Current = State.MemDefs[I];
-    if (State.SkipStores.count(Current))
-      continue;
-    Instruction *SI = cast<MemoryDef>(Current)->getMemoryInst();
-    auto MaybeSILoc = State.getLocForWriteEx(SI);
-    if (!MaybeSILoc) {
-      LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
-                        << *SI << "\n");
-      continue;
-    }
-    MemoryLocation SILoc = *MaybeSILoc;
-    assert(SILoc.Ptr && "SILoc should not be null");
-    const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL);
-    Instruction *DefObj =
-        const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd));
-    bool DefVisibleToCaller = !State.InvisibleToCaller.count(SILocUnd);
-    if (DefObj && ((isAllocLikeFn(DefObj, &TLI) &&
-                    !PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT))))
-      DefVisibleToCaller = false;
-
-    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " << *SI
-                      << "\n");
-
-    int ScanLimit = MemorySSAScanLimit;
-    MemoryDef *StartDef = Current;
-    // Walk MemorySSA upward to find MemoryDefs that might be killed by SI.
-    while (Optional<MemoryAccess *> Next = State.getDomMemoryDef(
-               StartDef, Current, SILoc, DefVisibleToCaller, ScanLimit)) {
-      MemoryAccess *DomAccess = *Next;
-      LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess << "\n");
-      MemoryDef *NextDef = dyn_cast<MemoryDef>(DomAccess);
-      Instruction *NI = NextDef->getMemoryInst();
-      LLVM_DEBUG(dbgs() << "  def " << *NI << "\n");
-
-      if (!hasAnalyzableMemoryWrite(NI, TLI))
-        break;
-      MemoryLocation NILoc = *State.getLocForWriteEx(NI);
-      // Check for anything that looks like it will be a barrier to further
-      // removal
-      if (State.isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc)) {
-        LLVM_DEBUG(dbgs() << "  stop, barrier\n");
-        break;
-      }
-
-      // Before we try to remove anything, check for any extra throwing
-      // instructions that block us from DSEing
-      if (State.mayThrowBetween(SI, NI, SILocUnd)) {
-        LLVM_DEBUG(dbgs() << " stop, may throw!\n");
-        break;
-      }
-
-      // Check if NI overwrites SI.
-      int64_t InstWriteOffset, DepWriteOffset;
-      InstOverlapIntervalsTy IOL;
-      OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset,
-                                       InstWriteOffset, NI, IOL, AA, &F);
-
-      if (OR == OW_Complete) {
-        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: " << *NI
-                          << "\n  KILLER: " << *SI << '\n');
-        State.deleteDeadInstruction(NI);
-        ++NumFastStores;
-        MadeChange = true;
-      } else
-        Current = NextDef;
-    }
-  }
-
-  return MadeChange;
-}
-} // end anonymous namespace
-
 //===----------------------------------------------------------------------===//
 // DSE Pass
 //===----------------------------------------------------------------------===//
 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
-  AliasAnalysis &AA = AM.getResult<AAManager>(F);
-  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
-  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
-
-  if (EnableMemorySSA) {
-    MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
-    PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
-
-    if (!eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI))
-      return PreservedAnalyses::all();
-  } else {
-    MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+  AliasAnalysis *AA = &AM.getResult<AAManager>(F);
+  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+  MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
+  const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
 
-    if (!eliminateDeadStores(F, &AA, &MD, &DT, &TLI))
-      return PreservedAnalyses::all();
-  }
+  if (!eliminateDeadStores(F, AA, MD, DT, TLI))
+    return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
   PA.preserveSet<CFGAnalyses>();
   PA.preserve<GlobalsAA>();
-  if (EnableMemorySSA)
-    PA.preserve<MemorySSAAnalysis>();
-  else
-    PA.preserve<MemoryDependenceAnalysis>();
+  PA.preserve<MemoryDependenceAnalysis>();
   return PA;
 }
 
@@ -1869,42 +1388,25 @@ class DSELegacyPass : public FunctionPass {
     if (skipFunction(F))
       return false;
 
-    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
-    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-    const TargetLibraryInfo &TLI =
-        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-
-    if (EnableMemorySSA) {
-      MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
-      PostDominatorTree &PDT =
-          getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
-
-      return eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
-    } else {
-      MemoryDependenceResults &MD =
-          getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+    MemoryDependenceResults *MD =
+        &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+    const TargetLibraryInfo *TLI =
+        &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
 
-      return eliminateDeadStores(F, &AA, &MD, &DT, &TLI);
-    }
+    return eliminateDeadStores(F, AA, MD, DT, TLI);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<MemoryDependenceWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
-    AU.addPreserved<GlobalsAAWrapperPass>();
-    AU.addRequired<DominatorTreeWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
-
-    if (EnableMemorySSA) {
-      AU.addRequired<PostDominatorTreeWrapperPass>();
-      AU.addRequired<MemorySSAWrapperPass>();
-      AU.addPreserved<PostDominatorTreeWrapperPass>();
-      AU.addPreserved<MemorySSAWrapperPass>();
-    } else {
-      AU.addRequired<MemoryDependenceWrapperPass>();
-      AU.addPreserved<MemoryDependenceWrapperPass>();
-    }
+    AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<MemoryDependenceWrapperPass>();
   }
 };
 
@@ -1915,10 +1417,8 @@ char DSELegacyPass::ID = 0;
 INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
                       false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
index 0d84e9cf9d51..b9a0ea76d7fb 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
@@ -1,4 +1,3 @@
-; XFAIL: *
 ; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll
index 1028c8ed47f6..a7278d56e04c 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; XFAIL: *
 ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
 
 define void @write4to7(i32* nocapture %p) {

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll
index 0711855532fe..d8791aa5e930 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; XFAIL: *
 ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
index 9558033b37c7..b21183c1ac2b 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
@@ -1,4 +1,3 @@
-; XFAIL: *
 ; RUN: opt -basicaa -dse -enable-dse-memoryssa -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll
index 397c5947ae5c..19ebaa93e7d3 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll
@@ -1,5 +1,3 @@
-; XFAIL: *
-
 ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
 
 declare noalias i8* @calloc(i64, i64)

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll
deleted file mode 100644
index 4ee295821e22..000000000000
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; XFAIL: *
-
-; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s
-
-; We DSE stack alloc'ed and byval locations, in the presence of fences.
-; Fence does not make an otherwise thread local store visible.
-; Right now the DSE in presence of fence is only done in end blocks (with no successors),
-; but the same logic applies to other basic blocks as well.
-; The store to %addr.i can be removed since it is a byval attribute
-define void @test3(i32* byval %addr.i) {
-; CHECK-LABEL: @test3
-; CHECK-NOT: store
-; CHECK: fence
-; CHECK: ret
-  store i32 5, i32* %addr.i, align 4
-  fence release
-  ret void
-}
-
-declare void @foo(i8* nocapture %p)
-
-declare noalias i8* @malloc(i32)
-
-; DSE of stores in locations allocated through library calls.
-define void @test_nocapture() {
-; CHECK-LABEL: @test_nocapture
-; CHECK: malloc
-; CHECK: foo
-; CHECK-NOT: store
-; CHECK: fence
-  %m  =  call i8* @malloc(i32 24)
-  call void @foo(i8* %m)
-  store i8 4, i8* %m
-  fence release
-  ret void
-}
-
-
-; This is a full fence, but it does not make a thread local store visible.
-; We can DSE the store in presence of the fence.
-define void @fence_seq_cst() {
-; CHECK-LABEL: @fence_seq_cst
-; CHECK-NEXT: fence seq_cst
-; CHECK-NEXT: ret void
-  %P1 = alloca i32
-  store i32 0, i32* %P1, align 4
-  fence seq_cst
-  store i32 4, i32* %P1, align 4
-  ret void
-}

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll
index b22fc9c92004..9b43d376b295 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll
@@ -46,3 +46,51 @@ define void @test2(i32* %addr.i) {
   store i32 5, i32* %addr.i, align 4
   ret void
 }
+
+; We DSE stack alloc'ed and byval locations, in the presence of fences.
+; Fence does not make an otherwise thread local store visible.
+; Right now the DSE in presence of fence is only done in end blocks (with no successors),
+; but the same logic applies to other basic blocks as well.
+; The store to %addr.i can be removed since it is a byval attribute
+define void @test3(i32* byval %addr.i) {
+; CHECK-LABEL: @test3
+; CHECK-NOT: store
+; CHECK: fence
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence release
+  ret void
+}
+
+declare void @foo(i8* nocapture %p)
+
+declare noalias i8* @malloc(i32)
+
+; DSE of stores in locations allocated through library calls.
+define void @test_nocapture() {
+; CHECK-LABEL: @test_nocapture
+; CHECK: malloc
+; CHECK: foo
+; CHECK-NOT: store
+; CHECK: fence
+  %m  =  call i8* @malloc(i32 24)
+  call void @foo(i8* %m)
+  store i8 4, i8* %m
+  fence release
+  ret void
+}
+
+
+; This is a full fence, but it does not make a thread local store visible.
+; We can DSE the store in presence of the fence.
+define void @fence_seq_cst() {
+; CHECK-LABEL: @fence_seq_cst
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: ret void
+  %P1 = alloca i32
+  store i32 0, i32* %P1, align 4
+  fence seq_cst
+  store i32 4, i32* %P1, align 4
+  ret void
+}
+

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll
index a6e47d7bb107..a9a32348cb6b 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll
@@ -1,5 +1,3 @@
-; XFAIL: *
-
 ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64"

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll
index 638571f6f417..78fe292d0cf5 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll
@@ -1,9 +1,10 @@
 ; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-; This test is not relevant for DSE with MemorySSA. Non-memory instructions
-; are ignored anyways. The limits for the MemorySSA traversal are tested in
-; llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll
+; If there are two stores to the same location, DSE should be able to remove
+; the first store if the two stores are separated by no more than 98
+; instructions. The existence of debug intrinsics between the stores should
+; not affect this instruction limit.
 
 @x = global i32 0, align 4
 
@@ -128,7 +129,7 @@ entry:
 define i32 @test_outside_limit() {
 entry:
   ; The first store; later there is a second store to the same location
-  ; CHECK-NOT: store i32 1, i32* @x, align 4
+  ; CHECK: store i32 1, i32* @x, align 4
   store i32 1, i32* @x, align 4
 
   ; Insert 99 dummy instructions between the two stores; this is

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll
index e4d4722677a7..3b3f4d498e9f 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll
@@ -1,5 +1,3 @@
-; XFAIL: *
-
 ; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s
 
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll
index 06bcc346a435..52e2ecdc7290 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll
@@ -1,12 +1,13 @@
-; RUN: opt -S -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s
-; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s
+; RUN: opt -S -dse -enable-dse-memoryssa -memdep-block-scan-limit=3 < %s | FileCheck %s
+; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa -memdep-block-scan-limit=3 < %s | FileCheck %s
 
-; Test case to check that DSE gets the same result even if we have a dbg value
-; between the memcpy.
+; Test case to check that the memory dependency analysis gets the same
+; result even if we have a dbg value between the memcpy and
+; store. The memory dependency is then used by DSE to remove the store.
 
-; This test case is less relevant for the MemorySSA backed version of DSE, as
-; debug values are not modeled in MemorySSA and are skipped regardless of the
-; exploration limit.
+; We use -memdep-block-scan-limit=3 to be able to create a small test case.
+; Without it, we would need to squeeze in 100 instructions since the default
+; limit is 100.
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -19,11 +20,6 @@ entry:
   %i = alloca i8, align 1
   store i8 1, i8* %i, align 1, !dbg !19
   call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
-  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
-  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
-  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
-  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
-  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
   %0 = bitcast [1 x i8]* @g to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %i, i8* %0, i64 1, i1 false), !dbg !20
   br label %bb2

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll
index 931597265e41..38cb8a38511b 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll
@@ -1,6 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-
-; XFAIL: *
 ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll
index c22ad19198b4..51c45c32d8c5 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; XFAIL: *
 ; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s
 
 declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll
deleted file mode 100644
index 43a9f0c92f1b..000000000000
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck --check-prefix=NO-LIMIT %s
-; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s
-; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=3 -S | FileCheck --check-prefix=LIMIT-3 %s
-; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=4 -S | FileCheck --check-prefix=LIMIT-4 %s
-
-target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
-
-
-define void @test2(i32* noalias %P, i32* noalias %Q, i32* noalias %R) {
-;
-; NO-LIMIT-LABEL: @test2(
-; NO-LIMIT-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
-; NO-LIMIT:       bb1:
-; NO-LIMIT-NEXT:    br label [[BB3:%.*]]
-; NO-LIMIT:       bb2:
-; NO-LIMIT-NEXT:    br label [[BB3]]
-; NO-LIMIT:       bb3:
-; NO-LIMIT-NEXT:    store i32 0, i32* [[Q:%.*]]
-; NO-LIMIT-NEXT:    store i32 0, i32* [[R:%.*]]
-; NO-LIMIT-NEXT:    store i32 0, i32* [[P:%.*]]
-; NO-LIMIT-NEXT:    ret void
-;
-; LIMIT-0-LABEL: @test2(
-; LIMIT-0-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
-; LIMIT-0:       bb1:
-; LIMIT-0-NEXT:    br label [[BB3:%.*]]
-; LIMIT-0:       bb2:
-; LIMIT-0-NEXT:    br label [[BB3]]
-; LIMIT-0:       bb3:
-; LIMIT-0-NEXT:    store i32 0, i32* [[Q:%.*]]
-; LIMIT-0-NEXT:    store i32 0, i32* [[R:%.*]]
-; LIMIT-0-NEXT:    store i32 0, i32* [[P:%.*]]
-; LIMIT-0-NEXT:    ret void
-;
-; LIMIT-3-LABEL: @test2(
-; LIMIT-3-NEXT:    store i32 1, i32* [[P:%.*]]
-; LIMIT-3-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
-; LIMIT-3:       bb1:
-; LIMIT-3-NEXT:    br label [[BB3:%.*]]
-; LIMIT-3:       bb2:
-; LIMIT-3-NEXT:    br label [[BB3]]
-; LIMIT-3:       bb3:
-; LIMIT-3-NEXT:    store i32 0, i32* [[Q:%.*]]
-; LIMIT-3-NEXT:    store i32 0, i32* [[R:%.*]]
-; LIMIT-3-NEXT:    store i32 0, i32* [[P]]
-; LIMIT-3-NEXT:    ret void
-;
-; LIMIT-4-LABEL: @test2(
-; LIMIT-4-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
-; LIMIT-4:       bb1:
-; LIMIT-4-NEXT:    br label [[BB3:%.*]]
-; LIMIT-4:       bb2:
-; LIMIT-4-NEXT:    br label [[BB3]]
-; LIMIT-4:       bb3:
-; LIMIT-4-NEXT:    store i32 0, i32* [[Q:%.*]]
-; LIMIT-4-NEXT:    store i32 0, i32* [[R:%.*]]
-; LIMIT-4-NEXT:    store i32 0, i32* [[P:%.*]]
-; LIMIT-4-NEXT:    ret void
-;
-  store i32 1, i32* %P
-  br i1 true, label %bb1, label %bb2
-bb1:
-  br label %bb3
-bb2:
-  br label %bb3
-bb3:
-  store i32 0, i32* %Q
-  store i32 0, i32* %R
-  store i32 0, i32* %P
-  ret void
-}

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
index c142ea6cf8de..be4574e54cb1 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
@@ -59,7 +59,8 @@ define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp {
 ; Should not delete the volatile memset.
 define void @test17v(i8* %P, i8* %Q) nounwind ssp {
 ; CHECK-LABEL: @test17v(
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 42, i64 8, i1 true)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[Q:%.*]], i64 12, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true)
@@ -73,7 +74,8 @@ define void @test17v(i8* %P, i8* %Q) nounwind ssp {
 ; Previously this was not allowed (PR8728), also discussed in PR11763.
 define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
 ; CHECK-LABEL: @test18(
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
@@ -83,7 +85,8 @@ define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
 
 define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp {
 ; CHECK-LABEL: @test18_atomic(
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll
index d1aaab9a3d49..11d155c5fc1e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll
@@ -1,7 +1,6 @@
 ; Test that the getelementptr generated when the dse pass determines that
 ; a memset can be shortened has the debugloc carried over from the memset.
 
-; XFAIL: *
 ; RUN: opt -S -march=native -dse -enable-dse-memoryssa < %s| FileCheck %s
 ; CHECK: bitcast [5 x i64]* %{{[a-zA-Z_][a-zA-Z0-9_]*}} to i8*, !dbg
 ; CHECK-NEXT: %{{[0-9]+}} = getelementptr inbounds i8, i8* %0, i64 32, !dbg ![[DBG:[0-9]+]]

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
index 055f117c4b5c..8acc29f3f62e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; XFAIL: *
 ; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
 

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll
index b09904e5a27d..feb1f31c99dd 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; XFAIL: *
 ; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
 

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll
index c1f739234196..7caf48148156 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll
@@ -25,6 +25,7 @@ define i8* @test_return_captures_1() {
 define i8* @test_return_captures_2() {
 ; CHECK-LABEL: @test_return_captures_2(
 ; CHECK-NEXT:    [[M:%.*]] = call i8* @malloc(i64 24)
+; CHECK-NEXT:    store i8 0, i8* [[M]]
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    store i8 1, i8* [[M]]
@@ -90,6 +91,7 @@ exit:
 define i8* @test_malloc_capture_3() {
 ; CHECK-LABEL: @test_malloc_capture_3(
 ; CHECK-NEXT:    [[M:%.*]] = call i8* @malloc(i64 24)
+; CHECK-NEXT:    store i8 0, i8* [[M]]
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    store i8 1, i8* [[M]]
@@ -189,6 +191,7 @@ exit:
 define i8* @test_malloc_capture_7() {
 ; CHECK-LABEL: @test_malloc_capture_7(
 ; CHECK-NEXT:    [[M:%.*]] = call i8* @malloc(i64 24)
+; CHECK-NEXT:    store i8 0, i8* [[M]]
 ; CHECK-NEXT:    call void @may_throw()
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       exit:
@@ -212,10 +215,10 @@ exit:
 define void @test_alloca_nocapture_1() {
 ; CHECK-LABEL: @test_alloca_nocapture_1(
 ; CHECK-NEXT:    [[M:%.*]] = alloca i8
+; CHECK-NEXT:    store i8 0, i8* [[M]]
 ; CHECK-NEXT:    call void @foo()
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    store i8 1, i8* [[M]]
 ; CHECK-NEXT:    ret void
 ;
   %m = alloca i8
@@ -237,7 +240,6 @@ define void @test_alloca_capture_1() {
 ; CHECK-NEXT:    call void @capture(i8* [[M]])
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    store i8 1, i8* [[M]]
 ; CHECK-NEXT:    ret void
 ;
   %m = alloca i8
@@ -260,7 +262,6 @@ define void @test_alloca_capture_2(%S1* %E) {
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0
 ; CHECK-NEXT:    store i8* [[M]], i8** [[F_PTR]]
-; CHECK-NEXT:    store i8 1, i8* [[M]]
 ; CHECK-NEXT:    ret void
 ;
   %m = alloca i8

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll
index a719e273e227..41d4d019cb94 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll
@@ -30,7 +30,6 @@ define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 {
 ; CHECK-NEXT:    [[C1:%.*]] = cleanuppad within none []
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
-; CHECK-NEXT:    store i32 40, i32* [[SV]]
 ; CHECK-NEXT:    ret void
 ;
 block1:

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll
index 1ec4434fb20a..bcf75e3860da 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll
@@ -111,6 +111,7 @@ define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias
 ; CHECK:       for.body4.lr.ph:
 ; CHECK-NEXT:    [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]]
+; CHECK-NEXT:    store i32 0, i32* [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]]
 ; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
 ; CHECK:       for.body4:
@@ -169,116 +170,3 @@ for.cond.cleanup3:                                ; preds = %for.body4
   %exitcond29 = icmp eq i32 %inc11, %N
   br i1 %exitcond29, label %for.cond.cleanup, label %for.body4.lr.ph
 }
-
-declare i1 @cond() readnone
-
-; TODO: We can eliminate the store in for.header, but we currently hit a MemoryPhi.
-define void @loop_multiple_def_uses(i32* noalias %P) {
-; CHECK-LABEL: @loop_multiple_def_uses(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_HEADER:%.*]]
-; CHECK:       for.header:
-; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
-; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
-; CHECK-NEXT:    br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
-; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[P]]
-; CHECK-NEXT:    br label [[FOR_HEADER]]
-; CHECK:       end:
-; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.header
-
-for.header:
-  store i32 1, i32* %P, align 4
-  %c1 = call i1 @cond()
-  br i1 %c1, label %for.body, label %end
-
-for.body:
-  store i32 1, i32* %P, align 4
-  %lv = load i32, i32* %P
-  br label %for.header
-
-end:
-  store i32 3, i32* %P, align 4
-  ret void
-}
-
-; We cannot eliminate the store in for.header, as it is only partially
-; overwritten in for.body and read afterwards.
-define void @loop_multiple_def_uses_partial_write(i32* noalias %p) {
-; CHECK-LABEL: @loop_multiple_def_uses_partial_write(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_HEADER:%.*]]
-; CHECK:       for.header:
-; CHECK-NEXT:    store i32 1239491, i32* [[P:%.*]], align 4
-; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
-; CHECK-NEXT:    br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[C:%.*]] = bitcast i32* [[P]] to i8*
-; CHECK-NEXT:    store i8 1, i8* [[C]], align 4
-; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[P]]
-; CHECK-NEXT:    br label [[FOR_HEADER]]
-; CHECK:       end:
-; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.header
-
-for.header:
-  store i32 1239491, i32* %p, align 4
-  %c1 = call i1 @cond()
-  br i1 %c1, label %for.body, label %end
-
-for.body:
-  %c = bitcast i32* %p to i8*
-  store i8 1, i8* %c, align 4
-  %lv = load i32, i32* %p
-  br label %for.header
-
-end:
-  store i32 3, i32* %p, align 4
-  ret void
-}
-
-; We cannot eliminate the store in for.header, as the location is not overwritten
-; in for.body and read afterwards.
-define void @loop_multiple_def_uses_mayalias_write(i32* %p, i32* %q) {
-
-; CHECK-LABEL: @loop_multiple_def_uses_mayalias_write(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_HEADER:%.*]]
-; CHECK:       for.header:
-; CHECK-NEXT:    store i32 1239491, i32* [[P:%.*]], align 4
-; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
-; CHECK-NEXT:    br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    store i32 1, i32* [[Q:%.*]], align 4
-; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[P]]
-; CHECK-NEXT:    br label [[FOR_HEADER]]
-; CHECK:       end:
-; CHECK-NEXT:    store i32 3, i32* [[P]], align 4
-; CHECK-NEXT:    ret void
-;
-entry:
-  br label %for.header
-
-for.header:
-  store i32 1239491, i32* %p, align 4
-  %c1 = call i1 @cond()
-  br i1 %c1, label %for.body, label %end
-
-for.body:
-  store i32 1, i32* %q, align 4
-  %lv = load i32, i32* %p
-  br label %for.header
-
-end:
-  store i32 3, i32* %p, align 4
-  ret void
-}
-

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
index ce34986ff4e3..ddfc4d43d51a 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
@@ -103,73 +103,3 @@ bb3:
   store i8 1, i8* %P2
   ret void
 }
-
-declare void @hoge()
-
-; Check a function with a MemoryPhi with 3 incoming values.
-define void @widget(i32* %Ptr, i1 %c1, i1 %c2, i32 %v1, i32 %v2, i32 %v3) {
-; CHECK-LABEL: @widget(
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    tail call void @hoge()
-; CHECK-NEXT:    br i1 [[C1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
-; CHECK:       bb1:
-; CHECK-NEXT:    br i1 [[C2:%.*]], label [[BB2:%.*]], label [[BB3]]
-; CHECK:       bb2:
-; CHECK-NEXT:    store i32 -1, i32* [[PTR:%.*]], align 4
-; CHECK-NEXT:    br label [[BB3]]
-; CHECK:       bb3:
-; CHECK-NEXT:    br label [[BB4:%.*]]
-; CHECK:       bb4:
-; CHECK-NEXT:    switch i32 [[V1:%.*]], label [[BB8:%.*]] [
-; CHECK-NEXT:    i32 0, label [[BB5:%.*]]
-; CHECK-NEXT:    i32 1, label [[BB6:%.*]]
-; CHECK-NEXT:    i32 2, label [[BB7:%.*]]
-; CHECK-NEXT:    ]
-; CHECK:       bb5:
-; CHECK-NEXT:    store i32 0, i32* [[PTR]], align 4
-; CHECK-NEXT:    br label [[BB8]]
-; CHECK:       bb6:
-; CHECK-NEXT:    store i32 1, i32* [[PTR]], align 4
-; CHECK-NEXT:    br label [[BB8]]
-; CHECK:       bb7:
-; CHECK-NEXT:    store i32 2, i32* [[PTR]], align 4
-; CHECK-NEXT:    br label [[BB8]]
-; CHECK:       bb8:
-; CHECK-NEXT:    br label [[BB4]]
-;
-bb:
-  tail call void @hoge()
-  br i1 %c1, label %bb3, label %bb1
-
-bb1:                                              ; preds = %bb
-  br i1 %c2, label %bb2, label %bb3
-
-bb2:                                              ; preds = %bb1
-  store i32 -1, i32* %Ptr, align 4
-  br label %bb3
-
-bb3:                                              ; preds = %bb2, %bb1, %bb
-  br label %bb4
-
-bb4:                                              ; preds = %bb8, %bb3
-  switch i32 %v1, label %bb8 [
-  i32 0, label %bb5
-  i32 1, label %bb6
-  i32 2, label %bb7
-  ]
-
-bb5:                                              ; preds = %bb4
-  store i32 0, i32* %Ptr, align 4
-  br label %bb8
-
-bb6:                                              ; preds = %bb4
-  store i32 1, i32* %Ptr, align 4
-  br label %bb8
-
-bb7:                                              ; preds = %bb4
-  store i32 2, i32* %Ptr, align 4
-  br label %bb8
-
-bb8:                                              ; preds = %bb7, %bb6, %bb5, %bb4
-  br label %bb4
-}

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
index cbd7dae56da1..17bd8a6fcc87 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
@@ -32,13 +32,14 @@ bb3:
 
 define void @second_store_bigger(i32* noalias %P) {
 ; CHECK-LABEL: @second_store_bigger(
+; CHECK-NEXT:    store i32 1, i32* [[P:%.*]]
 ; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[P_I64:%.*]] = bitcast i32* [[P:%.*]] to i64*
+; CHECK-NEXT:    [[P_I64:%.*]] = bitcast i32* [[P]] to i64*
 ; CHECK-NEXT:    store i64 0, i64* [[P_I64]]
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
index 520a6eaccf09..2b0227751b2a 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
@@ -6,13 +6,14 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @test2(i32* noalias %P) {
 ; CHECK-LABEL: @test2(
+; CHECK-NEXT:    store i32 1, i32* [[P:%.*]]
 ; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    br label [[BB3:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT:    store i32 0, i32* [[P]]
 ; CHECK-NEXT:    ret void
 ;
   store i32 1, i32* %P
@@ -52,6 +53,7 @@ bb3:
 
 define void @test7(i32* noalias %P, i32* noalias %Q) {
 ; CHECK-LABEL: @test7(
+; CHECK-NEXT:    store i32 1, i32* [[Q:%.*]]
 ; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[P:%.*]]
@@ -59,7 +61,7 @@ define void @test7(i32* noalias %P, i32* noalias %Q) {
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    store i32 0, i32* [[Q:%.*]]
+; CHECK-NEXT:    store i32 0, i32* [[Q]]
 ; CHECK-NEXT:    store i32 0, i32* [[P]]
 ; CHECK-NEXT:    ret void
 ;
@@ -112,38 +114,3 @@ bb3:
   store i32 0, i32* %P
   ret void
 }
-
-; We cannot eliminate `store i32 0, i32* %P`, as it is read by the later load.
-; Make sure that we check the uses of `store i32 1, i32* %P.1 which does not
-; alias %P. Note that uses point to the *first* def that may alias.
-define void @overlapping_read(i32* %P) {
-; CHECK-LABEL: @overlapping_read(
-; CHECK-NEXT:    store i32 0, i32* [[P:%.*]]
-; CHECK-NEXT:    [[P_1:%.*]] = getelementptr i32, i32* [[P]], i32 1
-; CHECK-NEXT:    store i32 1, i32* [[P_1]]
-; CHECK-NEXT:    [[P_64:%.*]] = bitcast i32* [[P]] to i64*
-; CHECK-NEXT:    [[LV:%.*]] = load i64, i64* [[P_64]]
-; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
-; CHECK:       bb1:
-; CHECK-NEXT:    br label [[BB3:%.*]]
-; CHECK:       bb2:
-; CHECK-NEXT:    ret void
-; CHECK:       bb3:
-; CHECK-NEXT:    store i32 2, i32* [[P]]
-; CHECK-NEXT:    ret void
-;
-  store i32 0, i32* %P
-  %P.1 = getelementptr i32, i32* %P, i32 1
-  store i32 1, i32* %P.1
-
-  %P.64 = bitcast i32* %P to i64*
-  %lv = load i64, i64* %P.64
-  br i1 true, label %bb1, label %bb2
-bb1:
-  br label %bb3
-bb2:
-  ret void
-bb3:
-  store i32 2, i32* %P
-  ret void
-}

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll
index 3a59286c53f2..9e88651bdb2e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll
@@ -1,4 +1,3 @@
-; XFAIL: *
 ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
 
 declare noalias i8* @malloc(i64) "malloc-like"

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
index d0a3cef71a7e..b96cc9f33c20 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
@@ -1,5 +1,4 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; XFAIL: *
 ; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@@ -10,6 +9,19 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) n
 declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
 declare void @llvm.init.trampoline(i8*, i8*, i8*)
 
+; PR8576 - Should delete store of 10 even though p/q are may aliases.
+define void @test2(i32 *%p, i32 *%q) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    store i32 20, i32* [[Q:%.*]], align 4
+; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 10, i32* %p, align 4
+  store i32 20, i32* %q, align 4
+  store i32 30, i32* %p, align 4
+  ret void
+}
+
 define void @test5(i32* %Q) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:    [[A:%.*]] = load volatile i32, i32* [[Q:%.*]]
@@ -20,6 +32,62 @@ define void @test5(i32* %Q) {
   ret void
 }
 
+; Should delete store of 10 even though memset is a may-store to P (P and Q may
+; alias).
+define void @test6(i32 *%p, i8 *%q) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[Q:%.*]], i8 42, i64 900, i1 false)
+; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 10, i32* %p, align 4       ;; dead.
+  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false)
+  store i32 30, i32* %p, align 4
+  ret void
+}
+
+; Should delete store of 10 even though memset is a may-store to P (P and Q may
+; alias).
+define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
+; CHECK-LABEL: @test6_atomic(
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
+; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+  store atomic i32 10, i32* %p unordered, align 4       ;; dead.
+  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4)
+  store atomic i32 30, i32* %p unordered, align 4
+  ret void
+}
+
+; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
+; alias).
+define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[Q:%.*]], i8* [[R:%.*]], i64 900, i1 false)
+; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 10, i32* %p, align 4       ;; dead.
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false)
+  store i32 30, i32* %p, align 4
+  ret void
+}
+
+; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
+; alias).
+define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
+; CHECK-LABEL: @test7_atomic(
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
+; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
+  store atomic i32 10, i32* %p unordered, align 4       ;; dead.
+  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4)
+  store atomic i32 30, i32* %p unordered, align 4
+  ret void
+}
+
 ; Do not delete stores that are only partially killed.
 define i32 @test8() {
 ; CHECK-LABEL: @test8(
@@ -90,6 +158,46 @@ define void @test12({ i32, i32 }* %x) nounwind  {
 }
 
 
+; %P doesn't escape, the DEAD instructions should be removed.
+declare void @test13f()
+define i32* @test13() {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[PTR:%.*]] = tail call i8* @malloc(i32 4)
+; CHECK-NEXT:    [[P:%.*]] = bitcast i8* [[PTR]] to i32*
+; CHECK-NEXT:    call void @test13f()
+; CHECK-NEXT:    store i32 0, i32* [[P]]
+; CHECK-NEXT:    ret i32* [[P]]
+;
+  %ptr = tail call i8* @malloc(i32 4)
+  %P = bitcast i8* %ptr to i32*
+  %DEAD = load i32, i32* %P
+  %DEAD2 = add i32 %DEAD, 1
+  store i32 %DEAD2, i32* %P
+  call void @test13f( )
+  store i32 0, i32* %P
+  ret i32* %P
+}
+
+define i32 addrspace(1)* @test13_addrspacecast() {
+; CHECK-LABEL: @test13_addrspacecast(
+; CHECK-NEXT:    [[P:%.*]] = tail call i8* @malloc(i32 4)
+; CHECK-NEXT:    [[P_BC:%.*]] = bitcast i8* [[P]] to i32*
+; CHECK-NEXT:    [[P:%.*]] = addrspacecast i32* [[P_BC]] to i32 addrspace(1)*
+; CHECK-NEXT:    call void @test13f()
+; CHECK-NEXT:    store i32 0, i32 addrspace(1)* [[P]]
+; CHECK-NEXT:    ret i32 addrspace(1)* [[P]]
+;
+  %p = tail call i8* @malloc(i32 4)
+  %p.bc = bitcast i8* %p to i32*
+  %P = addrspacecast i32* %p.bc to i32 addrspace(1)*
+  %DEAD = load i32, i32 addrspace(1)* %P
+  %DEAD2 = add i32 %DEAD, 1
+  store i32 %DEAD2, i32 addrspace(1)* %P
+  call void @test13f( )
+  store i32 0, i32 addrspace(1)* %P
+  ret i32 addrspace(1)* %P
+}
+
 declare noalias i8* @malloc(i32)
 declare noalias i8* @calloc(i32, i32)
 
@@ -133,6 +241,26 @@ define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
   ret void
 }
 
+; Make sure same sized store to later element is deleted
+define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A:%.*]], i64 0, i64 0
+; CHECK-NEXT:    store i32 [[B:%.*]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
+; CHECK-NEXT:    store i32 [[C:%.*]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    ret void
+;
+  %1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
+  store i32 0, i32* %1, align 4
+  %2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
+  store i32 0, i32* %2, align 4
+  %3 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
+  store i32 %b, i32* %3, align 4
+  %4 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
+  store i32 %c, i32* %4, align 4
+  ret void
+}
+
 ; Remove redundant store if loaded value is in another block.
 define i32 @test26(i1 %c, i32* %p) {
 ; CHECK-LABEL: @test26(
@@ -183,6 +311,35 @@ bb3:
 
 declare void @unknown_func()
 
+; Don't remove redundant store because of unknown call.
+define i32 @test30(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    br label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    call void @unknown_func()
+; CHECK-NEXT:    br label [[BB3]]
+; CHECK:       bb3:
+; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %v = load i32, i32* %p, align 4
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  br label %bb3
+bb2:
+  ; Might overwrite value at %p
+  call void @unknown_func()
+  br label %bb3
+bb3:
+  store i32 %v, i32* %p, align 4
+  ret i32 0
+}
+
 ; Remove redundant store if loaded value is in another block inside a loop.
 define i32 @test31(i1 %c, i32* %p, i32 %i) {
 ; CHECK-LABEL: @test31(

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
index ef05504b30f8..b2c66f299459 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
@@ -50,76 +50,6 @@ define void @test4(i32* %Q) {
   ret void
 }
 
-; PR8576 - Should delete store of 10 even though p/q are may aliases.
-define void @test2(i32 *%p, i32 *%q) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:    store i32 20, i32* [[Q:%.*]], align 4
-; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
-; CHECK-NEXT:    ret void
-;
-  store i32 10, i32* %p, align 4
-  store i32 20, i32* %q, align 4
-  store i32 30, i32* %p, align 4
-  ret void
-}
-
-; Should delete store of 10 even though memset is a may-store to P (P and Q may
-; alias).
-define void @test6(i32 *%p, i8 *%q) {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[Q:%.*]], i8 42, i64 900, i1 false)
-; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
-; CHECK-NEXT:    ret void
-;
-  store i32 10, i32* %p, align 4       ;; dead.
-  call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false)
-  store i32 30, i32* %p, align 4
-  ret void
-}
-
-; Should delete store of 10 even though memset is a may-store to P (P and Q may
-; alias).
-define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
-; CHECK-LABEL: @test6_atomic(
-; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
-; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
-; CHECK-NEXT:    ret void
-;
-  store atomic i32 10, i32* %p unordered, align 4       ;; dead.
-  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4)
-  store atomic i32 30, i32* %p unordered, align 4
-  ret void
-}
-
-; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
-; alias).
-define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
-; CHECK-LABEL: @test7(
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[Q:%.*]], i8* [[R:%.*]], i64 900, i1 false)
-; CHECK-NEXT:    store i32 30, i32* [[P:%.*]], align 4
-; CHECK-NEXT:    ret void
-;
-  store i32 10, i32* %p, align 4       ;; dead.
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false)
-  store i32 30, i32* %p, align 4
-  ret void
-}
-
-; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
-; alias).
-define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
-; CHECK-LABEL: @test7_atomic(
-; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
-; CHECK-NEXT:    store atomic i32 30, i32* [[P:%.*]] unordered, align 4
-; CHECK-NEXT:    ret void
-;
-  store atomic i32 10, i32* %p unordered, align 4       ;; dead.
-  call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4)
-  store atomic i32 30, i32* %p unordered, align 4
-  ret void
-}
-
-
 ; va_arg has fuzzy dependence, the store shouldn't be zapped.
 define double @test10(i8* %X) {
 ; CHECK-LABEL: @test10(
@@ -134,46 +64,6 @@ define double @test10(i8* %X) {
   ret double %tmp.0
 }
 
-; %P doesn't escape, the DEAD instructions should be removed.
-declare void @test13f()
-define i32* @test13() {
-; CHECK-LABEL: @test13(
-; CHECK-NEXT:    [[PTR:%.*]] = tail call i8* @malloc(i32 4)
-; CHECK-NEXT:    [[P:%.*]] = bitcast i8* [[PTR]] to i32*
-; CHECK-NEXT:    call void @test13f()
-; CHECK-NEXT:    store i32 0, i32* [[P]]
-; CHECK-NEXT:    ret i32* [[P]]
-;
-  %ptr = tail call i8* @malloc(i32 4)
-  %P = bitcast i8* %ptr to i32*
-  %DEAD = load i32, i32* %P
-  %DEAD2 = add i32 %DEAD, 1
-  store i32 %DEAD2, i32* %P
-  call void @test13f( )
-  store i32 0, i32* %P
-  ret i32* %P
-}
-
-define i32 addrspace(1)* @test13_addrspacecast() {
-; CHECK-LABEL: @test13_addrspacecast(
-; CHECK-NEXT:    [[P:%.*]] = tail call i8* @malloc(i32 4)
-; CHECK-NEXT:    [[P_BC:%.*]] = bitcast i8* [[P]] to i32*
-; CHECK-NEXT:    [[P:%.*]] = addrspacecast i32* [[P_BC]] to i32 addrspace(1)*
-; CHECK-NEXT:    call void @test13f()
-; CHECK-NEXT:    store i32 0, i32 addrspace(1)* [[P]]
-; CHECK-NEXT:    ret i32 addrspace(1)* [[P]]
-;
-  %p = tail call i8* @malloc(i32 4)
-  %p.bc = bitcast i8* %p to i32*
-  %P = addrspacecast i32* %p.bc to i32 addrspace(1)*
-  %DEAD = load i32, i32 addrspace(1)* %P
-  %DEAD2 = add i32 %DEAD, 1
-  store i32 %DEAD2, i32 addrspace(1)* %P
-  call void @test13f( )
-  store i32 0, i32 addrspace(1)* %P
-  ret i32 addrspace(1)* %P
-}
-
 
 declare noalias i8* @malloc(i32)
 declare noalias i8* @calloc(i32, i32)
@@ -218,26 +108,6 @@ define noalias i8* @test23() nounwind uwtable ssp {
   ret i8* %call
 }
 
-; Make sure same sized store to later element is deleted
-define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
-; CHECK-LABEL: @test24(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A:%.*]], i64 0, i64 0
-; CHECK-NEXT:    store i32 [[B:%.*]], i32* [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
-; CHECK-NEXT:    store i32 [[C:%.*]], i32* [[TMP2]], align 4
-; CHECK-NEXT:    ret void
-;
-  %1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
-  store i32 0, i32* %1, align 4
-  %2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
-  store i32 0, i32* %2, align 4
-  %3 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
-  store i32 %b, i32* %3, align 4
-  %4 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
-  store i32 %c, i32* %4, align 4
-  ret void
-}
-
 ; Check another case like PR13547 where strdup is not like malloc.
 define i8* @test25(i8* %p) nounwind {
 ; CHECK-LABEL: @test25(
@@ -317,35 +187,6 @@ bb3:
 
 declare void @unknown_func()
 
-; Don't remove redundant store because of unknown call.
-define i32 @test30(i1 %c, i32* %p, i32 %i) {
-; CHECK-LABEL: @test30(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
-; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
-; CHECK:       bb1:
-; CHECK-NEXT:    br label [[BB3:%.*]]
-; CHECK:       bb2:
-; CHECK-NEXT:    call void @unknown_func()
-; CHECK-NEXT:    br label [[BB3]]
-; CHECK:       bb3:
-; CHECK-NEXT:    store i32 [[V]], i32* [[P]], align 4
-; CHECK-NEXT:    ret i32 0
-;
-entry:
-  %v = load i32, i32* %p, align 4
-  br i1 %c, label %bb1, label %bb2
-bb1:
-  br label %bb3
-bb2:
-  ; Might overwrite value at %p
-  call void @unknown_func()
-  br label %bb3
-bb3:
-  store i32 %v, i32* %p, align 4
-  ret i32 0
-}
-
 ; Don't remove redundant store in a loop with a may-alias store.
 define i32 @test32(i1 %c, i32* %p, i32 %i) {
 ; CHECK-LABEL: @test32(
@@ -453,7 +294,8 @@ define void @test37_atomic(i8* %P, i8* %Q, i8* %R) {
 ; The memmove is dead, because memcpy arguments cannot overlap.
 define void @test38(i8* %P, i8* %Q, i8* %R) {
 ; CHECK-LABEL: @test38(
-; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT:    tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
 ; CHECK-NEXT:    ret void
 ;
 
@@ -465,7 +307,8 @@ define void @test38(i8* %P, i8* %Q, i8* %R) {
 ; The memmove is dead, because memcpy arguments cannot overlap.
 define void @test38_atomic(i8* %P, i8* %Q, i8* %R) {
 ; CHECK-LABEL: @test38_atomic(
-; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
+; CHECK-NEXT:    tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
 ; CHECK-NEXT:    ret void
 ;
 
@@ -536,9 +379,7 @@ declare void @free(i8* nocapture)
 define void @test41(i32* noalias %P) {
 ; CHECK-LABEL: @test41(
 ; CHECK-NEXT:    [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
-; CHECK-NEXT:    store i32 1, i32* [[P]]
 ; CHECK-NEXT:    call void @unknown_func()
-; CHECK-NEXT:    store i32 2, i32* [[P]]
 ; CHECK-NEXT:    call void @free(i8* [[P2]])
 ; CHECK-NEXT:    ret void
 ;


        


More information about the llvm-commits mailing list