[llvm] 81dbb6a - Recommit "[DSE] Add first version of MemorySSA-backed DSE (Bottom up walk)."
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 06:19:09 PST 2020
Author: Florian Hahn
Date: 2020-02-12T14:17:50Z
New Revision: 81dbb6aec6261a06132f17b06c9071f4a1871fa4
URL: https://github.com/llvm/llvm-project/commit/81dbb6aec6261a06132f17b06c9071f4a1871fa4
DIFF: https://github.com/llvm/llvm-project/commit/81dbb6aec6261a06132f17b06c9071f4a1871fa4.diff
LOG: Recommit "[DSE] Add first version of MemorySSA-backed DSE (Bottom up walk)."
This includes a fix for the sanitizer failures.
This reverts the revert commit
42f8b915eb72364cc5e84adf58a2c2d4947e8b10.
Added:
llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll
Modified:
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f56a4c5f8920..7106fb086723 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -29,7 +29,10 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
@@ -40,6 +43,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -87,11 +91,20 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging",
cl::init(true), cl::Hidden,
cl::desc("Enable partial store merging in DSE"));
-// Temporary dummy option for tests.
static cl::opt<bool>
EnableMemorySSA("enable-dse-memoryssa", cl::init(false), cl::Hidden,
cl::desc("Use the new MemorySSA-backed DSE."));
+static cl::opt<unsigned>
+ MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(100), cl::Hidden,
+ cl::desc("The number of memory instructions to scan for "
+ "dead store elimination (default = 100)"));
+
+static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
+ "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
+ cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
+ "other stores per basic block (default = 5000)"));
+
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
@@ -1354,22 +1367,490 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
return MadeChange;
}
+namespace {
+//=============================================================================
+// MemorySSA backed dead store elimination.
+//
+// The code below implements dead store elimination using MemorySSA. It uses
+// the following general approach: given a MemoryDef, walk upwards to find
+// clobbering MemoryDefs that may be killed by the starting def. Then check
+// that there are no uses that may read the location of the original MemoryDef
+// in between both MemoryDefs. A bit more concretely:
+//
+// For all MemoryDefs StartDef:
+// 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking
+// upwards.
+// 2. Check that there are no reads between DomAccess and the StartDef by
+// checking all uses starting at DomAccess and walking until we see StartDef.
+// 3. For each found DomDef, check that:
+// 1. There are no barrier instructions between DomDef and StartDef (like
+// throws or stores with ordering constraints).
+// 2. StartDef is executed whenever DomDef is executed.
+// 3. StartDef completely overwrites DomDef.
+// 4. Erase DomDef from the function and MemorySSA.
+
+// Returns true if \p M is an intrinsic that does not read or write memory.
+bool isNoopIntrinsic(MemoryUseOrDef *M) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(M->getMemoryInst())) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_end:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::assume:
+ return true;
+ case Intrinsic::dbg_addr:
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_label:
+ case Intrinsic::dbg_value:
+ llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
+// Check if we can ignore \p D for DSE.
+bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
+ Instruction *DI = D->getMemoryInst();
+ // Calls that only access inaccessible memory cannot read or write any memory
+ // locations we consider for elimination.
+ if (auto CS = CallSite(DI))
+ if (CS.onlyAccessesInaccessibleMemory())
+ return true;
+
+ // We can eliminate stores to locations not visible to the caller across
+ // throwing instructions.
+ if (DI->mayThrow() && !DefVisibleToCaller)
+ return true;
+
+ // We can remove the dead stores, irrespective of the fence and its ordering
+ // (release/acquire/seq_cst). Fences only constrain the ordering of
+ // already visible stores, it does not make a store visible to other
+ // threads. So, skipping over a fence does not change a store from being
+ // dead.
+ if (isa<FenceInst>(DI))
+ return true;
+
+ // Skip intrinsics that do not really read or modify memory.
+ if (isNoopIntrinsic(D))
+ return true;
+
+ return false;
+}
+
+struct DSEState {
+ Function &F;
+ AliasAnalysis &AA;
+ MemorySSA &MSSA;
+ DominatorTree &DT;
+ PostDominatorTree &PDT;
+ const TargetLibraryInfo &TLI;
+
+ // All MemoryDefs that potentially could kill other MemDefs.
+ SmallVector<MemoryDef *, 64> MemDefs;
+ // Any that should be skipped as they are already deleted
+ SmallPtrSet<MemoryAccess *, 4> SkipStores;
+ // Keep track of all of the objects that are invisible to the caller until the
+ // function returns.
+ SmallPtrSet<const Value *, 16> InvisibleToCaller;
+ // Keep track of blocks with throwing instructions not modeled in MemorySSA.
+ SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
+
+ DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
+ PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
+ : F(F), AA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI) {}
+
+ static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
+ DominatorTree &DT, PostDominatorTree &PDT,
+ const TargetLibraryInfo &TLI) {
+ DSEState State(F, AA, MSSA, DT, PDT, TLI);
+ // Collect blocks with throwing instructions not modeled in MemorySSA and
+ // alloc-like objects.
+ for (Instruction &I : instructions(F)) {
+ if (I.mayThrow() && !MSSA.getMemoryAccess(&I))
+ State.ThrowingBlocks.insert(I.getParent());
+
+ auto *MD = dyn_cast_or_null<MemoryDef>(MSSA.getMemoryAccess(&I));
+ if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
+ hasAnalyzableMemoryWrite(&I, TLI) && isRemovable(&I))
+ State.MemDefs.push_back(MD);
+
+ // Track alloca and alloca-like objects. Here we care about objects not
+ // visible to the caller during function execution. Alloca objects are
+ // invalid in the caller, for alloca-like objects we ensure that they are
+ // not captured throughout the function.
+ if (isa<AllocaInst>(&I) ||
+ (isAllocLikeFn(&I, &TLI) && !PointerMayBeCaptured(&I, false, true)))
+ State.InvisibleToCaller.insert(&I);
+ }
+ // Treat byval or inalloca arguments the same as Allocas, stores to them are
+ // dead at the end of the function.
+ for (Argument &AI : F.args())
+ if (AI.hasByValOrInAllocaAttr())
+ State.InvisibleToCaller.insert(&AI);
+ return State;
+ }
+
+ Optional<MemoryLocation> getLocForWriteEx(Instruction *I) const {
+ if (!I->mayWriteToMemory())
+ return None;
+
+ if (auto *MTI = dyn_cast<AnyMemIntrinsic>(I))
+ return {MemoryLocation::getForDest(MTI)};
+
+ if (auto CS = CallSite(I)) {
+ if (Function *F = CS.getCalledFunction()) {
+ StringRef FnName = F->getName();
+ if (TLI.has(LibFunc_strcpy) && FnName == TLI.getName(LibFunc_strcpy))
+ return {MemoryLocation(CS.getArgument(0))};
+ if (TLI.has(LibFunc_strncpy) && FnName == TLI.getName(LibFunc_strncpy))
+ return {MemoryLocation(CS.getArgument(0))};
+ if (TLI.has(LibFunc_strcat) && FnName == TLI.getName(LibFunc_strcat))
+ return {MemoryLocation(CS.getArgument(0))};
+ if (TLI.has(LibFunc_strncat) && FnName == TLI.getName(LibFunc_strncat))
+ return {MemoryLocation(CS.getArgument(0))};
+ }
+ return None;
+ }
+
+ return MemoryLocation::getOrNone(I);
+ }
+
+ /// Returns true if \p Use completely overwrites \p DefLoc.
+ bool isCompleteOverwrite(MemoryLocation DefLoc, Instruction *UseInst) const {
+ // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
+ // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
+ // MemoryDef.
+ if (!UseInst->mayWriteToMemory())
+ return false;
+
+ if (auto CS = CallSite(UseInst))
+ if (CS.onlyAccessesInaccessibleMemory())
+ return false;
+
+ ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc);
+ // If necessary, perform additional analysis.
+ if (isModSet(MR))
+ MR = AA.callCapturesBefore(UseInst, DefLoc, &DT);
+
+ Optional<MemoryLocation> UseLoc = getLocForWriteEx(UseInst);
+ return isModSet(MR) && isMustSet(MR) &&
+ UseLoc->Size.getValue() >= DefLoc.Size.getValue();
+ }
+
+ /// Returns true if \p Use may read from \p DefLoc.
+ bool isReadClobber(MemoryLocation DefLoc, Instruction *UseInst) const {
+ if (!UseInst->mayReadFromMemory())
+ return false;
+
+ if (auto CS = CallSite(UseInst))
+ if (CS.onlyAccessesInaccessibleMemory())
+ return false;
+
+ ModRefInfo MR = AA.getModRefInfo(UseInst, DefLoc);
+ // If necessary, perform additional analysis.
+ if (isRefSet(MR))
+ MR = AA.callCapturesBefore(UseInst, DefLoc, &DT);
+ return isRefSet(MR);
+ }
+
+ // Find a MemoryDef writing to \p DefLoc and dominating \p Current, with no
+ // read access in between or return None otherwise. The returned value may not
+ // (completely) overwrite \p DefLoc. Currently we bail out when we encounter
+ // any of the following
+ // * An aliasing MemoryUse (read).
+ // * A MemoryPHI.
+ Optional<MemoryAccess *> getDomMemoryDef(MemoryDef *KillingDef,
+ MemoryAccess *Current,
+ MemoryLocation DefLoc,
+ bool DefVisibleToCaller,
+ int &ScanLimit) const {
+ MemoryDef *DomDef;
+ MemoryAccess *StartDef = Current;
+ bool StepAgain;
+ LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current
+ << "\n");
+ // Find the next clobbering Mod access for DefLoc, starting at Current.
+ do {
+ StepAgain = false;
+ // Reached TOP.
+ if (MSSA.isLiveOnEntryDef(Current))
+ return None;
+
+ MemoryUseOrDef *CurrentUD = dyn_cast<MemoryUseOrDef>(Current);
+ if (!CurrentUD)
+ return None;
+
+ // Look for access that clobber DefLoc.
+ MemoryAccess *DomAccess =
+ MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(
+ CurrentUD->getDefiningAccess(), DefLoc);
+ DomDef = dyn_cast<MemoryDef>(DomAccess);
+ if (!DomDef || MSSA.isLiveOnEntryDef(DomDef))
+ return None;
+
+ // Check if we can skip DomDef for DSE. We also require the KillingDef
+ // execute whenever DomDef executes and use post-dominance to ensure that.
+ if (canSkipDef(DomDef, DefVisibleToCaller) ||
+ !PDT.dominates(KillingDef->getBlock(), DomDef->getBlock())) {
+ StepAgain = true;
+ Current = DomDef;
+ }
+
+ } while (StepAgain);
+
+ LLVM_DEBUG(dbgs() << " Checking for reads of " << *DomDef << " ("
+ << *DomDef->getMemoryInst() << ")\n");
+
+ SmallSetVector<MemoryAccess *, 32> WorkList;
+ auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
+ for (Use &U : Acc->uses())
+ WorkList.insert(cast<MemoryAccess>(U.getUser()));
+ };
+ PushMemUses(DomDef);
+
+ // Check if DomDef may be read.
+ for (unsigned I = 0; I < WorkList.size(); I++) {
+ MemoryAccess *UseAccess = WorkList[I];
+
+ LLVM_DEBUG(dbgs() << " Checking use " << *UseAccess);
+ if (--ScanLimit == 0) {
+ LLVM_DEBUG(dbgs() << " ... hit scan limit\n");
+ return None;
+ }
+
+ // Bail out on MemoryPhis for now.
+ if (isa<MemoryPhi>(UseAccess)) {
+ LLVM_DEBUG(dbgs() << " ... hit MemoryPhi\n");
+ return None;
+ }
+
+ Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
+
+ if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess))) {
+ PushMemUses(UseAccess);
+ continue;
+ }
+
+ // Uses which may read the original MemoryDef mean we cannot eliminate the
+ // original MD. Stop walk.
+ if (isReadClobber(DefLoc, UseInst)) {
+ LLVM_DEBUG(dbgs() << " ... found read clobber\n");
+ return None;
+ }
+
+ if (StartDef == UseAccess)
+ continue;
+
+ // Check all uses for MemoryDefs, except for defs completely overwriting
+ // the original location. Otherwise we have to check uses of *all*
+ // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
+ // miss cases like the following
+ // 1 = Def(LoE) ; <----- DomDef stores [0,1]
+ // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
+ // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
+ // (The Use points to the *first* Def it may alias)
+ // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
+ // stores [0,1]
+ if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
+ if (!isCompleteOverwrite(DefLoc, UseInst))
+ PushMemUses(UseDef);
+ }
+ }
+
+ // No aliasing MemoryUses of DomDef found, DomDef is potentially dead.
+ return {DomDef};
+ }
+
+ // Delete dead memory defs
+ void deleteDeadInstruction(Instruction *SI) {
+ MemorySSAUpdater Updater(&MSSA);
+ SmallVector<Instruction *, 32> NowDeadInsts;
+ NowDeadInsts.push_back(SI);
+ --NumFastOther;
+
+ while (!NowDeadInsts.empty()) {
+ Instruction *DeadInst = NowDeadInsts.pop_back_val();
+ ++NumFastOther;
+
+ // Try to preserve debug information attached to the dead instruction.
+ salvageDebugInfo(*DeadInst);
+
+ // Remove the Instruction from MSSA.
+ if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
+ if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
+ SkipStores.insert(MD);
+ }
+ Updater.removeMemoryAccess(MA);
+ }
+
+ // Remove its operands
+ for (Use &O : DeadInst->operands())
+ if (Instruction *OpI = dyn_cast<Instruction>(O)) {
+ O = nullptr;
+ if (isInstructionTriviallyDead(OpI, &TLI))
+ NowDeadInsts.push_back(OpI);
+ }
+
+ DeadInst->eraseFromParent();
+ }
+ }
+
+ // Check for any extra throws between SI and NI that block DSE. This only
+ // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may
+ // throw are handled during the walk from one def to the next.
+ bool mayThrowBetween(Instruction *SI, Instruction *NI,
+ const Value *SILocUnd) const {
+ // First see if we can ignore it by using the fact that SI is an
+ // alloca/alloca like object that is not visible to the caller during
+ // execution of the function.
+ if (SILocUnd && InvisibleToCaller.count(SILocUnd))
+ return false;
+
+ if (SI->getParent() == NI->getParent())
+ return ThrowingBlocks.find(SI->getParent()) != ThrowingBlocks.end();
+ return !ThrowingBlocks.empty();
+ }
+
+ // Check if \p NI acts as a DSE barrier for \p SI. The following instructions
+ // act as barriers:
+ // * A memory instruction that may throw and \p SI accesses a non-stack
+ // object.
+ // * Atomic stores stronger than monotonic.
+ bool isDSEBarrier(Instruction *SI, MemoryLocation &SILoc,
+ const Value *SILocUnd, Instruction *NI,
+ MemoryLocation &NILoc) const {
+ // If NI may throw it acts as a barrier, unless it accesses an alloca/alloca
+ // like object that does not escape.
+ if (NI->mayThrow() && !InvisibleToCaller.count(SILocUnd))
+ return true;
+
+ if (NI->isAtomic()) {
+ if (auto *NSI = dyn_cast<StoreInst>(NI)) {
+ if (isStrongerThanMonotonic(NSI->getOrdering()))
+ return true;
+ } else
+ llvm_unreachable(
+ "Other instructions should be modeled/skipped in MemorySSA");
+ }
+
+ return false;
+ }
+};
+
+bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
+ MemorySSA &MSSA, DominatorTree &DT,
+ PostDominatorTree &PDT,
+ const TargetLibraryInfo &TLI) {
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ bool MadeChange = false;
+
+ DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
+ // For each store:
+ for (unsigned I = 0; I < State.MemDefs.size(); I++) {
+ MemoryDef *Current = State.MemDefs[I];
+ if (State.SkipStores.count(Current))
+ continue;
+ Instruction *SI = cast<MemoryDef>(Current)->getMemoryInst();
+ auto MaybeSILoc = State.getLocForWriteEx(SI);
+ if (!MaybeSILoc) {
+ LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
+ << *SI << "\n");
+ continue;
+ }
+ MemoryLocation SILoc = *MaybeSILoc;
+ assert(SILoc.Ptr && "SILoc should not be null");
+ const Value *SILocUnd = GetUnderlyingObject(SILoc.Ptr, DL);
+ Instruction *DefObj =
+ const_cast<Instruction *>(dyn_cast<Instruction>(SILocUnd));
+ bool DefVisibleToCaller = !State.InvisibleToCaller.count(SILocUnd);
+ if (DefObj && ((isAllocLikeFn(DefObj, &TLI) &&
+ !PointerMayBeCapturedBefore(DefObj, false, true, SI, &DT))))
+ DefVisibleToCaller = false;
+
+ LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by " << *SI
+ << "\n");
+
+ int ScanLimit = MemorySSAScanLimit;
+ MemoryDef *StartDef = Current;
+ // Walk MemorySSA upward to find MemoryDefs that might be killed by SI.
+ while (Optional<MemoryAccess *> Next = State.getDomMemoryDef(
+ StartDef, Current, SILoc, DefVisibleToCaller, ScanLimit)) {
+ MemoryAccess *DomAccess = *Next;
+ LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess << "\n");
+ MemoryDef *NextDef = dyn_cast<MemoryDef>(DomAccess);
+ Instruction *NI = NextDef->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " def " << *NI << "\n");
+
+ if (!hasAnalyzableMemoryWrite(NI, TLI))
+ break;
+ MemoryLocation NILoc = *State.getLocForWriteEx(NI);
+ // Check for anything that looks like it will be a barrier to further
+ // removal
+ if (State.isDSEBarrier(SI, SILoc, SILocUnd, NI, NILoc)) {
+ LLVM_DEBUG(dbgs() << " stop, barrier\n");
+ break;
+ }
+
+ // Before we try to remove anything, check for any extra throwing
+ // instructions that block us from DSEing
+ if (State.mayThrowBetween(SI, NI, SILocUnd)) {
+ LLVM_DEBUG(dbgs() << " stop, may throw!\n");
+ break;
+ }
+
+ // Check if NI overwrites SI.
+ int64_t InstWriteOffset, DepWriteOffset;
+ InstOverlapIntervalsTy IOL;
+ OverwriteResult OR = isOverwrite(SILoc, NILoc, DL, TLI, DepWriteOffset,
+ InstWriteOffset, NI, IOL, AA, &F);
+
+ if (OR == OW_Complete) {
+ LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
+ << "\n KILLER: " << *SI << '\n');
+ State.deleteDeadInstruction(NI);
+ ++NumFastStores;
+ MadeChange = true;
+ } else
+ Current = NextDef;
+ }
+ }
+
+ return MadeChange;
+}
+} // end anonymous namespace
+
//===----------------------------------------------------------------------===//
// DSE Pass
//===----------------------------------------------------------------------===//
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
- AliasAnalysis *AA = &AM.getResult<AAManager>(F);
- DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- MemoryDependenceResults *MD = &AM.getResult<MemoryDependenceAnalysis>(F);
- const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ AliasAnalysis &AA = AM.getResult<AAManager>(F);
+ const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+
+ if (EnableMemorySSA) {
+ MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+
+ if (!eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI))
+ return PreservedAnalyses::all();
+ } else {
+ MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
- if (!eliminateDeadStores(F, AA, MD, DT, TLI))
- return PreservedAnalyses::all();
+ if (!eliminateDeadStores(F, &AA, &MD, &DT, &TLI))
+ return PreservedAnalyses::all();
+ }
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<GlobalsAA>();
- PA.preserve<MemoryDependenceAnalysis>();
+ if (EnableMemorySSA)
+ PA.preserve<MemorySSAAnalysis>();
+ else
+ PA.preserve<MemoryDependenceAnalysis>();
return PA;
}
@@ -1388,25 +1869,42 @@ class DSELegacyPass : public FunctionPass {
if (skipFunction(F))
return false;
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- MemoryDependenceResults *MD =
- &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ const TargetLibraryInfo &TLI =
+ getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+
+ if (EnableMemorySSA) {
+ MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ PostDominatorTree &PDT =
+ getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+
+ return eliminateDeadStoresMemorySSA(F, AA, MSSA, DT, PDT, TLI);
+ } else {
+ MemoryDependenceResults &MD =
+ getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
- return eliminateDeadStores(F, AA, MD, DT, TLI);
+ return eliminateDeadStores(F, &AA, &MD, &DT, &TLI);
+ }
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<MemoryDependenceWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<MemoryDependenceWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+
+ if (EnableMemorySSA) {
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<PostDominatorTreeWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ } else {
+ AU.addRequired<MemoryDependenceWrapperPass>();
+ AU.addPreserved<MemoryDependenceWrapperPass>();
+ }
}
};
@@ -1417,8 +1915,10 @@ char DSELegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
index b9a0ea76d7fb..0d84e9cf9d51 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/2011-09-06-EndOfFunction.ll
@@ -1,3 +1,4 @@
+; XFAIL: *
; RUN: opt -dse -enable-dse-memoryssa -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll
index a7278d56e04c..1028c8ed47f6 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreBegin.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; XFAIL: *
; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
define void @write4to7(i32* nocapture %p) {
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll
index d8791aa5e930..0711855532fe 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/OverwriteStoreEnd.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; XFAIL: *
; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
index b21183c1ac2b..9558033b37c7 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/atomic.ll
@@ -1,3 +1,4 @@
+; XFAIL: *
; RUN: opt -basicaa -dse -enable-dse-memoryssa -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll
index 19ebaa93e7d3..397c5947ae5c 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/calloc-store.ll
@@ -1,3 +1,5 @@
+; XFAIL: *
+
; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
declare noalias i8* @calloc(i64, i64)
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll
new file mode 100644
index 000000000000..4ee295821e22
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence-todo.ll
@@ -0,0 +1,50 @@
+; XFAIL: *
+
+; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s
+
+; We DSE stack alloc'ed and byval locations, in the presence of fences.
+; Fence does not make an otherwise thread local store visible.
+; Right now the DSE in presence of fence is only done in end blocks (with no successors),
+; but the same logic applies to other basic blocks as well.
+; The store to %addr.i can be removed since it is a byval attribute
+define void @test3(i32* byval %addr.i) {
+; CHECK-LABEL: @test3
+; CHECK-NOT: store
+; CHECK: fence
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence release
+ ret void
+}
+
+declare void @foo(i8* nocapture %p)
+
+declare noalias i8* @malloc(i32)
+
+; DSE of stores in locations allocated through library calls.
+define void @test_nocapture() {
+; CHECK-LABEL: @test_nocapture
+; CHECK: malloc
+; CHECK: foo
+; CHECK-NOT: store
+; CHECK: fence
+ %m = call i8* @malloc(i32 24)
+ call void @foo(i8* %m)
+ store i8 4, i8* %m
+ fence release
+ ret void
+}
+
+
+; This is a full fence, but it does not make a thread local store visible.
+; We can DSE the store in presence of the fence.
+define void @fence_seq_cst() {
+; CHECK-LABEL: @fence_seq_cst
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: ret void
+ %P1 = alloca i32
+ store i32 0, i32* %P1, align 4
+ fence seq_cst
+ store i32 4, i32* %P1, align 4
+ ret void
+}
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll
index 9b43d376b295..b22fc9c92004 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/fence.ll
@@ -46,51 +46,3 @@ define void @test2(i32* %addr.i) {
store i32 5, i32* %addr.i, align 4
ret void
}
-
-; We DSE stack alloc'ed and byval locations, in the presence of fences.
-; Fence does not make an otherwise thread local store visible.
-; Right now the DSE in presence of fence is only done in end blocks (with no successors),
-; but the same logic applies to other basic blocks as well.
-; The store to %addr.i can be removed since it is a byval attribute
-define void @test3(i32* byval %addr.i) {
-; CHECK-LABEL: @test3
-; CHECK-NOT: store
-; CHECK: fence
-; CHECK: ret
- store i32 5, i32* %addr.i, align 4
- fence release
- ret void
-}
-
-declare void @foo(i8* nocapture %p)
-
-declare noalias i8* @malloc(i32)
-
-; DSE of stores in locations allocated through library calls.
-define void @test_nocapture() {
-; CHECK-LABEL: @test_nocapture
-; CHECK: malloc
-; CHECK: foo
-; CHECK-NOT: store
-; CHECK: fence
- %m = call i8* @malloc(i32 24)
- call void @foo(i8* %m)
- store i8 4, i8* %m
- fence release
- ret void
-}
-
-
-; This is a full fence, but it does not make a thread local store visible.
-; We can DSE the store in presence of the fence.
-define void @fence_seq_cst() {
-; CHECK-LABEL: @fence_seq_cst
-; CHECK-NEXT: fence seq_cst
-; CHECK-NEXT: ret void
- %P1 = alloca i32
- store i32 0, i32* %P1, align 4
- fence seq_cst
- store i32 4, i32* %P1, align 4
- ret void
-}
-
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll
index a9a32348cb6b..a6e47d7bb107 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/free.ll
@@ -1,3 +1,5 @@
+; XFAIL: *
+
; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
target datalayout = "e-p:64:64:64"
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll
index 78fe292d0cf5..638571f6f417 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/inst-limits.ll
@@ -1,10 +1,9 @@
; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-; If there are two stores to the same location, DSE should be able to remove
-; the first store if the two stores are separated by no more than 98
-; instructions. The existence of debug intrinsics between the stores should
-; not affect this instruction limit.
+; This test is not relevant for DSE with MemorySSA. Non-memory instructions
+; are ignored anyways. The limits for the MemorySSA traversal are tested in
+; llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll
@x = global i32 0, align 4
@@ -129,7 +128,7 @@ entry:
define i32 @test_outside_limit() {
entry:
; The first store; later there is a second store to the same location
- ; CHECK: store i32 1, i32* @x, align 4
+ ; CHECK-NOT: store i32 1, i32* @x, align 4
store i32 1, i32* @x, align 4
; Insert 99 dummy instructions between the two stores; this is
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll
index 3b3f4d498e9f..e4d4722677a7 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/lifetime.ll
@@ -1,3 +1,5 @@
+; XFAIL: *
+
; RUN: opt -S -basicaa -dse -enable-dse-memoryssa < %s | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll
index 52e2ecdc7290..06bcc346a435 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/mda-with-dbg-values.ll
@@ -1,13 +1,12 @@
-; RUN: opt -S -dse -enable-dse-memoryssa -memdep-block-scan-limit=3 < %s | FileCheck %s
-; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa -memdep-block-scan-limit=3 < %s | FileCheck %s
+; RUN: opt -S -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s
+; RUN: opt -S -strip-debug -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 < %s | FileCheck %s
-; Test case to check that the memory dependency analysis gets the same
-; result even if we have a dbg value between the memcpy and
-; store. The memory dependency is then used by DSE to remove the store.
+; Test case to check that DSE gets the same result even if we have a dbg value
+; between the memcpy and the store.
-; We use -memdep-block-scan-limit=3 to be able to create a small test case.
-; Without it, we would need to squeeze in 100 instructions since the default
-; limit is 100.
+; This test case is less relevant for the MemorySSA backed version of DSE, as
+; debug values are not modeled in MemorySSA and are skipped regardless of the
+; exploration limit.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -20,6 +19,11 @@ entry:
%i = alloca i8, align 1
store i8 1, i8* %i, align 1, !dbg !19
call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
+ call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
+ call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
+ call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
+ call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
+ call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !17, metadata !DIExpression()), !dbg !18
%0 = bitcast [1 x i8]* @g to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %i, i8* %0, i64 1, i1 false), !dbg !20
br label %bb2
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll
index 38cb8a38511b..931597265e41 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memcpy-complete-overwrite.ll
@@ -1,4 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+
+; XFAIL: *
; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll
index 51c45c32d8c5..c22ad19198b4 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memintrinsics.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; XFAIL: *
; RUN: opt -S -dse -enable-dse-memoryssa < %s | FileCheck %s
declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll
new file mode 100644
index 000000000000..43a9f0c92f1b
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck --check-prefix=NO-LIMIT %s
+; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s
+; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=3 -S | FileCheck --check-prefix=LIMIT-3 %s
+; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=4 -S | FileCheck --check-prefix=LIMIT-4 %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+
+define void @test2(i32* noalias %P, i32* noalias %Q, i32* noalias %R) {
+;
+; NO-LIMIT-LABEL: @test2(
+; NO-LIMIT-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; NO-LIMIT: bb1:
+; NO-LIMIT-NEXT: br label [[BB3:%.*]]
+; NO-LIMIT: bb2:
+; NO-LIMIT-NEXT: br label [[BB3]]
+; NO-LIMIT: bb3:
+; NO-LIMIT-NEXT: store i32 0, i32* [[Q:%.*]]
+; NO-LIMIT-NEXT: store i32 0, i32* [[R:%.*]]
+; NO-LIMIT-NEXT: store i32 0, i32* [[P:%.*]]
+; NO-LIMIT-NEXT: ret void
+;
+; LIMIT-0-LABEL: @test2(
+; LIMIT-0-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; LIMIT-0: bb1:
+; LIMIT-0-NEXT: br label [[BB3:%.*]]
+; LIMIT-0: bb2:
+; LIMIT-0-NEXT: br label [[BB3]]
+; LIMIT-0: bb3:
+; LIMIT-0-NEXT: store i32 0, i32* [[Q:%.*]]
+; LIMIT-0-NEXT: store i32 0, i32* [[R:%.*]]
+; LIMIT-0-NEXT: store i32 0, i32* [[P:%.*]]
+; LIMIT-0-NEXT: ret void
+;
+; LIMIT-3-LABEL: @test2(
+; LIMIT-3-NEXT: store i32 1, i32* [[P:%.*]]
+; LIMIT-3-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; LIMIT-3: bb1:
+; LIMIT-3-NEXT: br label [[BB3:%.*]]
+; LIMIT-3: bb2:
+; LIMIT-3-NEXT: br label [[BB3]]
+; LIMIT-3: bb3:
+; LIMIT-3-NEXT: store i32 0, i32* [[Q:%.*]]
+; LIMIT-3-NEXT: store i32 0, i32* [[R:%.*]]
+; LIMIT-3-NEXT: store i32 0, i32* [[P]]
+; LIMIT-3-NEXT: ret void
+;
+; LIMIT-4-LABEL: @test2(
+; LIMIT-4-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; LIMIT-4: bb1:
+; LIMIT-4-NEXT: br label [[BB3:%.*]]
+; LIMIT-4: bb2:
+; LIMIT-4-NEXT: br label [[BB3]]
+; LIMIT-4: bb3:
+; LIMIT-4-NEXT: store i32 0, i32* [[Q:%.*]]
+; LIMIT-4-NEXT: store i32 0, i32* [[R:%.*]]
+; LIMIT-4-NEXT: store i32 0, i32* [[P:%.*]]
+; LIMIT-4-NEXT: ret void
+;
+ store i32 1, i32* %P
+ br i1 true, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ br label %bb3
+bb3:
+ store i32 0, i32* %Q
+ store i32 0, i32* %R
+ store i32 0, i32* %P
+ ret void
+}
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
index be4574e54cb1..c142ea6cf8de 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-and-memcpy.ll
@@ -59,8 +59,7 @@ define void @test17_atomic_weaker_2(i8* %P, i8* noalias %Q) nounwind ssp {
; Should not delete the volatile memset.
define void @test17v(i8* %P, i8* %Q) nounwind ssp {
; CHECK-LABEL: @test17v(
-; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* [[P:%.*]], i8 42, i64 8, i1 true)
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[Q:%.*]], i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
; CHECK-NEXT: ret void
;
tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 true)
@@ -74,8 +73,7 @@ define void @test17v(i8* %P, i8* %Q) nounwind ssp {
; Previously this was not allowed (PR8728), also discussed in PR11763.
define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
; CHECK-LABEL: @test18(
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false)
; CHECK-NEXT: ret void
;
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false)
@@ -85,8 +83,7 @@ define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
define void @test18_atomic(i8* %P, i8* %Q, i8* %R) nounwind ssp {
; CHECK-LABEL: @test18_atomic(
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
+; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1)
; CHECK-NEXT: ret void
;
tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %P, i8* align 1 %Q, i64 12, i32 1)
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll
index 11d155c5fc1e..d1aaab9a3d49 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memset-missing-debugloc.ll
@@ -1,6 +1,7 @@
; Test that the getelementptr generated when the dse pass determines that
; a memset can be shortened has the debugloc carried over from the memset.
+; XFAIL: *
; RUN: opt -S -march=native -dse -enable-dse-memoryssa < %s| FileCheck %s
; CHECK: bitcast [5 x i64]* %{{[a-zA-Z_][a-zA-Z0-9_]*}} to i8*, !dbg
; CHECK-NEXT: %{{[0-9]+}} = getelementptr inbounds i8, i8* %0, i64 32, !dbg ![[DBG:[0-9]+]]
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
index 8acc29f3f62e..055f117c4b5c 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores-big-endian.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; XFAIL: *
; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll
index feb1f31c99dd..b09904e5a27d 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/merge-stores.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; XFAIL: *
; RUN: opt -dse -enable-dse-memoryssa -enable-dse-partial-store-merging -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll
index 7caf48148156..c1f739234196 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-captures.ll
@@ -25,7 +25,6 @@ define i8* @test_return_captures_1() {
define i8* @test_return_captures_2() {
; CHECK-LABEL: @test_return_captures_2(
; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24)
-; CHECK-NEXT: store i8 0, i8* [[M]]
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: store i8 1, i8* [[M]]
@@ -91,7 +90,6 @@ exit:
define i8* @test_malloc_capture_3() {
; CHECK-LABEL: @test_malloc_capture_3(
; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24)
-; CHECK-NEXT: store i8 0, i8* [[M]]
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: store i8 1, i8* [[M]]
@@ -191,7 +189,6 @@ exit:
define i8* @test_malloc_capture_7() {
; CHECK-LABEL: @test_malloc_capture_7(
; CHECK-NEXT: [[M:%.*]] = call i8* @malloc(i64 24)
-; CHECK-NEXT: store i8 0, i8* [[M]]
; CHECK-NEXT: call void @may_throw()
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
@@ -215,10 +212,10 @@ exit:
define void @test_alloca_nocapture_1() {
; CHECK-LABEL: @test_alloca_nocapture_1(
; CHECK-NEXT: [[M:%.*]] = alloca i8
-; CHECK-NEXT: store i8 0, i8* [[M]]
; CHECK-NEXT: call void @foo()
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
+; CHECK-NEXT: store i8 1, i8* [[M]]
; CHECK-NEXT: ret void
;
%m = alloca i8
@@ -240,6 +237,7 @@ define void @test_alloca_capture_1() {
; CHECK-NEXT: call void @capture(i8* [[M]])
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
+; CHECK-NEXT: store i8 1, i8* [[M]]
; CHECK-NEXT: ret void
;
%m = alloca i8
@@ -262,6 +260,7 @@ define void @test_alloca_capture_2(%S1* %E) {
; CHECK: exit:
; CHECK-NEXT: [[F_PTR:%.*]] = getelementptr [[S1:%.*]], %S1* [[E:%.*]], i32 0, i32 0
; CHECK-NEXT: store i8* [[M]], i8** [[F_PTR]]
+; CHECK-NEXT: store i8 1, i8* [[M]]
; CHECK-NEXT: ret void
;
%m = alloca i8
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll
index 41d4d019cb94..a719e273e227 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-exceptions.ll
@@ -30,6 +30,7 @@ define void @test12(i32* %p) personality i32 (...)* @__CxxFrameHandler3 {
; CHECK-NEXT: [[C1:%.*]] = cleanuppad within none []
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
+; CHECK-NEXT: store i32 40, i32* [[SV]]
; CHECK-NEXT: ret void
;
block1:
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll
index bcf75e3860da..1ec4434fb20a 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-loops.ll
@@ -111,7 +111,6 @@ define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias
; CHECK: for.body4.lr.ph:
; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]]
-; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]]
; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
; CHECK: for.body4:
@@ -170,3 +169,116 @@ for.cond.cleanup3: ; preds = %for.body4
%exitcond29 = icmp eq i32 %inc11, %N
br i1 %exitcond29, label %for.cond.cleanup, label %for.body4.lr.ph
}
+
+declare i1 @cond() readnone
+
+; TODO: We can eliminate the store in for.header, but we currently hit a MemoryPhi.
+define void @loop_multiple_def_uses(i32* noalias %P) {
+; CHECK-LABEL: @loop_multiple_def_uses(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_HEADER:%.*]]
+; CHECK: for.header:
+; CHECK-NEXT: store i32 1, i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: store i32 1, i32* [[P]], align 4
+; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]]
+; CHECK-NEXT: br label [[FOR_HEADER]]
+; CHECK: end:
+; CHECK-NEXT: store i32 3, i32* [[P]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.header
+
+for.header:
+ store i32 1, i32* %P, align 4
+ %c1 = call i1 @cond()
+ br i1 %c1, label %for.body, label %end
+
+for.body:
+ store i32 1, i32* %P, align 4
+ %lv = load i32, i32* %P
+ br label %for.header
+
+end:
+ store i32 3, i32* %P, align 4
+ ret void
+}
+
+; We cannot eliminate the store in for.header, as it is only partially
+; overwritten in for.body and read afterwards.
+define void @loop_multiple_def_uses_partial_write(i32* noalias %p) {
+; CHECK-LABEL: @loop_multiple_def_uses_partial_write(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_HEADER:%.*]]
+; CHECK: for.header:
+; CHECK-NEXT: store i32 1239491, i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[C:%.*]] = bitcast i32* [[P]] to i8*
+; CHECK-NEXT: store i8 1, i8* [[C]], align 4
+; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]]
+; CHECK-NEXT: br label [[FOR_HEADER]]
+; CHECK: end:
+; CHECK-NEXT: store i32 3, i32* [[P]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.header
+
+for.header:
+ store i32 1239491, i32* %p, align 4
+ %c1 = call i1 @cond()
+ br i1 %c1, label %for.body, label %end
+
+for.body:
+ %c = bitcast i32* %p to i8*
+ store i8 1, i8* %c, align 4
+ %lv = load i32, i32* %p
+ br label %for.header
+
+end:
+ store i32 3, i32* %p, align 4
+ ret void
+}
+
+; We cannot eliminate the store in for.header, as the location is not overwritten
+; in for.body and read afterwards.
+define void @loop_multiple_def_uses_mayalias_write(i32* %p, i32* %q) {
+
+; CHECK-LABEL: @loop_multiple_def_uses_mayalias_write(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_HEADER:%.*]]
+; CHECK: for.header:
+; CHECK-NEXT: store i32 1239491, i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT: br i1 [[C1]], label [[FOR_BODY:%.*]], label [[END:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: store i32 1, i32* [[Q:%.*]], align 4
+; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[P]]
+; CHECK-NEXT: br label [[FOR_HEADER]]
+; CHECK: end:
+; CHECK-NEXT: store i32 3, i32* [[P]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.header
+
+for.header:
+ store i32 1239491, i32* %p, align 4
+ %c1 = call i1 @cond()
+ br i1 %c1, label %for.body, label %end
+
+for.body:
+ store i32 1, i32* %q, align 4
+ %lv = load i32, i32* %p
+ br label %for.header
+
+end:
+ store i32 3, i32* %p, align 4
+ ret void
+}
+
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
index ddfc4d43d51a..ce34986ff4e3 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-memoryphis.ll
@@ -103,3 +103,73 @@ bb3:
store i8 1, i8* %P2
ret void
}
+
+declare void @hoge()
+
+; Check a function with a MemoryPhi with 3 incoming values.
+define void @widget(i32* %Ptr, i1 %c1, i1 %c2, i32 %v1, i32 %v2, i32 %v3) {
+; CHECK-LABEL: @widget(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: tail call void @hoge()
+; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 [[C2:%.*]], label [[BB2:%.*]], label [[BB3]]
+; CHECK: bb2:
+; CHECK-NEXT: store i32 -1, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: br label [[BB4:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: switch i32 [[V1:%.*]], label [[BB8:%.*]] [
+; CHECK-NEXT: i32 0, label [[BB5:%.*]]
+; CHECK-NEXT: i32 1, label [[BB6:%.*]]
+; CHECK-NEXT: i32 2, label [[BB7:%.*]]
+; CHECK-NEXT: ]
+; CHECK: bb5:
+; CHECK-NEXT: store i32 0, i32* [[PTR]], align 4
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb6:
+; CHECK-NEXT: store i32 1, i32* [[PTR]], align 4
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb7:
+; CHECK-NEXT: store i32 2, i32* [[PTR]], align 4
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb8:
+; CHECK-NEXT: br label [[BB4]]
+;
+bb:
+ tail call void @hoge()
+ br i1 %c1, label %bb3, label %bb1
+
+bb1: ; preds = %bb
+ br i1 %c2, label %bb2, label %bb3
+
+bb2: ; preds = %bb1
+ store i32 -1, i32* %Ptr, align 4
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb1, %bb
+ br label %bb4
+
+bb4: ; preds = %bb8, %bb3
+ switch i32 %v1, label %bb8 [
+ i32 0, label %bb5
+ i32 1, label %bb6
+ i32 2, label %bb7
+ ]
+
+bb5: ; preds = %bb4
+ store i32 0, i32* %Ptr, align 4
+ br label %bb8
+
+bb6: ; preds = %bb4
+ store i32 1, i32* %Ptr, align 4
+ br label %bb8
+
+bb7: ; preds = %bb4
+ store i32 2, i32* %Ptr, align 4
+ br label %bb8
+
+bb8: ; preds = %bb7, %bb6, %bb5, %bb4
+ br label %bb4
+}
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
index 17bd8a6fcc87..cbd7dae56da1 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-partial.ll
@@ -32,14 +32,13 @@ bb3:
define void @second_store_bigger(i32* noalias %P) {
; CHECK-LABEL: @second_store_bigger(
-; CHECK-NEXT: store i32 1, i32* [[P:%.*]]
; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: [[P_I64:%.*]] = bitcast i32* [[P]] to i64*
+; CHECK-NEXT: [[P_I64:%.*]] = bitcast i32* [[P:%.*]] to i64*
; CHECK-NEXT: store i64 0, i64* [[P_I64]]
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
index 2b0227751b2a..520a6eaccf09 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-simple.ll
@@ -6,14 +6,13 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
define void @test2(i32* noalias %P) {
; CHECK-LABEL: @test2(
-; CHECK-NEXT: store i32 1, i32* [[P:%.*]]
; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: store i32 0, i32* [[P]]
+; CHECK-NEXT: store i32 0, i32* [[P:%.*]]
; CHECK-NEXT: ret void
;
store i32 1, i32* %P
@@ -53,7 +52,6 @@ bb3:
define void @test7(i32* noalias %P, i32* noalias %Q) {
; CHECK-LABEL: @test7(
-; CHECK-NEXT: store i32 1, i32* [[Q:%.*]]
; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[P:%.*]]
@@ -61,7 +59,7 @@ define void @test7(i32* noalias %P, i32* noalias %Q) {
; CHECK: bb2:
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
-; CHECK-NEXT: store i32 0, i32* [[Q]]
+; CHECK-NEXT: store i32 0, i32* [[Q:%.*]]
; CHECK-NEXT: store i32 0, i32* [[P]]
; CHECK-NEXT: ret void
;
@@ -114,3 +112,38 @@ bb3:
store i32 0, i32* %P
ret void
}
+
+; We cannot eliminate `store i32 0, i32* %P`, as it is read by the later load.
+; Make sure that we check the uses of `store i32 1, i32* %P.1`, which does not
+; alias %P. Note that uses point to the *first* def that may alias.
+define void @overlapping_read(i32* %P) {
+; CHECK-LABEL: @overlapping_read(
+; CHECK-NEXT: store i32 0, i32* [[P:%.*]]
+; CHECK-NEXT: [[P_1:%.*]] = getelementptr i32, i32* [[P]], i32 1
+; CHECK-NEXT: store i32 1, i32* [[P_1]]
+; CHECK-NEXT: [[P_64:%.*]] = bitcast i32* [[P]] to i64*
+; CHECK-NEXT: [[LV:%.*]] = load i64, i64* [[P_64]]
+; CHECK-NEXT: br i1 true, label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: store i32 2, i32* [[P]]
+; CHECK-NEXT: ret void
+;
+ store i32 0, i32* %P
+ %P.1 = getelementptr i32, i32* %P, i32 1
+ store i32 1, i32* %P.1
+
+ %P.64 = bitcast i32* %P to i64*
+ %lv = load i64, i64* %P.64
+ br i1 true, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ ret void
+bb3:
+ store i32 2, i32* %P
+ ret void
+}
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll
index 9e88651bdb2e..3a59286c53f2 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/operand-bundles.ll
@@ -1,3 +1,4 @@
+; XFAIL: *
; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
declare noalias i8* @malloc(i64) "malloc-like"
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
index b96cc9f33c20..d0a3cef71a7e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
@@ -1,4 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; XFAIL: *
; RUN: opt < %s -basicaa -dse -enable-dse-memoryssa -S | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -enable-dse-memoryssa -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@@ -9,19 +10,6 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) n
declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
declare void @llvm.init.trampoline(i8*, i8*, i8*)
-; PR8576 - Should delete store of 10 even though p/q are may aliases.
-define void @test2(i32 *%p, i32 *%q) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: store i32 20, i32* [[Q:%.*]], align 4
-; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4
-; CHECK-NEXT: ret void
-;
- store i32 10, i32* %p, align 4
- store i32 20, i32* %q, align 4
- store i32 30, i32* %p, align 4
- ret void
-}
-
define void @test5(i32* %Q) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* [[Q:%.*]]
@@ -32,62 +20,6 @@ define void @test5(i32* %Q) {
ret void
}
-; Should delete store of 10 even though memset is a may-store to P (P and Q may
-; alias).
-define void @test6(i32 *%p, i8 *%q) {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[Q:%.*]], i8 42, i64 900, i1 false)
-; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4
-; CHECK-NEXT: ret void
-;
- store i32 10, i32* %p, align 4 ;; dead.
- call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false)
- store i32 30, i32* %p, align 4
- ret void
-}
-
-; Should delete store of 10 even though memset is a may-store to P (P and Q may
-; alias).
-define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
-; CHECK-LABEL: @test6_atomic(
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
-; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4
-; CHECK-NEXT: ret void
-;
- store atomic i32 10, i32* %p unordered, align 4 ;; dead.
- call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4)
- store atomic i32 30, i32* %p unordered, align 4
- ret void
-}
-
-; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
-; alias).
-define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
-; CHECK-LABEL: @test7(
-; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[Q:%.*]], i8* [[R:%.*]], i64 900, i1 false)
-; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4
-; CHECK-NEXT: ret void
-;
- store i32 10, i32* %p, align 4 ;; dead.
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false)
- store i32 30, i32* %p, align 4
- ret void
-}
-
-; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
-; alias).
-define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
-; CHECK-LABEL: @test7_atomic(
-; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
-; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4
-; CHECK-NEXT: ret void
-;
- store atomic i32 10, i32* %p unordered, align 4 ;; dead.
- call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4)
- store atomic i32 30, i32* %p unordered, align 4
- ret void
-}
-
; Do not delete stores that are only partially killed.
define i32 @test8() {
; CHECK-LABEL: @test8(
@@ -158,46 +90,6 @@ define void @test12({ i32, i32 }* %x) nounwind {
}
-; %P doesn't escape, the DEAD instructions should be removed.
-declare void @test13f()
-define i32* @test13() {
-; CHECK-LABEL: @test13(
-; CHECK-NEXT: [[PTR:%.*]] = tail call i8* @malloc(i32 4)
-; CHECK-NEXT: [[P:%.*]] = bitcast i8* [[PTR]] to i32*
-; CHECK-NEXT: call void @test13f()
-; CHECK-NEXT: store i32 0, i32* [[P]]
-; CHECK-NEXT: ret i32* [[P]]
-;
- %ptr = tail call i8* @malloc(i32 4)
- %P = bitcast i8* %ptr to i32*
- %DEAD = load i32, i32* %P
- %DEAD2 = add i32 %DEAD, 1
- store i32 %DEAD2, i32* %P
- call void @test13f( )
- store i32 0, i32* %P
- ret i32* %P
-}
-
-define i32 addrspace(1)* @test13_addrspacecast() {
-; CHECK-LABEL: @test13_addrspacecast(
-; CHECK-NEXT: [[P:%.*]] = tail call i8* @malloc(i32 4)
-; CHECK-NEXT: [[P_BC:%.*]] = bitcast i8* [[P]] to i32*
-; CHECK-NEXT: [[P:%.*]] = addrspacecast i32* [[P_BC]] to i32 addrspace(1)*
-; CHECK-NEXT: call void @test13f()
-; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[P]]
-; CHECK-NEXT: ret i32 addrspace(1)* [[P]]
-;
- %p = tail call i8* @malloc(i32 4)
- %p.bc = bitcast i8* %p to i32*
- %P = addrspacecast i32* %p.bc to i32 addrspace(1)*
- %DEAD = load i32, i32 addrspace(1)* %P
- %DEAD2 = add i32 %DEAD, 1
- store i32 %DEAD2, i32 addrspace(1)* %P
- call void @test13f( )
- store i32 0, i32 addrspace(1)* %P
- ret i32 addrspace(1)* %P
-}
-
declare noalias i8* @malloc(i32)
declare noalias i8* @calloc(i32, i32)
@@ -241,26 +133,6 @@ define void @test22(i1 %i, i32 %k, i32 %m) nounwind {
ret void
}
-; Make sure same sized store to later element is deleted
-define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
-; CHECK-LABEL: @test24(
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A:%.*]], i64 0, i64 0
-; CHECK-NEXT: store i32 [[B:%.*]], i32* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
-; CHECK-NEXT: store i32 [[C:%.*]], i32* [[TMP2]], align 4
-; CHECK-NEXT: ret void
-;
- %1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
- store i32 0, i32* %1, align 4
- %2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
- store i32 0, i32* %2, align 4
- %3 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
- store i32 %b, i32* %3, align 4
- %4 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
- store i32 %c, i32* %4, align 4
- ret void
-}
-
; Remove redundant store if loaded value is in another block.
define i32 @test26(i1 %c, i32* %p) {
; CHECK-LABEL: @test26(
@@ -311,35 +183,6 @@ bb3:
declare void @unknown_func()
-; Don't remove redundant store because of unknown call.
-define i32 @test30(i1 %c, i32* %p, i32 %i) {
-; CHECK-LABEL: @test30(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
-; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: br label [[BB3:%.*]]
-; CHECK: bb2:
-; CHECK-NEXT: call void @unknown_func()
-; CHECK-NEXT: br label [[BB3]]
-; CHECK: bb3:
-; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4
-; CHECK-NEXT: ret i32 0
-;
-entry:
- %v = load i32, i32* %p, align 4
- br i1 %c, label %bb1, label %bb2
-bb1:
- br label %bb3
-bb2:
- ; Might overwrite value at %p
- call void @unknown_func()
- br label %bb3
-bb3:
- store i32 %v, i32* %p, align 4
- ret i32 0
-}
-
; Remove redundant store if loaded value is in another block inside a loop.
define i32 @test31(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test31(
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
index b2c66f299459..ef05504b30f8 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple.ll
@@ -50,6 +50,76 @@ define void @test4(i32* %Q) {
ret void
}
+; PR8576 - Should delete store of 10 even though p/q are may aliases.
+define void @test2(i32 *%p, i32 *%q) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: store i32 20, i32* [[Q:%.*]], align 4
+; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ store i32 10, i32* %p, align 4
+ store i32 20, i32* %q, align 4
+ store i32 30, i32* %p, align 4
+ ret void
+}
+
+; Should delete store of 10 even though memset is a may-store to P (P and Q may
+; alias).
+define void @test6(i32 *%p, i8 *%q) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* [[Q:%.*]], i8 42, i64 900, i1 false)
+; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ store i32 10, i32* %p, align 4 ;; dead.
+ call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i1 false)
+ store i32 30, i32* %p, align 4
+ ret void
+}
+
+; Should delete store of 10 even though memset is a may-store to P (P and Q may
+; alias).
+define void @test6_atomic(i32* align 4 %p, i8* align 4 %q) {
+; CHECK-LABEL: @test6_atomic(
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[Q:%.*]], i8 42, i64 900, i32 4)
+; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT: ret void
+;
+ store atomic i32 10, i32* %p unordered, align 4 ;; dead.
+ call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %q, i8 42, i64 900, i32 4)
+ store atomic i32 30, i32* %p unordered, align 4
+ ret void
+}
+
+; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
+; alias).
+define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[Q:%.*]], i8* [[R:%.*]], i64 900, i1 false)
+; CHECK-NEXT: store i32 30, i32* [[P:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+ store i32 10, i32* %p, align 4 ;; dead.
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i1 false)
+ store i32 30, i32* %p, align 4
+ ret void
+}
+
+; Should delete store of 10 even though memcpy is a may-store to P (P and Q may
+; alias).
+define void @test7_atomic(i32* align 4 %p, i8* align 4 %q, i8* noalias align 4 %r) {
+; CHECK-LABEL: @test7_atomic(
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 [[Q:%.*]], i8* align 4 [[R:%.*]], i64 900, i32 4)
+; CHECK-NEXT: store atomic i32 30, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT: ret void
+;
+ store atomic i32 10, i32* %p unordered, align 4 ;; dead.
+ call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 4 %q, i8* align 4 %r, i64 900, i32 4)
+ store atomic i32 30, i32* %p unordered, align 4
+ ret void
+}
+
+
; va_arg has fuzzy dependence, the store shouldn't be zapped.
define double @test10(i8* %X) {
; CHECK-LABEL: @test10(
@@ -64,6 +134,46 @@ define double @test10(i8* %X) {
ret double %tmp.0
}
+; %P doesn't escape, the DEAD instructions should be removed.
+declare void @test13f()
+define i32* @test13() {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[PTR:%.*]] = tail call i8* @malloc(i32 4)
+; CHECK-NEXT: [[P:%.*]] = bitcast i8* [[PTR]] to i32*
+; CHECK-NEXT: call void @test13f()
+; CHECK-NEXT: store i32 0, i32* [[P]]
+; CHECK-NEXT: ret i32* [[P]]
+;
+ %ptr = tail call i8* @malloc(i32 4)
+ %P = bitcast i8* %ptr to i32*
+ %DEAD = load i32, i32* %P
+ %DEAD2 = add i32 %DEAD, 1
+ store i32 %DEAD2, i32* %P
+ call void @test13f( )
+ store i32 0, i32* %P
+ ret i32* %P
+}
+
+define i32 addrspace(1)* @test13_addrspacecast() {
+; CHECK-LABEL: @test13_addrspacecast(
+; CHECK-NEXT: [[P:%.*]] = tail call i8* @malloc(i32 4)
+; CHECK-NEXT: [[P_BC:%.*]] = bitcast i8* [[P]] to i32*
+; CHECK-NEXT: [[P:%.*]] = addrspacecast i32* [[P_BC]] to i32 addrspace(1)*
+; CHECK-NEXT: call void @test13f()
+; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[P]]
+; CHECK-NEXT: ret i32 addrspace(1)* [[P]]
+;
+ %p = tail call i8* @malloc(i32 4)
+ %p.bc = bitcast i8* %p to i32*
+ %P = addrspacecast i32* %p.bc to i32 addrspace(1)*
+ %DEAD = load i32, i32 addrspace(1)* %P
+ %DEAD2 = add i32 %DEAD, 1
+ store i32 %DEAD2, i32 addrspace(1)* %P
+ call void @test13f( )
+ store i32 0, i32 addrspace(1)* %P
+ ret i32 addrspace(1)* %P
+}
+
declare noalias i8* @malloc(i32)
declare noalias i8* @calloc(i32, i32)
@@ -108,6 +218,26 @@ define noalias i8* @test23() nounwind uwtable ssp {
ret i8* %call
}
+; Make sure same sized store to later element is deleted
+define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A:%.*]], i64 0, i64 0
+; CHECK-NEXT: store i32 [[B:%.*]], i32* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[A]], i64 0, i64 1
+; CHECK-NEXT: store i32 [[C:%.*]], i32* [[TMP2]], align 4
+; CHECK-NEXT: ret void
+;
+ %1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
+ store i32 0, i32* %1, align 4
+ %2 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
+ store i32 0, i32* %2, align 4
+ %3 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 0
+ store i32 %b, i32* %3, align 4
+ %4 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 0, i64 1
+ store i32 %c, i32* %4, align 4
+ ret void
+}
+
; Check another case like PR13547 where strdup is not like malloc.
define i8* @test25(i8* %p) nounwind {
; CHECK-LABEL: @test25(
@@ -187,6 +317,35 @@ bb3:
declare void @unknown_func()
+; Don't remove redundant store because of unknown call.
+define i32 @test30(i1 %c, i32* %p, i32 %i) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT: br i1 [[C:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: call void @unknown_func()
+; CHECK-NEXT: br label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: store i32 [[V]], i32* [[P]], align 4
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %v = load i32, i32* %p, align 4
+ br i1 %c, label %bb1, label %bb2
+bb1:
+ br label %bb3
+bb2:
+ ; Might overwrite value at %p
+ call void @unknown_func()
+ br label %bb3
+bb3:
+ store i32 %v, i32* %p, align 4
+ ret i32 0
+}
+
; Don't remove redundant store in a loop with a may-alias store.
define i32 @test32(i1 %c, i32* %p, i32 %i) {
; CHECK-LABEL: @test32(
@@ -294,8 +453,7 @@ define void @test37_atomic(i8* %P, i8* %Q, i8* %R) {
; The memmove is dead, because memcpy arguments cannot overlap.
define void @test38(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test38(
-; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[Q:%.*]], i64 12, i1 false)
-; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P]], i8* [[R:%.*]], i64 12, i1 false)
+; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[P:%.*]], i8* [[R:%.*]], i64 12, i1 false)
; CHECK-NEXT: ret void
;
@@ -307,8 +465,7 @@ define void @test38(i8* %P, i8* %Q, i8* %R) {
; The memmove is dead, because memcpy arguments cannot overlap.
define void @test38_atomic(i8* %P, i8* %Q, i8* %R) {
; CHECK-LABEL: @test38_atomic(
-; CHECK-NEXT: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[Q:%.*]], i64 12, i32 1)
-; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P]], i8* align 1 [[R:%.*]], i64 12, i32 1)
+; CHECK-NEXT: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 [[P:%.*]], i8* align 1 [[R:%.*]], i64 12, i32 1)
; CHECK-NEXT: ret void
;
@@ -379,7 +536,9 @@ declare void @free(i8* nocapture)
define void @test41(i32* noalias %P) {
; CHECK-LABEL: @test41(
; CHECK-NEXT: [[P2:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT: store i32 1, i32* [[P]]
; CHECK-NEXT: call void @unknown_func()
+; CHECK-NEXT: store i32 2, i32* [[P]]
; CHECK-NEXT: call void @free(i8* [[P2]])
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list