[llvm] e717fdb - [DSE,MemorySSA] Traverse use-def chain without MemSSA Walker.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 27 02:03:58 PDT 2020
Author: Florian Hahn
Date: 2020-08-27T10:02:02+01:00
New Revision: e717fdb0f155deaa03eaae891bd34743e6ffcb64
URL: https://github.com/llvm/llvm-project/commit/e717fdb0f155deaa03eaae891bd34743e6ffcb64
DIFF: https://github.com/llvm/llvm-project/commit/e717fdb0f155deaa03eaae891bd34743e6ffcb64.diff
LOG: [DSE,MemorySSA] Traverse use-def chain without MemSSA Walker.
For DSE with MemorySSA it is beneficial to manually traverse the
defining access, instead of using a MemorySSA walker, so we can
better control the number of steps together with other limits and
also weed out invalid/unprofitable paths early on.
This patch requires a follow-up patch to be most effective, which I will
share soon after putting this patch up.
This temporarily XFAIL's the limit tests, because we now explore more
MemoryDefs that may not alias/clobber the killing def. This will be
improved/fixed by the follow-up patch.
This patch also renames some `Dom*` variables to `Earlier*`, because the
dominance relation is not really used/important here and potentially
confusing.
This patch allows us to aggressively cut down compile time, geomean
-O3 -0.64%, ReleaseThinLTO -1.65%, at the expense of fewer stores
removed. Subsequent patches will increase the number of removed stores
again, while keeping compile-time in check.
http://llvm-compile-time-tracker.com/compare.php?from=d8e3294118a8c5f3f97688a704d5a05b67646012&to=0a929b6978a068af8ddb02d0d4714a2843dd8ba9&stat=instructions
Reviewed By: asbirlea
Differential Revision: https://reviews.llvm.org/D86486
Added:
Modified:
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll
llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 4e0c7ac25969..3afe876c8ae6 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -111,12 +111,29 @@ static cl::opt<unsigned>
MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
cl::desc("The number of memory instructions to scan for "
"dead store elimination (default = 100)"));
+static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
+ "dse-memoryssa-walklimit", cl::init(70), cl::Hidden,
+ cl::desc("The maximum number of steps while walking upwards to find "
+ "MemoryDefs that may be killed (default = 70)"));
static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
"dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
"other stores per basic block (default = 5000)"));
+static cl::opt<unsigned> MemorySSASameBBStepCost(
+ "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
+ cl::desc(
+ "The cost of a step in the same basic block as the killing MemoryDef"
+ "(default = 1)"));
+
+static cl::opt<unsigned>
+ MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
+ cl::Hidden,
+ cl::desc("The cost of a step in a
diff erent basic "
+ "block than the killing MemoryDef"
+ "(default = 5)"));
+
static cl::opt<unsigned> MemorySSAPathCheckLimit(
"dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
cl::desc("The maximum number of blocks to check when trying to prove that "
@@ -1442,16 +1459,17 @@ namespace {
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
-// 1. Get the next dominating clobbering MemoryDef (DomAccess) by walking
+// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking
// upwards.
-// 2. Check that there are no reads between DomAccess and the StartDef by
-// checking all uses starting at DomAccess and walking until we see StartDef.
-// 3. For each found DomDef, check that:
-// 1. There are no barrier instructions between DomDef and StartDef (like
+// 2. Check that there are no reads between EarlierAccess and the StartDef by
+// checking all uses starting at EarlierAccess and walking until we see
+// StartDef.
+// 3. For each found EarlierDef, check that:
+// 1. There are no barrier instructions between EarlierDef and StartDef (like
// throws or stores with ordering constraints).
-// 2. StartDef is executed whenever DomDef is executed.
-// 3. StartDef completely overwrites DomDef.
-// 4. Erase DomDef from the function and MemorySSA.
+// 2. StartDef is executed whenever EarlierDef is executed.
+// 3. StartDef completely overwrites EarlierDef.
+// 4. Erase EarlierDef from the function and MemorySSA.
// Returns true if \p M is an intrisnic that does not read or write memory.
bool isNoopIntrinsic(MemoryUseOrDef *M) {
@@ -1792,13 +1810,12 @@ struct DSEState {
Optional<MemoryAccess *>
getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *Current,
MemoryLocation DefLoc, const Value *DefUO, CheckCache &Cache,
- unsigned &ScanLimit) {
- if (ScanLimit == 0) {
+ unsigned &ScanLimit, unsigned &WalkerStepLimit) {
+ if (ScanLimit == 0 || WalkerStepLimit == 0) {
LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
return None;
}
- MemoryAccess *DomAccess;
MemoryAccess *StartAccess = Current;
bool StepAgain;
LLVM_DEBUG(dbgs() << " trying to get dominating access for " << *Current
@@ -1810,42 +1827,42 @@ struct DSEState {
if (MSSA.isLiveOnEntryDef(Current))
return None;
- if (isa<MemoryPhi>(Current)) {
- DomAccess = Current;
- break;
- }
- MemoryUseOrDef *CurrentUD = cast<MemoryUseOrDef>(Current);
- // Look for access that clobber DefLoc.
- DomAccess = MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(CurrentUD,
- DefLoc);
- if (MSSA.isLiveOnEntryDef(DomAccess))
+ // Cost of a step. Accesses in the same block are more likely to be valid
+ // candidates for elimination, hence consider them cheaper.
+ unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
+ ? MemorySSASameBBStepCost
+ : MemorySSAOtherBBStepCost;
+ if (WalkerStepLimit <= StepCost)
return None;
+ WalkerStepLimit -= StepCost;
- if (isa<MemoryPhi>(DomAccess))
+ if (isa<MemoryPhi>(Current))
break;
- // Check if we can skip DomDef for DSE.
- MemoryDef *DomDef = dyn_cast<MemoryDef>(DomAccess);
- if (DomDef && canSkipDef(DomDef, !isInvisibleToCallerBeforeRet(DefUO))) {
+ // Check if we can skip EarlierDef for DSE.
+ MemoryDef *CurrentDef = dyn_cast<MemoryDef>(Current);
+ if (CurrentDef &&
+ canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO))) {
StepAgain = true;
- Current = DomDef->getDefiningAccess();
+ Current = CurrentDef->getDefiningAccess();
}
-
} while (StepAgain);
+ MemoryAccess *EarlierAccess = Current;
// Accesses to objects accessible after the function returns can only be
// eliminated if the access is killed along all paths to the exit. Collect
// the blocks with killing (=completely overwriting MemoryDefs) and check if
- // they cover all paths from DomAccess to any function exit.
+ // they cover all paths from EarlierAccess to any function exit.
SmallPtrSet<Instruction *, 16> KillingDefs;
KillingDefs.insert(KillingDef->getMemoryInst());
- Instruction *DomMemInst = isa<MemoryDef>(DomAccess)
- ? cast<MemoryDef>(DomAccess)->getMemoryInst()
- : nullptr;
+ Instruction *EarlierMemInst =
+ isa<MemoryDef>(EarlierAccess)
+ ? cast<MemoryDef>(EarlierAccess)->getMemoryInst()
+ : nullptr;
LLVM_DEBUG({
- dbgs() << " Checking for reads of " << *DomAccess;
- if (DomMemInst)
- dbgs() << " (" << *DomMemInst << ")\n";
+ dbgs() << " Checking for reads of " << *EarlierAccess;
+ if (EarlierMemInst)
+ dbgs() << " (" << *EarlierMemInst << ")\n";
else
dbgs() << ")\n";
});
@@ -1855,14 +1872,14 @@ struct DSEState {
for (Use &U : Acc->uses())
WorkList.insert(cast<MemoryAccess>(U.getUser()));
};
- PushMemUses(DomAccess);
+ PushMemUses(EarlierAccess);
// Optimistically collect all accesses for reads. If we do not find any
// read clobbers, add them to the cache.
SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
- if (!DomMemInst || !DomMemInst->mayReadFromMemory())
- KnownNoReads.insert(DomAccess);
- // Check if DomDef may be read.
+ if (!EarlierMemInst || !EarlierMemInst->mayReadFromMemory())
+ KnownNoReads.insert(EarlierAccess);
+ // Check if EarlierDef may be read.
for (unsigned I = 0; I < WorkList.size(); I++) {
MemoryAccess *UseAccess = WorkList[I];
@@ -1928,15 +1945,15 @@ struct DSEState {
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
Cache.KnownReads.insert(UseAccess);
Cache.KnownReads.insert(StartAccess);
- Cache.KnownReads.insert(DomAccess);
+ Cache.KnownReads.insert(EarlierAccess);
return None;
}
- // For the KillingDef and DomAccess we only have to check if it reads the
- // memory location.
+ // For the KillingDef and EarlierAccess we only have to check if it reads
+ // the memory location.
// TODO: It would probably be better to check for self-reads before
// calling the function.
- if (KillingDef == UseAccess || DomAccess == UseAccess) {
+ if (KillingDef == UseAccess || EarlierAccess == UseAccess) {
LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
continue;
}
@@ -1945,7 +1962,7 @@ struct DSEState {
// the original location. Otherwise we have to check uses of *all*
// MemoryDefs we discover, including non-aliasing ones. Otherwise we might
// miss cases like the following
- // 1 = Def(LoE) ; <----- DomDef stores [0,1]
+ // 1 = Def(LoE) ; <----- EarlierDef stores [0,1]
// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
// Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
// (The Use points to the *first* Def it may alias)
@@ -1953,10 +1970,11 @@ struct DSEState {
// stores [0,1]
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
if (isCompleteOverwrite(DefLoc, UseInst)) {
- if (!isInvisibleToCallerAfterRet(DefUO) && UseAccess != DomAccess) {
+ if (!isInvisibleToCallerAfterRet(DefUO) &&
+ UseAccess != EarlierAccess) {
BasicBlock *MaybeKillingBlock = UseInst->getParent();
if (PostOrderNumbers.find(MaybeKillingBlock)->second <
- PostOrderNumbers.find(DomAccess->getBlock())->second) {
+ PostOrderNumbers.find(EarlierAccess->getBlock())->second) {
LLVM_DEBUG(dbgs()
<< " ... found killing def " << *UseInst << "\n");
@@ -1969,8 +1987,8 @@ struct DSEState {
}
// For accesses to locations visible after the function returns, make sure
- // that the location is killed (=overwritten) along all paths from DomAccess
- // to the exit.
+ // that the location is killed (=overwritten) along all paths from
+ // EarlierAccess to the exit.
if (!isInvisibleToCallerAfterRet(DefUO)) {
SmallPtrSet<BasicBlock *, 16> KillingBlocks;
for (Instruction *KD : KillingDefs)
@@ -1988,22 +2006,23 @@ struct DSEState {
}
// If CommonPred is in the set of killing blocks, just check if it
- // post-dominates DomAccess.
+ // post-dominates EarlierAccess.
if (KillingBlocks.count(CommonPred)) {
- if (PDT.dominates(CommonPred, DomAccess->getBlock()))
- return {DomAccess};
+ if (PDT.dominates(CommonPred, EarlierAccess->getBlock()))
+ return {EarlierAccess};
return None;
}
- // If the common post-dominator does not post-dominate DomAccess, there
- // is a path from DomAccess to an exit not going through a killing block.
- if (PDT.dominates(CommonPred, DomAccess->getBlock())) {
+ // If the common post-dominator does not post-dominate EarlierAccess,
+ // there is a path from EarlierAccess to an exit not going through a
+ // killing block.
+ if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) {
SetVector<BasicBlock *> WorkList;
- // DomAccess's post-order number provides an upper bound of the blocks
- // on a path starting at DomAccess.
+ // EarlierAccess's post-order number provides an upper bound of the
+ // blocks on a path starting at EarlierAccess.
unsigned UpperBound =
- PostOrderNumbers.find(DomAccess->getBlock())->second;
+ PostOrderNumbers.find(EarlierAccess->getBlock())->second;
// If CommonPred is null, there are multiple exits from the function.
// They all have to be added to the worklist.
@@ -2015,22 +2034,22 @@ struct DSEState {
NumCFGTries++;
// Check if all paths starting from an exit node go through one of the
- // killing blocks before reaching DomAccess.
+ // killing blocks before reaching EarlierAccess.
for (unsigned I = 0; I < WorkList.size(); I++) {
NumCFGChecks++;
BasicBlock *Current = WorkList[I];
if (KillingBlocks.count(Current))
continue;
- if (Current == DomAccess->getBlock())
+ if (Current == EarlierAccess->getBlock())
return None;
- // DomAccess is reachable from the entry, so we don't have to explore
- // unreachable blocks further.
+ // EarlierAccess is reachable from the entry, so we don't have to
+ // explore unreachable blocks further.
if (!DT.isReachableFromEntry(Current))
continue;
unsigned CPO = PostOrderNumbers.find(Current)->second;
- // Current block is not on a path starting at DomAccess.
+ // Current block is not on a path starting at EarlierAccess.
if (CPO > UpperBound)
continue;
for (BasicBlock *Pred : predecessors(Current))
@@ -2040,14 +2059,15 @@ struct DSEState {
return None;
}
NumCFGSuccess++;
- return {DomAccess};
+ return {EarlierAccess};
}
return None;
}
- // No aliasing MemoryUses of DomAccess found, DomAccess is potentially dead.
+ // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
+ // potentially dead.
Cache.KnownNoReads.insert(KnownNoReads.begin(), KnownNoReads.end());
- return {DomAccess};
+ return {EarlierAccess};
}
// Delete dead memory defs
@@ -2248,6 +2268,7 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
<< *KillingDef << " (" << *SI << ")\n");
unsigned ScanLimit = MemorySSAScanLimit;
+ unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
// Worklist of MemoryAccesses that may be killed by KillingDef.
SetVector<MemoryAccess *> ToCheck;
ToCheck.insert(KillingDef->getDefiningAccess());
@@ -2259,22 +2280,23 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
if (State.SkipStores.count(Current))
continue;
- Optional<MemoryAccess *> Next = State.getDomMemoryDef(
- KillingDef, Current, SILoc, SILocUnd, Cache, ScanLimit);
+ Optional<MemoryAccess *> Next =
+ State.getDomMemoryDef(KillingDef, Current, SILoc, SILocUnd, Cache,
+ ScanLimit, WalkerStepLimit);
if (!Next) {
LLVM_DEBUG(dbgs() << " finished walk\n");
continue;
}
- MemoryAccess *DomAccess = *Next;
- LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DomAccess);
- if (isa<MemoryPhi>(DomAccess)) {
+ MemoryAccess *EarlierAccess = *Next;
+ LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess);
+ if (isa<MemoryPhi>(EarlierAccess)) {
LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
- for (Value *V : cast<MemoryPhi>(DomAccess)->incoming_values()) {
+ for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) {
MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
BasicBlock *IncomingBlock = IncomingAccess->getBlock();
- BasicBlock *PhiBlock = DomAccess->getBlock();
+ BasicBlock *PhiBlock = EarlierAccess->getBlock();
// We only consider incoming MemoryAccesses that come before the
// MemoryPhi. Otherwise we could discover candidates that do not
@@ -2285,7 +2307,7 @@ bool eliminateDeadStoresMemorySSA(Function &F, AliasAnalysis &AA,
}
continue;
}
- MemoryDef *NextDef = dyn_cast<MemoryDef>(DomAccess);
+ MemoryDef *NextDef = dyn_cast<MemoryDef>(EarlierAccess);
Instruction *NI = NextDef->getMemoryInst();
LLVM_DEBUG(dbgs() << " (" << *NI << ")\n");
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll
index 9def78290089..4171b714d1be 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/debug-counter.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; XFAIL: *
+
; REQUIRES: asserts
; Eliminates store to %R in the entry block.
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll
index 5ee6fe1f69d5..ae3066192a00 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/memoryssa-scan-limit.ll
@@ -1,4 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+
+; XFAIL: *
+
; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -S | FileCheck --check-prefix=NO-LIMIT %s
; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=0 -S | FileCheck --check-prefix=LIMIT-0 %s
; RUN: opt < %s -basic-aa -dse -enable-dse-memoryssa -dse-memoryssa-scanlimit=2 -S | FileCheck --check-prefix=LIMIT-2 %s
diff --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll
index 28f81015c799..841325103667 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/multiblock-multipath.ll
@@ -406,7 +406,6 @@ define void @accessible_after_return11_loop() {
; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond()
; CHECK-NEXT: br i1 [[C_1]], label [[FOR_BODY_I]], label [[INIT_PARSE_EXIT:%.*]]
; CHECK: init_parse.exit:
-; CHECK-NEXT: store i32 0, i32* @linenum, align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* nonnull undef)
; CHECK-NEXT: store i32 0, i32* @linenum, align 4
; CHECK-NEXT: br label [[FOR_BODY_I20:%.*]]
More information about the llvm-commits
mailing list