[llvm] 297088d - Revert "[DSE] Remove stores in the same loop iteration"
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 8 13:23:34 PDT 2021
Author: David Green
Date: 2021-06-08T21:23:08+01:00
New Revision: 297088d1add70cae554c8f96dde3a97a3e8d56a5
URL: https://github.com/llvm/llvm-project/commit/297088d1add70cae554c8f96dde3a97a3e8d56a5
DIFF: https://github.com/llvm/llvm-project/commit/297088d1add70cae554c8f96dde3a97a3e8d56a5.diff
LOG: Revert "[DSE] Remove stores in the same loop iteration"
Apparently non-dead stores are being removed, as noted in D100464.
This reverts commit 222aeb4d51a46c5a81c9e4ccb16d1d19dd21ec95.
Added:
Modified:
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
llvm/test/Other/opt-O2-pipeline.ll
llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
llvm/test/Other/opt-O3-pipeline.ll
llvm/test/Other/opt-Os-pipeline.ll
llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e6b9c2a0e580..94f3e0d3464f 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -40,12 +40,10 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
-#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -855,11 +853,6 @@ struct DSEState {
PostDominatorTree &PDT;
const TargetLibraryInfo &TLI;
const DataLayout &DL;
- const LoopInfo &LI;
-
- // Whether the function contains any irreducible control flow, useful for
- // being accurately able to detect loops.
- bool ContainsIrreducibleLoops;
// All MemoryDefs that potentially could kill other MemDefs.
SmallVector<MemoryDef *, 64> MemDefs;
@@ -883,15 +876,14 @@ struct DSEState {
DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
- PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
- const LoopInfo &LI)
+ PostDominatorTree &PDT, const TargetLibraryInfo &TLI)
: F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
- DL(F.getParent()->getDataLayout()), LI(LI) {}
+ DL(F.getParent()->getDataLayout()) {}
static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
- const TargetLibraryInfo &TLI, const LoopInfo &LI) {
- DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+ const TargetLibraryInfo &TLI) {
+ DSEState State(F, AA, MSSA, DT, PDT, TLI);
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
@@ -919,9 +911,6 @@ struct DSEState {
State.InvisibleToCallerAfterRet.insert({&AI, true});
}
- // Collect whether there is any irreducible control flow in the function.
- State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
-
return State;
}
@@ -936,12 +925,6 @@ struct DSEState {
isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
const MemoryLocation &Later, const MemoryLocation &Earlier,
int64_t &EarlierOff, int64_t &LaterOff) {
- // AliasAnalysis does not always account for loops. Limit overwrite checks
- // to dependencies for which we can guarantee they are independant of any
- // loops they are in.
- if (!isGuaranteedLoopIndependent(EarlierI, LaterI, Earlier))
- return OW_Unknown;
-
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
@@ -1267,27 +1250,11 @@ struct DSEState {
return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
}
- /// Returns true if a dependency between \p Current and \p KillingDef is
- /// guaranteed to be loop invariant for the loops that they are in. Either
- /// because they are known to be in the same block, in the same loop level or
- /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
- /// during execution of the containing function.
- bool isGuaranteedLoopIndependent(const Instruction *Current,
- const Instruction *KillingDef,
- const MemoryLocation &CurrentLoc) {
- // If the dependency is within the same block or loop level (being careful of
- // irreducible loops), we know that AA will return a valid result for the
- // memory dependency. (Both at the function level, outside of any loop,
- // would also be valid but we currently disable that to limit compile time).
- if (Current->getParent() == KillingDef->getParent())
- return true;
- const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
- if (!ContainsIrreducibleLoops && CurrentLI &&
- CurrentLI == LI.getLoopFor(KillingDef->getParent()))
- return true;
-
- // Otherwise check the memory location is invariant to any loops.
- auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) {
+ /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
+ /// loop. In particular, this guarantees that it only references a single
+ /// MemoryLocation during execution of the containing function.
+ bool IsGuaranteedLoopInvariant(Value *Ptr) {
+ auto IsGuaranteedLoopInvariantBase = [this](Value *Ptr) {
Ptr = Ptr->stripPointerCasts();
if (auto *I = dyn_cast<Instruction>(Ptr)) {
if (isa<AllocaInst>(Ptr))
@@ -1301,7 +1268,7 @@ struct DSEState {
return true;
};
- const Value *Ptr = CurrentLoc.Ptr->stripPointerCasts();
+ Ptr = Ptr->stripPointerCasts();
if (auto *I = dyn_cast<Instruction>(Ptr)) {
if (I->getParent()->isEntryBlock())
return true;
@@ -1431,9 +1398,9 @@ struct DSEState {
// AliasAnalysis does not account for loops. Limit elimination to
// candidates for which we can guarantee they always store to the same
- // memory location and not located in
diff erent loops.
- if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
- LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
+ // memory location and not multiple locations in a loop.
+ if (Current->getBlock() != KillingDef->getBlock() &&
+ !IsGuaranteedLoopInvariant(const_cast<Value *>(CurrentLoc->Ptr))) {
StepAgain = true;
Current = CurrentDef->getDefiningAccess();
WalkerStepLimit -= 1;
@@ -1869,13 +1836,12 @@ struct DSEState {
}
};
-static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
- DominatorTree &DT, PostDominatorTree &PDT,
- const TargetLibraryInfo &TLI,
- const LoopInfo &LI) {
+bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
+ DominatorTree &DT, PostDominatorTree &PDT,
+ const TargetLibraryInfo &TLI) {
bool MadeChange = false;
- DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI);
+ DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
@@ -2048,9 +2014,8 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
- bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+ bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
@@ -2064,7 +2029,6 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<MemorySSAAnalysis>();
- PA.preserve<LoopAnalysis>();
return PA;
}
@@ -2090,9 +2054,8 @@ class DSELegacyPass : public FunctionPass {
MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
PostDominatorTree &PDT =
getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+ bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
@@ -2114,8 +2077,6 @@ class DSELegacyPass : public FunctionPass {
AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<PostDominatorTreeWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
}
};
@@ -2132,7 +2093,6 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)
diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
index 25051b885f9d..021012aff54d 100644
--- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
@@ -517,8 +517,8 @@
; GCN-O2-NEXT: Function Alias Analysis Results
; GCN-O2-NEXT: Memory SSA
; GCN-O2-NEXT: MemCpy Optimization
-; GCN-O2-NEXT: Natural Loop Information
; GCN-O2-NEXT: Dead Store Elimination
+; GCN-O2-NEXT: Natural Loop Information
; GCN-O2-NEXT: Canonicalize natural loops
; GCN-O2-NEXT: LCSSA Verifier
; GCN-O2-NEXT: Loop-Closed SSA Form Pass
@@ -877,8 +877,8 @@
; GCN-O3-NEXT: Function Alias Analysis Results
; GCN-O3-NEXT: Memory SSA
; GCN-O3-NEXT: MemCpy Optimization
-; GCN-O3-NEXT: Natural Loop Information
; GCN-O3-NEXT: Dead Store Elimination
+; GCN-O3-NEXT: Natural Loop Information
; GCN-O3-NEXT: Canonicalize natural loops
; GCN-O3-NEXT: LCSSA Verifier
; GCN-O3-NEXT: Loop-Closed SSA Form Pass
diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll
index 542a79820e4f..36370d6cd6d3 100644
--- a/llvm/test/Other/opt-O2-pipeline.ll
+++ b/llvm/test/Other/opt-O2-pipeline.ll
@@ -162,8 +162,8 @@
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: MemCpy Optimization
-; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Dead Store Elimination
+; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: LCSSA Verifier
; CHECK-NEXT: Loop-Closed SSA Form Pass
diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
index 4c5e126f229f..c39361dcc8a2 100644
--- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
+++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
@@ -167,8 +167,8 @@
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: MemCpy Optimization
-; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Dead Store Elimination
+; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: LCSSA Verifier
; CHECK-NEXT: Loop-Closed SSA Form Pass
diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll
index 97b11fec08b9..af77e09c2496 100644
--- a/llvm/test/Other/opt-O3-pipeline.ll
+++ b/llvm/test/Other/opt-O3-pipeline.ll
@@ -167,8 +167,8 @@
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: MemCpy Optimization
-; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Dead Store Elimination
+; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: LCSSA Verifier
; CHECK-NEXT: Loop-Closed SSA Form Pass
diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll
index b65e550ce9eb..e24d3d3f57bf 100644
--- a/llvm/test/Other/opt-Os-pipeline.ll
+++ b/llvm/test/Other/opt-Os-pipeline.ll
@@ -148,8 +148,8 @@
; CHECK-NEXT: Function Alias Analysis Results
; CHECK-NEXT: Memory SSA
; CHECK-NEXT: MemCpy Optimization
-; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Dead Store Elimination
+; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
; CHECK-NEXT: LCSSA Verifier
; CHECK-NEXT: Loop-Closed SSA Form Pass
diff --git a/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll b/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll
index b1878084f5fd..e887a478120f 100644
--- a/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/multiblock-loops.ll
@@ -111,6 +111,7 @@ define void @test_loop(i32 %N, i32* noalias nocapture readonly %A, i32* noalias
; CHECK: for.body4.lr.ph:
; CHECK-NEXT: [[I_028:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[FOR_BODY4_LR_PH_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[I_028]]
+; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I_028]], [[N]]
; CHECK-NEXT: br label [[FOR_BODY4:%.*]]
; CHECK: for.body4:
More information about the llvm-commits
mailing list