[llvm] 02e3d5c - Fix DSE miscompile when store is clobbered across loop iterations
Artur Pilipenko via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 27 14:43:10 PST 2020
Author: Artur Pilipenko
Date: 2020-02-27T14:43:01-08:00
New Revision: 02e3d5c3a25b9c00c0ab9bdd59931fde606ee0b4
URL: https://github.com/llvm/llvm-project/commit/02e3d5c3a25b9c00c0ab9bdd59931fde606ee0b4
DIFF: https://github.com/llvm/llvm-project/commit/02e3d5c3a25b9c00c0ab9bdd59931fde606ee0b4.diff
LOG: Fix DSE miscompile when store is clobbered across loop iterations
DSE would mistakenly remove store (2):
a = calloc(n+1)
for (int i = 0; i < n; i++) {
store 1, a[i+1] // (1)
store 0, a[i] // (2)
}
The fix is to do PHI translation while looking for clobbering
instructions between the store and the calloc.
Reviewed By: efriedma, bjope
Differential Revision: https://reviews.llvm.org/D68006
Added:
Modified:
llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/test/Transforms/DeadStoreElimination/simple.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 8dfee0b4ba8f..d5ca36b09579 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -617,23 +617,39 @@ static bool isPossibleSelfRead(Instruction *Inst,
/// instruction.
static bool memoryIsNotModifiedBetween(Instruction *FirstI,
Instruction *SecondI,
- AliasAnalysis *AA) {
- SmallVector<BasicBlock *, 16> WorkList;
- SmallPtrSet<BasicBlock *, 8> Visited;
+ AliasAnalysis *AA,
+ const DataLayout &DL,
+ DominatorTree *DT) {
+ // Do a backwards scan through the CFG from SecondI to FirstI. Look for
+ // instructions which can modify the memory location accessed by SecondI.
+ //
+ // While doing the walk keep track of the address to check. It might be
+ // different in different basic blocks due to PHI translation.
+ using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
+ SmallVector<BlockAddressPair, 16> WorkList;
+ // Keep track of the address we visited each block with. Bail out if we
+ // visit a block with different addresses.
+ DenseMap<BasicBlock *, Value *> Visited;
+
BasicBlock::iterator FirstBBI(FirstI);
++FirstBBI;
BasicBlock::iterator SecondBBI(SecondI);
BasicBlock *FirstBB = FirstI->getParent();
BasicBlock *SecondBB = SecondI->getParent();
MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+ auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
// Start checking the SecondBB.
- WorkList.push_back(SecondBB);
+ WorkList.push_back(
+ std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
bool isFirstBlock = true;
// Check all blocks going backward until we reach the FirstBB.
while (!WorkList.empty()) {
- BasicBlock *B = WorkList.pop_back_val();
+ BlockAddressPair Current = WorkList.pop_back_val();
+ BasicBlock *B = Current.first;
+ PHITransAddr &Addr = Current.second;
+ Value *Ptr = Addr.getAddr();
// Ignore instructions before FirstI if this is the FirstBB.
BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
@@ -652,16 +668,31 @@ static bool memoryIsNotModifiedBetween(Instruction *FirstI,
for (; BI != EI; ++BI) {
Instruction *I = &*BI;
if (I->mayWriteToMemory() && I != SecondI)
- if (isModSet(AA->getModRefInfo(I, MemLoc)))
+ if (isModSet(AA->getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
return false;
}
if (B != FirstBB) {
assert(B != &FirstBB->getParent()->getEntryBlock() &&
"Should not hit the entry block because SI must be dominated by LI");
for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) {
- if (!Visited.insert(*PredI).second)
+ PHITransAddr PredAddr = Addr;
+ if (PredAddr.NeedsPHITranslationFromBlock(B)) {
+ if (!PredAddr.IsPotentiallyPHITranslatable())
+ return false;
+ if (PredAddr.PHITranslateValue(B, *PredI, DT, false))
+ return false;
+ }
+ Value *TranslatedPtr = PredAddr.getAddr();
+ auto Inserted = Visited.insert(std::make_pair(*PredI, TranslatedPtr));
+ if (!Inserted.second) {
+ // We already visited this block before. If it was with a different
+ // address - bail out!
+ if (TranslatedPtr != Inserted.first->second)
+ return false;
+ // ... otherwise just skip it.
continue;
- WorkList.push_back(*PredI);
+ }
+ WorkList.push_back(std::make_pair(*PredI, PredAddr));
}
}
}
@@ -1063,7 +1094,8 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
InstOverlapIntervalsTy &IOL,
- MapVector<Instruction *, bool> &ThrowableInst) {
+ MapVector<Instruction *, bool> &ThrowableInst,
+ DominatorTree *DT) {
// Must be a store instruction.
StoreInst *SI = dyn_cast<StoreInst>(Inst);
if (!SI)
@@ -1073,7 +1105,8 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
// then the store can be removed.
if (LoadInst *DepLoad = dyn_cast<LoadInst>(SI->getValueOperand())) {
if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- isRemovable(SI) && memoryIsNotModifiedBetween(DepLoad, SI, AA)) {
+ isRemovable(SI) &&
+ memoryIsNotModifiedBetween(DepLoad, SI, AA, DL, DT)) {
LLVM_DEBUG(
dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: "
@@ -1092,7 +1125,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
dyn_cast<Instruction>(GetUnderlyingObject(SI->getPointerOperand(), DL));
if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) &&
- memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA)) {
+ memoryIsNotModifiedBetween(UnderlyingPointer, SI, AA, DL, DT)) {
LLVM_DEBUG(
dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
<< *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
@@ -1140,7 +1173,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
// eliminateNoopStore will update in iterator, if necessary.
if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL,
- ThrowableInst)) {
+ ThrowableInst, DT)) {
MadeChange = true;
continue;
}
@@ -1258,7 +1291,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
Later && isa<ConstantInt>(Later->getValueOperand()) &&
DL.typeSizeEqualsStoreSize(
Later->getValueOperand()->getType()) &&
- memoryIsNotModifiedBetween(Earlier, Later, AA)) {
+ memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) {
// If the store we find is:
// a) partially overwritten by the store to 'Loc'
// b) the later store is fully contained in the earlier one and
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
index 5ccf875e49cd..84034caee55f 100644
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -893,5 +893,299 @@ define void @test39_atomic(i8* %P, i8* %Q, i8* %R) {
ret void
}
+define i32 @test40() {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i8 0, i8* [[P]]
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %m = call i8* @calloc(i32 9, i32 20)
+ br label %loop
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+ store i8 1, i8* %p.next
+ %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+ store i8 0, i8* %p
+ %continue = icmp ugt i64 %indvars.iv, 15
+ br i1 %continue, label %loop, label %return
+return:
+ ret i32 0
+}
+
+define i32 @test41() {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT:%.*]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT: br label [[CONT]]
+; CHECK: cont:
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i8 0, i8* [[P]]
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %m = call i8* @calloc(i32 9, i32 20)
+ br label %loop
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+ store i8 1, i8* %p.next
+ br label %cont
+
+cont:
+ %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+ store i8 0, i8* %p
+ %continue = icmp ugt i64 %indvars.iv, 15
+ br i1 %continue, label %loop, label %return
+
+return:
+ ret i32 0
+}
+
+; The store is redundant here, but currently we fail to eliminate it.
+; We are walking from the store up to the calloc and translate phis as
+; needed. In this case we fail to translate %p while going over the
+; backedge. Because of that we conservatively assume that zero initialized
+; memory is clobbered.
+define i32 @test42() {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT:%.*]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: br label [[CONT]]
+; CHECK: cont:
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i8 0, i8* [[P]]
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %m = call i8* @calloc(i32 9, i32 20)
+ br label %loop
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %cont
+
+cont:
+ %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+ store i8 0, i8* %p
+ %continue = icmp ugt i64 %indvars.iv, 15
+ br i1 %continue, label %loop, label %return
+
+return:
+ ret i32 0
+}
+
+define i32 @test43() {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT_2:%.*]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT: br label [[CONT:%.*]]
+; CHECK: cont:
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i8 0, i8* [[P]]
+; CHECK-NEXT: br label [[CONT_2]]
+; CHECK: cont.2:
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %m = call i8* @calloc(i32 9, i32 20)
+ br label %loop
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont.2 ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+ store i8 1, i8* %p.next
+ br label %cont
+
+cont:
+ %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+ store i8 0, i8* %p
+ br label %cont.2
+
+cont.2:
+ %continue = icmp ugt i64 %indvars.iv, 15
+ br i1 %continue, label %loop, label %return
+
+return:
+ ret i32 0
+}
+
+define i32 @test44() {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[CONT_2:%.*]] ]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[P_NEXT:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i8 0, i8* [[P]]
+; CHECK-NEXT: br label [[CONT:%.*]]
+; CHECK: cont:
+; CHECK-NEXT: store i8 1, i8* [[P_NEXT]]
+; CHECK-NEXT: br label [[CONT_2]]
+; CHECK: cont.2:
+; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ugt i64 [[INDVARS_IV]], 15
+; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[RETURN:%.*]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %m = call i8* @calloc(i32 9, i32 20)
+ br label %loop
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cont.2 ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %p.next = getelementptr inbounds i8, i8* %m, i64 %indvars.iv.next
+ %p = getelementptr inbounds i8, i8* %m, i64 %indvars.iv
+ store i8 0, i8* %p
+ br label %cont
+
+cont:
+ store i8 1, i8* %p.next
+ br label %cont.2
+
+cont.2:
+ %continue = icmp ugt i64 %indvars.iv, 15
+ br i1 %continue, label %loop, label %return
+
+return:
+ ret i32 0
+}
+
+; This is an example which can potentially benefit from PHI translation.
+; Current implementation doesn't handle this case though. This is because
+; we don't visit the same block with different addresses while looking for
+; clobbering instructions.
+define i32 @test45(i1 %c) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK: true:
+; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 1
+; CHECK-NEXT: store i8 1, i8* [[P_1]]
+; CHECK-NEXT: br label [[CONT:%.*]]
+; CHECK: false:
+; CHECK-NEXT: [[P_2:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 2
+; CHECK-NEXT: store i8 1, i8* [[P_2]]
+; CHECK-NEXT: br label [[CONT]]
+; CHECK: cont:
+; CHECK-NEXT: [[OFFSET:%.*]] = phi i64 [ 2, [[TRUE]] ], [ 1, [[FALSE]] ]
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 [[OFFSET]]
+; CHECK-NEXT: store i8 0, i8* [[P]]
+; CHECK-NEXT: br label [[RETURN:%.*]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %m = call i8* @calloc(i32 9, i32 20)
+ br i1 %c, label %true, label %false
+
+true:
+ %p.1 = getelementptr inbounds i8, i8* %m, i64 1
+ store i8 1, i8* %p.1
+ br label %cont
+
+false:
+ %p.2 = getelementptr inbounds i8, i8* %m, i64 2
+ store i8 1, i8* %p.2
+ br label %cont
+
+cont:
+ %offset = phi i64 [ 2, %true ], [ 1, %false ]
+ %p = getelementptr inbounds i8, i8* %m, i64 %offset
+ store i8 0, i8* %p
+ br label %return
+
+return:
+ ret i32 0
+}
+
+; This is test45 modified in a way to demonstrate PHI translation
+; improving the accuracy of the analysis (on a slightly convoluted
+; case though).
+define i32 @test46(i1 %c) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[M:%.*]] = call i8* @calloc(i32 9, i32 20)
+; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 1
+; CHECK-NEXT: [[P_2:%.*]] = getelementptr inbounds i8, i8* [[M]], i64 2
+; CHECK-NEXT: br i1 [[C:%.*]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK: true:
+; CHECK-NEXT: store i8 1, i8* [[P_1]]
+; CHECK-NEXT: br label [[CONT:%.*]]
+; CHECK: false:
+; CHECK-NEXT: store i8 1, i8* [[P_1]]
+; CHECK-NEXT: br label [[CONT]]
+; CHECK: cont:
+; CHECK-NEXT: br label [[RETURN:%.*]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %m = call i8* @calloc(i32 9, i32 20)
+ %p.1 = getelementptr inbounds i8, i8* %m, i64 1
+ %p.2 = getelementptr inbounds i8, i8* %m, i64 2
+ br i1 %c, label %true, label %false
+
+true:
+ store i8 1, i8* %p.1
+ br label %cont
+
+false:
+ store i8 1, i8* %p.1
+ br label %cont
+
+cont:
+ %offset = phi i64 [ 2, %true ], [ 2, %false ]
+ %p = getelementptr inbounds i8, i8* %m, i64 %offset
+ store i8 0, i8* %p
+ br label %return
+
+return:
+ ret i32 0
+}
+
declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32)
More information about the llvm-commits
mailing list