[llvm] r276452 - Recommit - [DSE]Enhance shorthening MemIntrinsic based on OverlapIntervals
Jun Bum Lim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 22 11:27:24 PDT 2016
Author: junbuml
Date: Fri Jul 22 13:27:24 2016
New Revision: 276452
URL: http://llvm.org/viewvc/llvm-project?rev=276452&view=rev
Log:
Recommit - [DSE]Enhance shorthening MemIntrinsic based on OverlapIntervals
Recommiting r275571 after fixing crash reported in PR28270.
Now we erase elements of IOL in deleteDeadInstruction().
Original Summary:
This change use the overlap interval map built from partial overwrite tracking to perform shortening MemIntrinsics.
Add test cases which was missing opportunities before.
Added:
llvm/trunk/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=276452&r1=276451&r2=276452&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Fri Jul 22 13:27:24 2016
@@ -59,6 +59,8 @@ EnablePartialOverwriteTracking("enable-d
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
+typedef std::map<int64_t, int64_t> OverlapIntervalsTy;
+typedef DenseMap<Instruction *, OverlapIntervalsTy> InstOverlapIntervalsTy;
/// Delete this instruction. Before we do, go through and zero out all the
/// operands of this instruction. If any of them become dead, delete them and
@@ -67,6 +69,7 @@ EnablePartialOverwriteTracking("enable-d
static void
deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
+ InstOverlapIntervalsTy &IOL,
SmallSetVector<Value *, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
@@ -105,6 +108,8 @@ deleteDeadInstruction(Instruction *I, Ba
else
DeadInst->eraseFromParent();
+ IOL.erase(DeadInst);
+
if (ValueSet) ValueSet->remove(DeadInst);
} while (!NowDeadInsts.empty());
*BBI = NewIter;
@@ -290,9 +295,6 @@ enum OverwriteResult {
};
}
-typedef DenseMap<Instruction *,
- std::map<int64_t, int64_t>> InstOverlapIntervalsTy;
-
/// Return 'OverwriteComplete' if a store to the 'Later' location completely
/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
/// the 'Earlier' location is completely overwritten by 'Later',
@@ -438,9 +440,9 @@ static OverwriteResult isOverwrite(const
//
// In this case we may want to trim the size of earlier to avoid generating
// writes to addresses which will definitely be overwritten later
- if (LaterOff > EarlierOff &&
- LaterOff < int64_t(EarlierOff + Earlier.Size) &&
- int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
+ if (!EnablePartialOverwriteTracking &&
+ (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
+ int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
return OverwriteEnd;
// Finally, we also need to check if the later store overwrites the beginning
@@ -452,9 +454,11 @@ static OverwriteResult isOverwrite(const
// In this case we may want to move the destination address and trim the size
// of earlier to avoid generating writes to addresses which will definitely
// be overwritten later.
- if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
- assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)
- && "Expect to be handled as OverwriteComplete" );
+ if (!EnablePartialOverwriteTracking &&
+ (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
+ assert(int64_t(LaterOff + Later.Size) <
+ int64_t(EarlierOff + Earlier.Size) &&
+ "Expect to be handled as OverwriteComplete");
return OverwriteBegin;
}
// Otherwise, they don't completely overlap.
@@ -585,7 +589,8 @@ static void findUnconditionalPreds(Small
/// to a field of that structure.
static bool handleFree(CallInst *F, AliasAnalysis *AA,
MemoryDependenceResults *MD, DominatorTree *DT,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ InstOverlapIntervalsTy &IOL) {
bool MadeChange = false;
MemoryLocation Loc = MemoryLocation(F->getOperand(0));
@@ -617,7 +622,7 @@ static bool handleFree(CallInst *F, Alia
// DCE instructions only used to calculate that store.
BasicBlock::iterator BBI(Dependency);
- deleteDeadInstruction(Dependency, &BBI, *MD, *TLI);
+ deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL);
++NumFastStores;
MadeChange = true;
@@ -672,7 +677,8 @@ static void removeAccessedObjects(const
/// ret void
static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
MemoryDependenceResults *MD,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ InstOverlapIntervalsTy &IOL) {
bool MadeChange = false;
// Keep track of all of the stack objects that are dead at the end of the
@@ -731,7 +737,7 @@ static bool handleEndBlock(BasicBlock &B
dbgs() << '\n');
// DCE instructions only used to calculate that store.
- deleteDeadInstruction(Dead, &BBI, *MD, *TLI, &DeadStackObjects);
+ deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -742,7 +748,7 @@ static bool handleEndBlock(BasicBlock &B
if (isInstructionTriviallyDead(&*BBI, TLI)) {
DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: "
<< *&*BBI << '\n');
- deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, &DeadStackObjects);
+ deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -824,10 +830,124 @@ static bool handleEndBlock(BasicBlock &B
return MadeChange;
}
+static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
+ int64_t &EarlierSize, int64_t LaterOffset,
+ int64_t LaterSize, bool IsOverwriteEnd) {
+ // TODO: base this on the target vector size so that if the earlier
+ // store was too small to get vector writes anyway then its likely
+ // a good idea to shorten it
+ // Power of 2 vector writes are probably always a bad idea to optimize
+ // as any store/memset/memcpy is likely using vector instructions so
+ // shortening it to not vector size is likely to be slower
+ MemIntrinsic *EarlierIntrinsic = cast<MemIntrinsic>(EarlierWrite);
+ unsigned EarlierWriteAlign = EarlierIntrinsic->getAlignment();
+ if (!IsOverwriteEnd)
+ LaterOffset = int64_t(LaterOffset + LaterSize);
+
+ if (!(llvm::isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) &&
+ !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
+ return false;
+
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
+ << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *EarlierWrite
+ << "\n KILLER (offset " << LaterOffset << ", " << EarlierSize
+ << ")\n");
+
+ int64_t NewLength = IsOverwriteEnd
+ ? LaterOffset - EarlierOffset
+ : EarlierSize - (LaterOffset - EarlierOffset);
+
+ Value *EarlierWriteLength = EarlierIntrinsic->getLength();
+ Value *TrimmedLength =
+ ConstantInt::get(EarlierWriteLength->getType(), NewLength);
+ EarlierIntrinsic->setLength(TrimmedLength);
+
+ EarlierSize = NewLength;
+ if (!IsOverwriteEnd) {
+ int64_t OffsetMoved = (LaterOffset - EarlierOffset);
+ Value *Indices[1] = {
+ ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
+ GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
+ EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
+ EarlierIntrinsic->setDest(NewDestGEP);
+ EarlierOffset = EarlierOffset + OffsetMoved;
+ }
+ return true;
+}
+
+static bool tryToShortenEnd(Instruction *EarlierWrite,
+ OverlapIntervalsTy &IntervalMap,
+ int64_t &EarlierStart, int64_t &EarlierSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
+ return false;
+
+ OverlapIntervalsTy::iterator OII = --IntervalMap.end();
+ int64_t LaterStart = OII->second;
+ int64_t LaterSize = OII->first - LaterStart;
+
+ if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
+ LaterStart + LaterSize >= EarlierStart + EarlierSize) {
+ if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
+ LaterSize, true)) {
+ IntervalMap.erase(OII);
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool tryToShortenBegin(Instruction *EarlierWrite,
+ OverlapIntervalsTy &IntervalMap,
+ int64_t &EarlierStart, int64_t &EarlierSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
+ return false;
+
+ OverlapIntervalsTy::iterator OII = IntervalMap.begin();
+ int64_t LaterStart = OII->second;
+ int64_t LaterSize = OII->first - LaterStart;
+
+ if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
+ assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
+ "Should have been handled as OverwriteComplete");
+ if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
+ LaterSize, false)) {
+ IntervalMap.erase(OII);
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
+ const DataLayout &DL,
+ InstOverlapIntervalsTy &IOL) {
+ bool Changed = false;
+ for (auto OI : IOL) {
+ Instruction *EarlierWrite = OI.first;
+ MemoryLocation Loc = getLocForWrite(EarlierWrite, *AA);
+ assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
+ assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc");
+
+ const Value *Ptr = Loc.Ptr->stripPointerCasts();
+ int64_t EarlierStart = 0;
+ int64_t EarlierSize = int64_t(Loc.Size);
+ GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
+ OverlapIntervalsTy &IntervalMap = OI.second;
+ Changed =
+ tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
+ if (IntervalMap.empty())
+ continue;
+ Changed |=
+ tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
+ }
+ return Changed;
+}
+
static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
AliasAnalysis *AA, MemoryDependenceResults *MD,
const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ InstOverlapIntervalsTy &IOL) {
// Must be a store instruction.
StoreInst *SI = dyn_cast<StoreInst>(Inst);
if (!SI)
@@ -842,7 +962,7 @@ static bool eliminateNoopStore(Instructi
DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: "
<< *DepLoad << "\n STORE: " << *SI << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL);
++NumRedundantStores;
return true;
}
@@ -860,7 +980,7 @@ static bool eliminateNoopStore(Instructi
dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
<< *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL);
++NumRedundantStores;
return true;
}
@@ -881,7 +1001,7 @@ static bool eliminateDeadStores(BasicBlo
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(&*BBI, TLI)) {
- MadeChange |= handleFree(F, AA, MD, DT, TLI);
+ MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL);
// Increment BBI after handleFree has potentially deleted instructions.
// This ensures we maintain a valid iterator.
++BBI;
@@ -895,7 +1015,7 @@ static bool eliminateDeadStores(BasicBlo
continue;
// eliminateNoopStore will update in iterator, if necessary.
- if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI)) {
+ if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL)) {
MadeChange = true;
continue;
}
@@ -943,7 +1063,7 @@ static bool eliminateDeadStores(BasicBlo
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI);
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL);
++NumFastStores;
MadeChange = true;
@@ -953,48 +1073,14 @@ static bool eliminateDeadStores(BasicBlo
} else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
((OR == OverwriteBegin &&
isShortenableAtTheBeginning(DepWrite)))) {
- // TODO: base this on the target vector size so that if the earlier
- // store was too small to get vector writes anyway then its likely
- // a good idea to shorten it
- // Power of 2 vector writes are probably always a bad idea to optimize
- // as any store/memset/memcpy is likely using vector instructions so
- // shortening it to not vector size is likely to be slower
- MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
- unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+ assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
+ "when partial-overwrite "
+ "tracking is enabled");
+ int64_t EarlierSize = DepLoc.Size;
+ int64_t LaterSize = Loc.Size;
bool IsOverwriteEnd = (OR == OverwriteEnd);
- if (!IsOverwriteEnd)
- InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
-
- if ((llvm::isPowerOf2_64(InstWriteOffset) &&
- DepWriteAlign <= InstWriteOffset) ||
- ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
- DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
- << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
- << *DepWrite << "\n KILLER (offset "
- << InstWriteOffset << ", " << DepLoc.Size << ")"
- << *Inst << '\n');
-
- int64_t NewLength =
- IsOverwriteEnd
- ? InstWriteOffset - DepWriteOffset
- : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
-
- Value *DepWriteLength = DepIntrinsic->getLength();
- Value *TrimmedLength =
- ConstantInt::get(DepWriteLength->getType(), NewLength);
- DepIntrinsic->setLength(TrimmedLength);
-
- if (!IsOverwriteEnd) {
- int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
- Value *Indices[1] = {
- ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
- GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
- DepIntrinsic->getRawDest(), Indices, "", DepWrite);
- DepIntrinsic->setDest(NewDestGEP);
- }
- MadeChange = true;
- }
+ MadeChange = tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
+ InstWriteOffset, LaterSize, IsOverwriteEnd);
}
}
@@ -1017,10 +1103,13 @@ static bool eliminateDeadStores(BasicBlo
}
}
+ if (EnablePartialOverwriteTracking)
+ MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);
+
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
if (BB.getTerminator()->getNumSuccessors() == 0)
- MadeChange |= handleEndBlock(BB, AA, MD, TLI);
+ MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL);
return MadeChange;
}
Added: llvm/trunk/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll?rev=276452&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/2016-07-17-UseAfterFree.ll Fri Jul 22 13:27:24 2016
@@ -0,0 +1,32 @@
+; RUN: opt < %s -basicaa -dse -S -enable-dse-partial-overwrite-tracking | FileCheck %s
+; PR28588
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @_UPT_destroy(i8* nocapture %ptr) local_unnamed_addr #0 {
+entry:
+ %edi = getelementptr inbounds i8, i8* %ptr, i64 8
+
+; CHECK-NOT: tail call void @llvm.memset.p0i8.i64(i8* %edi, i8 0, i64 176, i32 8, i1 false)
+; CHECK-NOT: store i32 -1, i32* %addr
+
+ tail call void @llvm.memset.p0i8.i64(i8* %edi, i8 0, i64 176, i32 8, i1 false)
+ %format4.i = getelementptr inbounds i8, i8* %ptr, i64 144
+ %addr = bitcast i8* %format4.i to i32*
+ store i32 -1, i32* %addr, align 8
+
+; CHECK: tail call void @free
+ tail call void @free(i8* nonnull %ptr)
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @free(i8* nocapture) local_unnamed_addr #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1
+
+attributes #0 = { nounwind }
+attributes #1 = { argmemonly nounwind }
Modified: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll?rev=276452&r1=276451&r2=276452&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll Fri Jul 22 13:27:24 2016
@@ -86,5 +86,23 @@ entry:
ret void
}
+define void @write8To15AndThen0To7(i64* nocapture %P) {
+entry:
+; CHECK-LABEL: @write8To15AndThen0To7(
+; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %mybase0, i64 16
+; CHECK: tail call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 16, i32 8, i1 false)
+
+ %base0 = bitcast i64* %P to i8*
+ %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+ tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
+
+ %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
+ %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
+
+ store i64 1, i64* %base64_1
+ store i64 2, i64* %base64_0
+ ret void
+}
+
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
Modified: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll?rev=276452&r1=276451&r2=276452&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll Fri Jul 22 13:27:24 2016
@@ -93,3 +93,20 @@ entry:
store i64 3, i64* %tf_trapno, align 8
ret void
}
+
+define void @write16To23AndThen24To31(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
+entry:
+; CHECK-LABEL: @write16To23AndThen24To31(
+; CHECK: tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 16, i32 8, i1 false)
+
+ %base0 = bitcast i64* %P to i8*
+ %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
+ tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
+
+ %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
+ %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
+
+ store i64 3, i64* %base64_2
+ store i64 3, i64* %base64_3
+ ret void
+}
More information about the llvm-commits
mailing list