[llvm] r275801 - Revert "r275571 [DSE]Enhance shorthening MemIntrinsic based on OverlapIntervals"
Alexander Kornienko via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 18 08:51:31 PDT 2016
Author: alexfh
Date: Mon Jul 18 10:51:31 2016
New Revision: 275801
URL: http://llvm.org/viewvc/llvm-project?rev=275801&view=rev
Log:
Revert "r275571 [DSE]Enhance shorthening MemIntrinsic based on OverlapIntervals"
Causes https://llvm.org/bugs/show_bug.cgi?id=28588
Modified:
llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=275801&r1=275800&r2=275801&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Mon Jul 18 10:51:31 2016
@@ -290,8 +290,8 @@ enum OverwriteResult {
};
}
-typedef std::map<int64_t, int64_t> OverlapIntervalsTy;
-typedef DenseMap<Instruction *, OverlapIntervalsTy> InstOverlapIntervalsTy;
+typedef DenseMap<Instruction *,
+ std::map<int64_t, int64_t>> InstOverlapIntervalsTy;
/// Return 'OverwriteComplete' if a store to the 'Later' location completely
/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
@@ -438,9 +438,9 @@ static OverwriteResult isOverwrite(const
//
// In this case we may want to trim the size of earlier to avoid generating
// writes to addresses which will definitely be overwritten later
- if (!EnablePartialOverwriteTracking &&
- (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
- int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
+ if (LaterOff > EarlierOff &&
+ LaterOff < int64_t(EarlierOff + Earlier.Size) &&
+ int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
return OverwriteEnd;
// Finally, we also need to check if the later store overwrites the beginning
@@ -452,11 +452,9 @@ static OverwriteResult isOverwrite(const
// In this case we may want to move the destination address and trim the size
// of earlier to avoid generating writes to addresses which will definitely
// be overwritten later.
- if (!EnablePartialOverwriteTracking &&
- (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
- assert(int64_t(LaterOff + Later.Size) <
- int64_t(EarlierOff + Earlier.Size) &&
- "Expect to be handled as OverwriteComplete");
+ if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
+ assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)
+ && "Expect to be handled as OverwriteComplete" );
return OverwriteBegin;
}
// Otherwise, they don't completely overlap.
@@ -821,119 +819,6 @@ static bool handleEndBlock(BasicBlock &B
return MadeChange;
}
-static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
- int64_t &EarlierSize, int64_t LaterOffset,
- int64_t LaterSize, bool IsOverwriteEnd) {
- // TODO: base this on the target vector size so that if the earlier
- // store was too small to get vector writes anyway then its likely
- // a good idea to shorten it
- // Power of 2 vector writes are probably always a bad idea to optimize
- // as any store/memset/memcpy is likely using vector instructions so
- // shortening it to not vector size is likely to be slower
- MemIntrinsic *EarlierIntrinsic = cast<MemIntrinsic>(EarlierWrite);
- unsigned EarlierWriteAlign = EarlierIntrinsic->getAlignment();
- if (!IsOverwriteEnd)
- LaterOffset = int64_t(LaterOffset + LaterSize);
-
- if (!(llvm::isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) &&
- !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
- return false;
-
- DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
- << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *EarlierWrite
- << "\n KILLER (offset " << LaterOffset << ", " << EarlierSize
- << ")\n");
-
- int64_t NewLength = IsOverwriteEnd
- ? LaterOffset - EarlierOffset
- : EarlierSize - (LaterOffset - EarlierOffset);
-
- Value *EarlierWriteLength = EarlierIntrinsic->getLength();
- Value *TrimmedLength =
- ConstantInt::get(EarlierWriteLength->getType(), NewLength);
- EarlierIntrinsic->setLength(TrimmedLength);
-
- EarlierSize = NewLength;
- if (!IsOverwriteEnd) {
- int64_t OffsetMoved = (LaterOffset - EarlierOffset);
- Value *Indices[1] = {
- ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
- GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
- EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
- EarlierIntrinsic->setDest(NewDestGEP);
- EarlierOffset = EarlierOffset + OffsetMoved;
- }
- return true;
-}
-
-static bool tryToShortenEnd(Instruction *EarlierWrite,
- OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, int64_t &EarlierSize) {
- if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
- return false;
-
- OverlapIntervalsTy::iterator OII = --IntervalMap.end();
- int64_t LaterStart = OII->second;
- int64_t LaterSize = OII->first - LaterStart;
-
- if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
- LaterStart + LaterSize >= EarlierStart + EarlierSize) {
- if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
- LaterSize, true)) {
- IntervalMap.erase(OII);
- return true;
- }
- }
- return false;
-}
-
-static bool tryToShortenBegin(Instruction *EarlierWrite,
- OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, int64_t &EarlierSize) {
- if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
- return false;
-
- OverlapIntervalsTy::iterator OII = IntervalMap.begin();
- int64_t LaterStart = OII->second;
- int64_t LaterSize = OII->first - LaterStart;
-
- if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
- assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
- "Should have been handled as OverwriteComplete");
- if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
- LaterSize, false)) {
- IntervalMap.erase(OII);
- return true;
- }
- }
- return false;
-}
-
-static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
- const DataLayout &DL,
- InstOverlapIntervalsTy &IOL) {
- bool Changed = false;
- for (auto OI : IOL) {
- Instruction *EarlierWrite = OI.first;
- MemoryLocation Loc = getLocForWrite(EarlierWrite, *AA);
- assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
- assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc");
-
- const Value *Ptr = Loc.Ptr->stripPointerCasts();
- int64_t EarlierStart = 0;
- int64_t EarlierSize = int64_t(Loc.Size);
- GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
- OverlapIntervalsTy &IntervalMap = OI.second;
- Changed =
- tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
- if (IntervalMap.empty())
- continue;
- Changed |=
- tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
- }
- return Changed;
-}
-
static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
AliasAnalysis *AA, MemoryDependenceResults *MD,
const DataLayout &DL,
@@ -1051,7 +936,7 @@ static bool eliminateDeadStores(BasicBlo
if (OR == OverwriteComplete) {
DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
- IOL.erase(DepWrite);
+
// Delete the store and now-dead instructions that feed it.
deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI);
++NumFastStores;
@@ -1063,14 +948,48 @@ static bool eliminateDeadStores(BasicBlo
} else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
((OR == OverwriteBegin &&
isShortenableAtTheBeginning(DepWrite)))) {
- assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
- "when partial-overwrite "
- "tracking is enabled");
- int64_t EarlierSize = DepLoc.Size;
- int64_t LaterSize = Loc.Size;
+ // TODO: base this on the target vector size so that if the earlier
+ // store was too small to get vector writes anyway then its likely
+ // a good idea to shorten it
+ // Power of 2 vector writes are probably always a bad idea to optimize
+ // as any store/memset/memcpy is likely using vector instructions so
+ // shortening it to not vector size is likely to be slower
+ MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+ unsigned DepWriteAlign = DepIntrinsic->getAlignment();
bool IsOverwriteEnd = (OR == OverwriteEnd);
- MadeChange = tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
- InstWriteOffset, LaterSize, IsOverwriteEnd);
+ if (!IsOverwriteEnd)
+ InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
+
+ if ((llvm::isPowerOf2_64(InstWriteOffset) &&
+ DepWriteAlign <= InstWriteOffset) ||
+ ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
+
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
+ << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
+ << *DepWrite << "\n KILLER (offset "
+ << InstWriteOffset << ", " << DepLoc.Size << ")"
+ << *Inst << '\n');
+
+ int64_t NewLength =
+ IsOverwriteEnd
+ ? InstWriteOffset - DepWriteOffset
+ : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
+
+ Value *DepWriteLength = DepIntrinsic->getLength();
+ Value *TrimmedLength =
+ ConstantInt::get(DepWriteLength->getType(), NewLength);
+ DepIntrinsic->setLength(TrimmedLength);
+
+ if (!IsOverwriteEnd) {
+ int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
+ Value *Indices[1] = {
+ ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
+ GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
+ DepIntrinsic->getRawDest(), Indices, "", DepWrite);
+ DepIntrinsic->setDest(NewDestGEP);
+ }
+ MadeChange = true;
+ }
}
}
@@ -1093,9 +1012,6 @@ static bool eliminateDeadStores(BasicBlo
}
}
- if (EnablePartialOverwriteTracking)
- MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);
-
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
if (BB.getTerminator()->getNumSuccessors() == 0)
Modified: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll?rev=275801&r1=275800&r2=275801&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll Mon Jul 18 10:51:31 2016
@@ -86,23 +86,5 @@ entry:
ret void
}
-define void @write8To15AndThen0To7(i64* nocapture %P) {
-entry:
-; CHECK-LABEL: @write8To15AndThen0To7(
-; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %mybase0, i64 16
-; CHECK: tail call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 16, i32 8, i1 false)
-
- %base0 = bitcast i64* %P to i8*
- %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
- tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
-
- %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
- %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
-
- store i64 1, i64* %base64_1
- store i64 2, i64* %base64_0
- ret void
-}
-
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
Modified: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll?rev=275801&r1=275800&r2=275801&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll (original)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll Mon Jul 18 10:51:31 2016
@@ -93,20 +93,3 @@ entry:
store i64 3, i64* %tf_trapno, align 8
ret void
}
-
-define void @write16To23AndThen24To31(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
-entry:
-; CHECK-LABEL: @write16To23AndThen24To31(
-; CHECK: tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 16, i32 8, i1 false)
-
- %base0 = bitcast i64* %P to i8*
- %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
- tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
-
- %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
- %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
-
- store i64 3, i64* %base64_2
- store i64 3, i64* %base64_3
- ret void
-}
More information about the llvm-commits
mailing list