[llvm] r275801 - Revert "r275571 [DSE]Enhance shorthening MemIntrinsic based on OverlapIntervals"
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 25 13:08:17 PDT 2016
Oh, silly me. This revert also landed before the branch, so this is all good.
Sorry for the noise.
On Mon, Jul 25, 2016 at 12:15 PM, Alexander Kornienko <alexfh at google.com> wrote:
> Yep, if there was no proper fix yet, then it should probably be just
> reverted.
>
>
> On Jul 25, 2016 21:03, "Hans Wennborg" <hans at chromium.org> wrote:
>>
>> Maybe we should merge this to 3.9? It looks like this reverts a patch
>> that was committed soon before the branch.
>>
>> On Mon, Jul 18, 2016 at 8:51 AM, Alexander Kornienko via llvm-commits
>> <llvm-commits at lists.llvm.org> wrote:
>> > Author: alexfh
>> > Date: Mon Jul 18 10:51:31 2016
>> > New Revision: 275801
>> >
>> > URL: http://llvm.org/viewvc/llvm-project?rev=275801&view=rev
>> > Log:
>> > Revert "r275571 [DSE]Enhance shorthening MemIntrinsic based on
>> > OverlapIntervals"
>> >
>> > Causes https://llvm.org/bugs/show_bug.cgi?id=28588
>> >
>> > Modified:
>> > llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
>> >
>> > llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
>> > llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
>> >
>> > Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
>> > URL:
>> > http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=275801&r1=275800&r2=275801&view=diff
>> >
>> > ==============================================================================
>> > --- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
>> > +++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Mon Jul 18
>> > 10:51:31 2016
>> > @@ -290,8 +290,8 @@ enum OverwriteResult {
>> > };
>> > }
>> >
>> > -typedef std::map<int64_t, int64_t> OverlapIntervalsTy;
>> > -typedef DenseMap<Instruction *, OverlapIntervalsTy>
>> > InstOverlapIntervalsTy;
>> > +typedef DenseMap<Instruction *,
>> > + std::map<int64_t, int64_t>> InstOverlapIntervalsTy;
>> >
>> > /// Return 'OverwriteComplete' if a store to the 'Later' location
>> > completely
>> > /// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the
>> > end of
>> > @@ -438,9 +438,9 @@ static OverwriteResult isOverwrite(const
>> > //
>> > // In this case we may want to trim the size of earlier to avoid
>> > generating
>> > // writes to addresses which will definitely be overwritten later
>> > - if (!EnablePartialOverwriteTracking &&
>> > - (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff +
>> > Earlier.Size) &&
>> > - int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff +
>> > Earlier.Size)))
>> > + if (LaterOff > EarlierOff &&
>> > + LaterOff < int64_t(EarlierOff + Earlier.Size) &&
>> > + int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff +
>> > Earlier.Size))
>> > return OverwriteEnd;
>> >
>> > // Finally, we also need to check if the later store overwrites the
>> > beginning
>> > @@ -452,11 +452,9 @@ static OverwriteResult isOverwrite(const
>> > // In this case we may want to move the destination address and trim
>> > the size
>> > // of earlier to avoid generating writes to addresses which will
>> > definitely
>> > // be overwritten later.
>> > - if (!EnablePartialOverwriteTracking &&
>> > - (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) >
>> > EarlierOff)) {
>> > - assert(int64_t(LaterOff + Later.Size) <
>> > - int64_t(EarlierOff + Earlier.Size) &&
>> > - "Expect to be handled as OverwriteComplete");
>> > + if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) >
>> > EarlierOff) {
>> > + assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff +
>> > Earlier.Size)
>> > + && "Expect to be handled as OverwriteComplete" );
>> > return OverwriteBegin;
>> > }
>> > // Otherwise, they don't completely overlap.
>> > @@ -821,119 +819,6 @@ static bool handleEndBlock(BasicBlock &B
>> > return MadeChange;
>> > }
>> >
>> > -static bool tryToShorten(Instruction *EarlierWrite, int64_t
>> > &EarlierOffset,
>> > - int64_t &EarlierSize, int64_t LaterOffset,
>> > - int64_t LaterSize, bool IsOverwriteEnd) {
>> > - // TODO: base this on the target vector size so that if the earlier
>> > - // store was too small to get vector writes anyway then its likely
>> > - // a good idea to shorten it
>> > - // Power of 2 vector writes are probably always a bad idea to
>> > optimize
>> > - // as any store/memset/memcpy is likely using vector instructions so
>> > - // shortening it to not vector size is likely to be slower
>> > - MemIntrinsic *EarlierIntrinsic = cast<MemIntrinsic>(EarlierWrite);
>> > - unsigned EarlierWriteAlign = EarlierIntrinsic->getAlignment();
>> > - if (!IsOverwriteEnd)
>> > - LaterOffset = int64_t(LaterOffset + LaterSize);
>> > -
>> > - if (!(llvm::isPowerOf2_64(LaterOffset) && EarlierWriteAlign <=
>> > LaterOffset) &&
>> > - !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign ==
>> > 0))
>> > - return false;
>> > -
>> > - DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
>> > - << (IsOverwriteEnd ? "END" : "BEGIN") << ": " <<
>> > *EarlierWrite
>> > - << "\n KILLER (offset " << LaterOffset << ", " <<
>> > EarlierSize
>> > - << ")\n");
>> > -
>> > - int64_t NewLength = IsOverwriteEnd
>> > - ? LaterOffset - EarlierOffset
>> > - : EarlierSize - (LaterOffset -
>> > EarlierOffset);
>> > -
>> > - Value *EarlierWriteLength = EarlierIntrinsic->getLength();
>> > - Value *TrimmedLength =
>> > - ConstantInt::get(EarlierWriteLength->getType(), NewLength);
>> > - EarlierIntrinsic->setLength(TrimmedLength);
>> > -
>> > - EarlierSize = NewLength;
>> > - if (!IsOverwriteEnd) {
>> > - int64_t OffsetMoved = (LaterOffset - EarlierOffset);
>> > - Value *Indices[1] = {
>> > - ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
>> > - GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
>> > - EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
>> > - EarlierIntrinsic->setDest(NewDestGEP);
>> > - EarlierOffset = EarlierOffset + OffsetMoved;
>> > - }
>> > - return true;
>> > -}
>> > -
>> > -static bool tryToShortenEnd(Instruction *EarlierWrite,
>> > - OverlapIntervalsTy &IntervalMap,
>> > - int64_t &EarlierStart, int64_t
>> > &EarlierSize) {
>> > - if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
>> > - return false;
>> > -
>> > - OverlapIntervalsTy::iterator OII = --IntervalMap.end();
>> > - int64_t LaterStart = OII->second;
>> > - int64_t LaterSize = OII->first - LaterStart;
>> > -
>> > - if (LaterStart > EarlierStart && LaterStart < EarlierStart +
>> > EarlierSize &&
>> > - LaterStart + LaterSize >= EarlierStart + EarlierSize) {
>> > - if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize,
>> > LaterStart,
>> > - LaterSize, true)) {
>> > - IntervalMap.erase(OII);
>> > - return true;
>> > - }
>> > - }
>> > - return false;
>> > -}
>> > -
>> > -static bool tryToShortenBegin(Instruction *EarlierWrite,
>> > - OverlapIntervalsTy &IntervalMap,
>> > - int64_t &EarlierStart, int64_t
>> > &EarlierSize) {
>> > - if (IntervalMap.empty() ||
>> > !isShortenableAtTheBeginning(EarlierWrite))
>> > - return false;
>> > -
>> > - OverlapIntervalsTy::iterator OII = IntervalMap.begin();
>> > - int64_t LaterStart = OII->second;
>> > - int64_t LaterSize = OII->first - LaterStart;
>> > -
>> > - if (LaterStart <= EarlierStart && LaterStart + LaterSize >
>> > EarlierStart) {
>> > - assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
>> > - "Should have been handled as OverwriteComplete");
>> > - if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize,
>> > LaterStart,
>> > - LaterSize, false)) {
>> > - IntervalMap.erase(OII);
>> > - return true;
>> > - }
>> > - }
>> > - return false;
>> > -}
>> > -
>> > -static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
>> > - const DataLayout &DL,
>> > - InstOverlapIntervalsTy
>> > &IOL) {
>> > - bool Changed = false;
>> > - for (auto OI : IOL) {
>> > - Instruction *EarlierWrite = OI.first;
>> > - MemoryLocation Loc = getLocForWrite(EarlierWrite, *AA);
>> > - assert(isRemovable(EarlierWrite) && "Expect only removable
>> > instruction");
>> > - assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem
>> > loc");
>> > -
>> > - const Value *Ptr = Loc.Ptr->stripPointerCasts();
>> > - int64_t EarlierStart = 0;
>> > - int64_t EarlierSize = int64_t(Loc.Size);
>> > - GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
>> > - OverlapIntervalsTy &IntervalMap = OI.second;
>> > - Changed =
>> > - tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart,
>> > EarlierSize);
>> > - if (IntervalMap.empty())
>> > - continue;
>> > - Changed |=
>> > - tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart,
>> > EarlierSize);
>> > - }
>> > - return Changed;
>> > -}
>> > -
>> > static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator
>> > &BBI,
>> > AliasAnalysis *AA,
>> > MemoryDependenceResults *MD,
>> > const DataLayout &DL,
>> > @@ -1051,7 +936,7 @@ static bool eliminateDeadStores(BasicBlo
>> > if (OR == OverwriteComplete) {
>> > DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
>> > << *DepWrite << "\n KILLER: " << *Inst << '\n');
>> > - IOL.erase(DepWrite);
>> > +
>> > // Delete the store and now-dead instructions that feed it.
>> > deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI);
>> > ++NumFastStores;
>> > @@ -1063,14 +948,48 @@ static bool eliminateDeadStores(BasicBlo
>> > } else if ((OR == OverwriteEnd &&
>> > isShortenableAtTheEnd(DepWrite)) ||
>> > ((OR == OverwriteBegin &&
>> > isShortenableAtTheBeginning(DepWrite)))) {
>> > - assert(!EnablePartialOverwriteTracking && "Do not expect to
>> > perform "
>> > - "when
>> > partial-overwrite "
>> > - "tracking is
>> > enabled");
>> > - int64_t EarlierSize = DepLoc.Size;
>> > - int64_t LaterSize = Loc.Size;
>> > + // TODO: base this on the target vector size so that if the
>> > earlier
>> > + // store was too small to get vector writes anyway then its
>> > likely
>> > + // a good idea to shorten it
>> > + // Power of 2 vector writes are probably always a bad idea to
>> > optimize
>> > + // as any store/memset/memcpy is likely using vector
>> > instructions so
>> > + // shortening it to not vector size is likely to be slower
>> > + MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
>> > + unsigned DepWriteAlign = DepIntrinsic->getAlignment();
>> > bool IsOverwriteEnd = (OR == OverwriteEnd);
>> > - MadeChange = tryToShorten(DepWrite, DepWriteOffset,
>> > EarlierSize,
>> > - InstWriteOffset, LaterSize,
>> > IsOverwriteEnd);
>> > + if (!IsOverwriteEnd)
>> > + InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
>> > +
>> > + if ((llvm::isPowerOf2_64(InstWriteOffset) &&
>> > + DepWriteAlign <= InstWriteOffset) ||
>> > + ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign
>> > == 0)) {
>> > +
>> > + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
>> > + << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
>> > + << *DepWrite << "\n KILLER (offset "
>> > + << InstWriteOffset << ", " << DepLoc.Size <<
>> > ")"
>> > + << *Inst << '\n');
>> > +
>> > + int64_t NewLength =
>> > + IsOverwriteEnd
>> > + ? InstWriteOffset - DepWriteOffset
>> > + : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
>> > +
>> > + Value *DepWriteLength = DepIntrinsic->getLength();
>> > + Value *TrimmedLength =
>> > + ConstantInt::get(DepWriteLength->getType(), NewLength);
>> > + DepIntrinsic->setLength(TrimmedLength);
>> > +
>> > + if (!IsOverwriteEnd) {
>> > + int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
>> > + Value *Indices[1] = {
>> > + ConstantInt::get(DepWriteLength->getType(),
>> > OffsetMoved)};
>> > + GetElementPtrInst *NewDestGEP =
>> > GetElementPtrInst::CreateInBounds(
>> > + DepIntrinsic->getRawDest(), Indices, "", DepWrite);
>> > + DepIntrinsic->setDest(NewDestGEP);
>> > + }
>> > + MadeChange = true;
>> > + }
>> > }
>> > }
>> >
>> > @@ -1093,9 +1012,6 @@ static bool eliminateDeadStores(BasicBlo
>> > }
>> > }
>> >
>> > - if (EnablePartialOverwriteTracking)
>> > - MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);
>> > -
>> > // If this block ends in a return, unwind, or unreachable, all
>> > allocas are
>> > // dead at its end, which means stores to them are also dead.
>> > if (BB.getTerminator()->getNumSuccessors() == 0)
>> >
>> > Modified:
>> > llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
>> > URL:
>> > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll?rev=275801&r1=275800&r2=275801&view=diff
>> >
>> > ==============================================================================
>> > ---
>> > llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
>> > (original)
>> > +++
>> > llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll Mon
>> > Jul 18 10:51:31 2016
>> > @@ -86,23 +86,5 @@ entry:
>> > ret void
>> > }
>> >
>> > -define void @write8To15AndThen0To7(i64* nocapture %P) {
>> > -entry:
>> > -; CHECK-LABEL: @write8To15AndThen0To7(
>> > -; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %mybase0, i64
>> > 16
>> > -; CHECK: tail call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64
>> > 16, i32 8, i1 false)
>> > -
>> > - %base0 = bitcast i64* %P to i8*
>> > - %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
>> > - tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32
>> > 8, i1 false)
>> > -
>> > - %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
>> > - %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
>> > -
>> > - store i64 1, i64* %base64_1
>> > - store i64 2, i64* %base64_0
>> > - ret void
>> > -}
>> > -
>> > declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
>> > nounwind
>> >
>> >
>> > Modified:
>> > llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
>> > URL:
>> > http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll?rev=275801&r1=275800&r2=275801&view=diff
>> >
>> > ==============================================================================
>> > --- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
>> > (original)
>> > +++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
>> > Mon Jul 18 10:51:31 2016
>> > @@ -93,20 +93,3 @@ entry:
>> > store i64 3, i64* %tf_trapno, align 8
>> > ret void
>> > }
>> > -
>> > -define void @write16To23AndThen24To31(i64* nocapture %P, i64 %n64, i32
>> > %n32, i16 %n16, i8 %n8) {
>> > -entry:
>> > -; CHECK-LABEL: @write16To23AndThen24To31(
>> > -; CHECK: tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64
>> > 16, i32 8, i1 false)
>> > -
>> > - %base0 = bitcast i64* %P to i8*
>> > - %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
>> > - tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32
>> > 8, i1 false)
>> > -
>> > - %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
>> > - %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
>> > -
>> > - store i64 3, i64* %base64_2
>> > - store i64 3, i64* %base64_3
>> > - ret void
>> > -}
>> >
>> >
>> > _______________________________________________
>> > llvm-commits mailing list
>> > llvm-commits at lists.llvm.org
>> > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list