[llvm] r275571 - [DSE]Enhance shorthening MemIntrinsic based on OverlapIntervals

Benjamin Kramer via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 17 07:14:36 PDT 2016


Looks like this caused https://llvm.org/bugs/show_bug.cgi?id=28588,
can you take a look?

On Fri, Jul 15, 2016 at 6:14 PM, Jun Bum Lim via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: junbuml
> Date: Fri Jul 15 11:14:34 2016
> New Revision: 275571
>
> URL: http://llvm.org/viewvc/llvm-project?rev=275571&view=rev
> Log:
> [DSE]Enhance shorthening MemIntrinsic based on OverlapIntervals
>
> Summary:
> This change use the overlap interval map built from partial overwrite tracking to perform shortening MemIntrinsics.
> Add test cases which was missing opportunities before.
>
> Reviewers: hfinkel, eeckstein, mcrosier
>
> Subscribers: mcrosier, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D21909
>
> Modified:
>     llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
>     llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
>     llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
>
> Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=275571&r1=275570&r2=275571&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
> +++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Fri Jul 15 11:14:34 2016
> @@ -290,8 +290,8 @@ enum OverwriteResult {
>  };
>  }
>
> -typedef DenseMap<Instruction *,
> -                 std::map<int64_t, int64_t>> InstOverlapIntervalsTy;
> +typedef std::map<int64_t, int64_t> OverlapIntervalsTy;
> +typedef DenseMap<Instruction *, OverlapIntervalsTy> InstOverlapIntervalsTy;
>
>  /// Return 'OverwriteComplete' if a store to the 'Later' location completely
>  /// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
> @@ -438,9 +438,9 @@ static OverwriteResult isOverwrite(const
>    //
>    // In this case we may want to trim the size of earlier to avoid generating
>    // writes to addresses which will definitely be overwritten later
> -  if (LaterOff > EarlierOff &&
> -      LaterOff < int64_t(EarlierOff + Earlier.Size) &&
> -      int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
> +  if (!EnablePartialOverwriteTracking &&
> +      (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
> +       int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
>      return OverwriteEnd;
>
>    // Finally, we also need to check if the later store overwrites the beginning
> @@ -452,9 +452,11 @@ static OverwriteResult isOverwrite(const
>    // In this case we may want to move the destination address and trim the size
>    // of earlier to avoid generating writes to addresses which will definitely
>    // be overwritten later.
> -  if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
> -    assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)
> -            && "Expect to be handled as OverwriteComplete" );
> +  if (!EnablePartialOverwriteTracking &&
> +      (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
> +    assert(int64_t(LaterOff + Later.Size) <
> +               int64_t(EarlierOff + Earlier.Size) &&
> +           "Expect to be handled as OverwriteComplete");
>      return OverwriteBegin;
>    }
>    // Otherwise, they don't completely overlap.
> @@ -819,6 +821,119 @@ static bool handleEndBlock(BasicBlock &B
>    return MadeChange;
>  }
>
> +static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
> +                         int64_t &EarlierSize, int64_t LaterOffset,
> +                         int64_t LaterSize, bool IsOverwriteEnd) {
> +  // TODO: base this on the target vector size so that if the earlier
> +  // store was too small to get vector writes anyway then its likely
> +  // a good idea to shorten it
> +  // Power of 2 vector writes are probably always a bad idea to optimize
> +  // as any store/memset/memcpy is likely using vector instructions so
> +  // shortening it to not vector size is likely to be slower
> +  MemIntrinsic *EarlierIntrinsic = cast<MemIntrinsic>(EarlierWrite);
> +  unsigned EarlierWriteAlign = EarlierIntrinsic->getAlignment();
> +  if (!IsOverwriteEnd)
> +    LaterOffset = int64_t(LaterOffset + LaterSize);
> +
> +  if (!(llvm::isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) &&
> +      !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
> +    return false;
> +
> +  DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
> +               << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *EarlierWrite
> +               << "\n  KILLER (offset " << LaterOffset << ", " << EarlierSize
> +               << ")\n");
> +
> +  int64_t NewLength = IsOverwriteEnd
> +                          ? LaterOffset - EarlierOffset
> +                          : EarlierSize - (LaterOffset - EarlierOffset);
> +
> +  Value *EarlierWriteLength = EarlierIntrinsic->getLength();
> +  Value *TrimmedLength =
> +      ConstantInt::get(EarlierWriteLength->getType(), NewLength);
> +  EarlierIntrinsic->setLength(TrimmedLength);
> +
> +  EarlierSize = NewLength;
> +  if (!IsOverwriteEnd) {
> +    int64_t OffsetMoved = (LaterOffset - EarlierOffset);
> +    Value *Indices[1] = {
> +        ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
> +    GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
> +        EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
> +    EarlierIntrinsic->setDest(NewDestGEP);
> +    EarlierOffset = EarlierOffset + OffsetMoved;
> +  }
> +  return true;
> +}
> +
> +static bool tryToShortenEnd(Instruction *EarlierWrite,
> +                            OverlapIntervalsTy &IntervalMap,
> +                            int64_t &EarlierStart, int64_t &EarlierSize) {
> +  if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
> +    return false;
> +
> +  OverlapIntervalsTy::iterator OII = --IntervalMap.end();
> +  int64_t LaterStart = OII->second;
> +  int64_t LaterSize = OII->first - LaterStart;
> +
> +  if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
> +      LaterStart + LaterSize >= EarlierStart + EarlierSize) {
> +    if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
> +                     LaterSize, true)) {
> +      IntervalMap.erase(OII);
> +      return true;
> +    }
> +  }
> +  return false;
> +}
> +
> +static bool tryToShortenBegin(Instruction *EarlierWrite,
> +                              OverlapIntervalsTy &IntervalMap,
> +                              int64_t &EarlierStart, int64_t &EarlierSize) {
> +  if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
> +    return false;
> +
> +  OverlapIntervalsTy::iterator OII = IntervalMap.begin();
> +  int64_t LaterStart = OII->second;
> +  int64_t LaterSize = OII->first - LaterStart;
> +
> +  if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
> +    assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
> +           "Should have been handled as OverwriteComplete");
> +    if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
> +                     LaterSize, false)) {
> +      IntervalMap.erase(OII);
> +      return true;
> +    }
> +  }
> +  return false;
> +}
> +
> +static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
> +                                            const DataLayout &DL,
> +                                            InstOverlapIntervalsTy &IOL) {
> +  bool Changed = false;
> +  for (auto OI : IOL) {
> +    Instruction *EarlierWrite = OI.first;
> +    MemoryLocation Loc = getLocForWrite(EarlierWrite, *AA);
> +    assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
> +    assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc");
> +
> +    const Value *Ptr = Loc.Ptr->stripPointerCasts();
> +    int64_t EarlierStart = 0;
> +    int64_t EarlierSize = int64_t(Loc.Size);
> +    GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
> +    OverlapIntervalsTy &IntervalMap = OI.second;
> +    Changed =
> +        tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
> +    if (IntervalMap.empty())
> +      continue;
> +    Changed |=
> +        tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
> +  }
> +  return Changed;
> +}
> +
>  static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
>                                 AliasAnalysis *AA, MemoryDependenceResults *MD,
>                                 const DataLayout &DL,
> @@ -936,7 +1051,7 @@ static bool eliminateDeadStores(BasicBlo
>          if (OR == OverwriteComplete) {
>            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
>                  << *DepWrite << "\n  KILLER: " << *Inst << '\n');
> -
> +          IOL.erase(DepWrite);
>            // Delete the store and now-dead instructions that feed it.
>            deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI);
>            ++NumFastStores;
> @@ -948,48 +1063,14 @@ static bool eliminateDeadStores(BasicBlo
>          } else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
>                     ((OR == OverwriteBegin &&
>                       isShortenableAtTheBeginning(DepWrite)))) {
> -          // TODO: base this on the target vector size so that if the earlier
> -          // store was too small to get vector writes anyway then its likely
> -          // a good idea to shorten it
> -          // Power of 2 vector writes are probably always a bad idea to optimize
> -          // as any store/memset/memcpy is likely using vector instructions so
> -          // shortening it to not vector size is likely to be slower
> -          MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
> -          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
> +          assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
> +                                                    "when partial-overwrite "
> +                                                    "tracking is enabled");
> +          int64_t EarlierSize = DepLoc.Size;
> +          int64_t LaterSize = Loc.Size;
>            bool IsOverwriteEnd = (OR == OverwriteEnd);
> -          if (!IsOverwriteEnd)
> -            InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
> -
> -          if ((llvm::isPowerOf2_64(InstWriteOffset) &&
> -               DepWriteAlign <= InstWriteOffset) ||
> -              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
> -
> -            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW "
> -                         << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
> -                         << *DepWrite << "\n  KILLER (offset "
> -                         << InstWriteOffset << ", " << DepLoc.Size << ")"
> -                         << *Inst << '\n');
> -
> -            int64_t NewLength =
> -                IsOverwriteEnd
> -                    ? InstWriteOffset - DepWriteOffset
> -                    : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
> -
> -            Value *DepWriteLength = DepIntrinsic->getLength();
> -            Value *TrimmedLength =
> -                ConstantInt::get(DepWriteLength->getType(), NewLength);
> -            DepIntrinsic->setLength(TrimmedLength);
> -
> -            if (!IsOverwriteEnd) {
> -              int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
> -              Value *Indices[1] = {
> -                  ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
> -              GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
> -                  DepIntrinsic->getRawDest(), Indices, "", DepWrite);
> -              DepIntrinsic->setDest(NewDestGEP);
> -            }
> -            MadeChange = true;
> -          }
> +          MadeChange = tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
> +                                    InstWriteOffset, LaterSize, IsOverwriteEnd);
>          }
>        }
>
> @@ -1012,6 +1093,9 @@ static bool eliminateDeadStores(BasicBlo
>      }
>    }
>
> +  if (EnablePartialOverwriteTracking)
> +    MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);
> +
>    // If this block ends in a return, unwind, or unreachable, all allocas are
>    // dead at its end, which means stores to them are also dead.
>    if (BB.getTerminator()->getNumSuccessors() == 0)
>
> Modified: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll?rev=275571&r1=275570&r2=275571&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll (original)
> +++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll Fri Jul 15 11:14:34 2016
> @@ -86,5 +86,23 @@ entry:
>    ret void
>  }
>
> +define void @write8To15AndThen0To7(i64* nocapture %P) {
> +entry:
> +; CHECK-LABEL: @write8To15AndThen0To7(
> +; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %mybase0, i64 16
> +; CHECK: tail call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 16, i32 8, i1 false)
> +
> +  %base0 = bitcast i64* %P to i8*
> +  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
> +  tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
> +
> +  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
> +  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
> +
> +  store i64 1, i64* %base64_1
> +  store i64 2, i64* %base64_0
> +  ret void
> +}
> +
>  declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
>
>
> Modified: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll?rev=275571&r1=275570&r2=275571&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll (original)
> +++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll Fri Jul 15 11:14:34 2016
> @@ -93,3 +93,20 @@ entry:
>    store i64 3, i64* %tf_trapno, align 8
>    ret void
>  }
> +
> +define void @write16To23AndThen24To31(i64* nocapture %P, i64 %n64, i32 %n32, i16 %n16, i8 %n8) {
> +entry:
> +; CHECK-LABEL: @write16To23AndThen24To31(
> +; CHECK: tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 16, i32 8, i1 false)
> +
> +  %base0 = bitcast i64* %P to i8*
> +  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
> +  tail call void @llvm.memset.p0i8.i64(i8* %mybase0, i8 0, i64 32, i32 8, i1 false)
> +
> +  %base64_2 = getelementptr inbounds i64, i64* %P, i64 2
> +  %base64_3 = getelementptr inbounds i64, i64* %P, i64 3
> +
> +  store i64 3, i64* %base64_2
> +  store i64 3, i64* %base64_3
> +  ret void
> +}
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list