[llvm-commits] [llvm] r143630 - in /llvm/trunk: lib/Transforms/Scalar/DeadStoreElimination.cpp test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
Eli Friedman
eli.friedman at gmail.com
Fri Nov 4 10:15:39 PDT 2011
r143668.
-Eli
On Fri, Nov 4, 2011 at 9:27 AM, Peter Cooper <peter_cooper at apple.com> wrote:
> Hi Eli
>
> Thanks for letting me know. I'll revert it now until I've fixed the problems.
>
> Pete
>
> On Nov 3, 2011, at 3:09 PM, Eli Friedman wrote:
>
>> On Thu, Nov 3, 2011 at 11:01 AM, Pete Cooper <peter_cooper at apple.com> wrote:
>>> Author: pete
>>> Date: Thu Nov 3 13:01:56 2011
>>> New Revision: 143630
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=143630&view=rev
>>> Log:
>>> DeadStoreElimination can now trim the size of a store if the end of it is dead.
>>>
>>> This is currently only done if the later store writes at a power-of-2 offset or
>>> at an offset matching the earlier store's alignment, as the trim is then
>>> unlikely to break large stores up into smaller ones.
>>>
>>> Fixes <rdar://problem/10140300>
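For readers following along, a rough C-level sketch of the transformation the
log describes (the beforeDSE/afterDSE names are hypothetical; this mirrors the
@write24to28 test added below and is an illustration, not code from the patch):

  #include <cstring>

  void beforeDSE(int *p) {
    std::memset(p + 1, 0, 28); // writes bytes 4..31 of the buffer
    p[7] = 1;                  // immediately overwrites bytes 28..31
  }

  // Equivalent of the code after the new DSE trim:
  void afterDSE(int *p) {
    std::memset(p + 1, 0, 24); // trimmed: bytes 28..31 were dead
    p[7] = 1;
  }

The last four bytes of the memset are provably dead, so the intrinsic's length
operand can be reduced instead of leaving the whole write untouched.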
>>
>> This appears to be causing a bunch of failures on buildbots; please
>> fix or revert.
>>
>> -Eli
>>
>>> Added:
>>> llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
>>> Modified:
>>> llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
>>>
>>> Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=143630&r1=143629&r2=143630&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
>>> +++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Thu Nov 3 13:01:56 2011
>>> @@ -239,6 +239,24 @@
>>> }
>>> }
>>>
>>> +
>>> +/// isShortenable - Returns true if this instruction can be safely shortened in
>>> +/// length.
>>> +static bool isShortenable(Instruction *I) {
>>> + // Don't shorten stores for now
>>> + if (isa<StoreInst>(I))
>>> + return false;
>>> +
>>> + IntrinsicInst *II = cast<IntrinsicInst>(I);
>>> + switch (II->getIntrinsicID()) {
>>> + default: return false;
>>> + case Intrinsic::memset:
>>> + case Intrinsic::memcpy:
>>> + // Do shorten memory intrinsics.
>>> + return true;
>>> + }
>>> +}
>>> +
>>> /// getStoredPointerOperand - Return the pointer that is being written to.
>>> static Value *getStoredPointerOperand(Instruction *I) {
>>> if (StoreInst *SI = dyn_cast<StoreInst>(I))
>>> @@ -293,11 +311,24 @@
>>> return false;
>>> }
>>>
>>> -/// isCompleteOverwrite - Return true if a store to the 'Later' location
>>> +namespace {
>>> + enum OverwriteResult
>>> + {
>>> + OverwriteComplete,
>>> + OverwriteEnd,
>>> + OverwriteUnknown
>>> + };
>>> +}
>>> +
>>> +/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
>>> /// completely overwrites a store to the 'Earlier' location.
>>> -static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
>>> - const AliasAnalysis::Location &Earlier,
>>> - AliasAnalysis &AA) {
>>> +/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
>>> +/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined.
>>> +static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
>>> + const AliasAnalysis::Location &Earlier,
>>> + AliasAnalysis &AA,
>>> + int64_t& EarlierOff,
>>> + int64_t& LaterOff) {
>>> const Value *P1 = Earlier.Ptr->stripPointerCasts();
>>> const Value *P2 = Later.Ptr->stripPointerCasts();
>>>
>>> @@ -311,23 +342,24 @@
>>> // If we have no TargetData information around, then the size of the store
>>> // is inferrable from the pointee type. If they are the same type, then
>>> // we know that the store is safe.
>>> - if (AA.getTargetData() == 0)
>>> - return Later.Ptr->getType() == Earlier.Ptr->getType();
>>> - return false;
>>> + if (AA.getTargetData() == 0 &&
>>> + Later.Ptr->getType() == Earlier.Ptr->getType())
>>> + return OverwriteComplete;
>>> +
>>> + return OverwriteUnknown;
>>> }
>>>
>>> // Make sure that the Later size is >= the Earlier size.
>>> - if (Later.Size < Earlier.Size)
>>> - return false;
>>> - return true;
>>> + if (Later.Size >= Earlier.Size)
>>> + return OverwriteComplete;
>>> }
>>>
>>> // Otherwise, we have to have size information, and the later store has to be
>>> // larger than the earlier one.
>>> if (Later.Size == AliasAnalysis::UnknownSize ||
>>> Earlier.Size == AliasAnalysis::UnknownSize ||
>>> - Later.Size <= Earlier.Size || AA.getTargetData() == 0)
>>> - return false;
>>> + AA.getTargetData() == 0)
>>> + return OverwriteUnknown;
>>>
>>> // Check to see if the later store is to the entire object (either a global,
>>> // an alloca, or a byval argument). If so, then it clearly overwrites any
>>> @@ -340,26 +372,27 @@
>>> // If we can't resolve the same pointers to the same object, then we can't
>>> // analyze them at all.
>>> if (UO1 != UO2)
>>> - return false;
>>> + return OverwriteUnknown;
>>>
>>> // If the "Later" store is to a recognizable object, get its size.
>>> if (isObjectPointerWithTrustworthySize(UO2)) {
>>> uint64_t ObjectSize =
>>> TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
>>> if (ObjectSize == Later.Size)
>>> - return true;
>>> + return OverwriteComplete;
>>> }
>>>
>>> // Okay, we have stores to two completely different pointers. Try to
>>> // decompose the pointer into a "base + constant_offset" form. If the base
>>> // pointers are equal, then we can reason about the two stores.
>>> - int64_t EarlierOff = 0, LaterOff = 0;
>>> + EarlierOff = 0;
>>> + LaterOff = 0;
>>> const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
>>> const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
>>>
>>> // If the base pointers still differ, we have two completely different stores.
>>> if (BP1 != BP2)
>>> - return false;
>>> + return OverwriteUnknown;
>>>
>>> // The later store completely overlaps the earlier store if:
>>> //
>>> @@ -377,11 +410,24 @@
>>> //
>>> // We have to be careful here as *Off is signed while *.Size is unsigned.
>>> if (EarlierOff >= LaterOff &&
>>> + Later.Size > Earlier.Size &&
>>> uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
>>> - return true;
>>> + return OverwriteComplete;
>>> +
>>> + // The other interesting case is if the later store overwrites the end of
>>> +  // the earlier store:
>>> + //
>>> + // |--earlier--|
>>> + // |-- later --|
>>> + //
>>> +  // In this case we may want to trim the size of the earlier store to avoid
>>> +  // generating writes to addresses which will definitely be overwritten later.
>>> + if (LaterOff > EarlierOff &&
>>> + LaterOff + Later.Size >= EarlierOff + Earlier.Size)
>>> + return OverwriteEnd;
>>>
>>> // Otherwise, they don't completely overlap.
>>> - return false;
>>> + return OverwriteUnknown;
>>> }
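With concrete byte offsets, the two conditions above work out as follows; this
is a standalone sketch whose variable names mirror the patch, not pass code:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Complete overwrite: earlier = [4, 12), later = [0, 16).
    int64_t EarlierOff = 4, LaterOff = 0;
    uint64_t EarlierSize = 8, LaterSize = 16;
    assert(EarlierOff >= LaterOff && LaterSize > EarlierSize &&
           uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize);

    // End overwrite (the @write24to28 shape): earlier = [4, 32),
    // later = [28, 32) kills the earlier write's last 4 bytes.
    EarlierOff = 4; LaterOff = 28; EarlierSize = 28; LaterSize = 4;
    assert(LaterOff > EarlierOff &&
           LaterOff + LaterSize >= EarlierOff + EarlierSize);
    return 0;
  }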
>>>
>>> /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
>>> @@ -505,22 +551,52 @@
>>> // If we find a write that is a) removable (i.e., non-volatile), b) is
>>> // completely obliterated by the store to 'Loc', and c) which we know that
>>> // 'Inst' doesn't load from, then we can remove it.
>>> - if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
>>> + if (isRemovable(DepWrite) &&
>>> !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
>>> - DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
>>> - << *DepWrite << "\n KILLER: " << *Inst << '\n');
>>> -
>>> - // Delete the store and now-dead instructions that feed it.
>>> - DeleteDeadInstruction(DepWrite, *MD);
>>> - ++NumFastStores;
>>> - MadeChange = true;
>>> + int64_t InstWriteOffset, DepWriteOffset;
>>> + OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
>>> + DepWriteOffset, InstWriteOffset);
>>> + if (OR == OverwriteComplete) {
>>> + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
>>> + << *DepWrite << "\n KILLER: " << *Inst << '\n');
>>>
>>> - // DeleteDeadInstruction can delete the current instruction in loop
>>> - // cases, reset BBI.
>>> - BBI = Inst;
>>> - if (BBI != BB.begin())
>>> - --BBI;
>>> - break;
>>> + // Delete the store and now-dead instructions that feed it.
>>> + DeleteDeadInstruction(DepWrite, *MD);
>>> + ++NumFastStores;
>>> + MadeChange = true;
>>> +
>>> + // DeleteDeadInstruction can delete the current instruction in loop
>>> + // cases, reset BBI.
>>> + BBI = Inst;
>>> + if (BBI != BB.begin())
>>> + --BBI;
>>> + break;
>>> + } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
>>> +        // TODO: base this on the target vector size so that if the earlier
>>> +        // store was too small to get vector writes anyway then it's likely
>>> +        // a good idea to shorten it.
>>> +        // Shortening power-of-2-sized writes is probably always a bad idea,
>>> +        // as any store/memset/memcpy of that size is likely to use vector
>>> +        // instructions, and shortening to a non-vector size is likely to
>>> +        // be slower.
>>> + MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
>>> + unsigned DepWriteAlign = DepIntrinsic->getAlignment();
>>> + if (llvm::isPowerOf2_64(InstWriteOffset) ||
>>> + ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
>>> +
>>> + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: "
>>> + << *DepWrite << "\n KILLER (offset "
>>> + << InstWriteOffset << ", "
>>> + << DepLoc.Size << ")"
>>> + << *Inst << '\n');
>>> +
>>> + Value* DepWriteLength = DepIntrinsic->getLength();
>>> + Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
>>> + InstWriteOffset -
>>> + DepWriteOffset);
>>> + DepIntrinsic->setLength(TrimmedLength);
>>> + MadeChange = true;
>>> + }
>>> + }
>>> }
>>>
>>> // If this is a may-aliased store that is clobbering the store value, we
>>>
>>> Added: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll?rev=143630&view=auto
>>> ==============================================================================
>>> --- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll (added)
>>> +++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll Thu Nov 3 13:01:56 2011
>>> @@ -0,0 +1,78 @@
>>> +; RUN: opt < %s -basicaa -dse -S | FileCheck %s
>>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>>> +
>>> +%struct.vec2 = type { <4 x i32>, <4 x i32> }
>>> +%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }
>>> +
>>> + at glob1 = global %struct.vec2 zeroinitializer, align 16
>>> + at glob2 = global %struct.vec2plusi zeroinitializer, align 16
>>> +
>>> +define void @write24to28(i32* nocapture %p) nounwind uwtable ssp {
>>> +; CHECK: @write24to28
>>> +entry:
>>> + %arrayidx0 = getelementptr inbounds i32* %p, i64 1
>>> + %p3 = bitcast i32* %arrayidx0 to i8*
>>> +; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 24, i32 4, i1 false)
>>> + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
>>> + %arrayidx1 = getelementptr inbounds i32* %p, i64 7
>>> + store i32 1, i32* %arrayidx1, align 4
>>> + ret void
>>> +}
>>> +
>>> +define void @write28to32(i32* nocapture %p) nounwind uwtable ssp {
>>> +; CHECK: @write28to32
>>> +entry:
>>> + %p3 = bitcast i32* %p to i8*
>>> +; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
>>> + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
>>> + %arrayidx1 = getelementptr inbounds i32* %p, i64 7
>>> + store i32 1, i32* %arrayidx1, align 4
>>> + ret void
>>> +}
>>> +
>>> +define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp {
>>> +; CHECK: @dontwrite28to32memset
>>> +entry:
>>> + %p3 = bitcast i32* %p to i8*
>>> +; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
>>> + call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
>>> + %arrayidx1 = getelementptr inbounds i32* %p, i64 7
>>> + store i32 1, i32* %arrayidx1, align 4
>>> + ret void
>>> +}
>>> +
>>> +define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
>>> +; CHECK: @write32to36
>>> +entry:
>>> + %0 = bitcast %struct.vec2plusi* %p to i8*
>>> +; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false)
>>> + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false)
>>> + %c = getelementptr inbounds %struct.vec2plusi* %p, i64 0, i32 2
>>> + store i32 1, i32* %c, align 4
>>> + ret void
>>> +}
>>> +
>>> +define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp {
>>> +; CHECK: @write16to32
>>> +entry:
>>> + %0 = bitcast %struct.vec2* %p to i8*
>>> +; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false)
>>> + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
>>> + %c = getelementptr inbounds %struct.vec2* %p, i64 0, i32 1
>>> + store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
>>> + ret void
>>> +}
>>> +
>>> +define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp {
>>> +; CHECK: @dontwrite28to32memcpy
>>> +entry:
>>> + %0 = bitcast %struct.vec2* %p to i8*
>>> +; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
>>> + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
>>> + %arrayidx1 = getelementptr inbounds %struct.vec2* %p, i64 0, i32 0, i64 7
>>> + store i32 1, i32* %arrayidx1, align 4
>>> + ret void
>>> +}
>>> +
>>> +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
>>> +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>>
>
>