[llvm-commits] [llvm] r143630 - in /llvm/trunk: lib/Transforms/Scalar/DeadStoreElimination.cpp test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll

Fri Nov 4 09:27:51 PDT 2011

Hi Eli

Thanks for letting me know.  I'll revert it now until I've fixed the problems.

Pete

On Nov 3, 2011, at 3:09 PM, Eli Friedman wrote:

> On Thu, Nov 3, 2011 at 11:01 AM, Pete Cooper <peter_cooper at apple.com> wrote:
>> Author: pete
>> Date: Thu Nov  3 13:01:56 2011
>> New Revision: 143630
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=143630&view=rev
>> Log:
>> DeadStoreElimination can now trim the size of a store if the end of it is dead.
>> 
>> Only currently done if the later store is writing to a power of 2 address or
>> has the same alignment as the earlier store, as then it's likely to not break up
>> large stores into smaller ones.
>> 
>> Fixes <rdar://problem/10140300>
> 
> This appears to be causing a bunch of failures on buildbots; please
> fix or revert.
> 
> -Eli
> 
>> Added:
>>    llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
>> Modified:
>>    llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
>> 
>> Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=143630&r1=143629&r2=143630&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Thu Nov  3 13:01:56 2011
>> @@ -239,6 +239,24 @@
>>   }
>>  }
>> 
>> +
>> +/// isShortenable - Returns true if this instruction can be safely shortened in
>> +/// length.
>> +static bool isShortenable(Instruction *I) {
>> +  // Don't shorten stores for now
>> +  if (isa<StoreInst>(I))
>> +    return false;
>> +
>> +  IntrinsicInst *II = cast<IntrinsicInst>(I);
>> +  switch (II->getIntrinsicID()) {
>> +    default: return false;
>> +    case Intrinsic::memset:
>> +    case Intrinsic::memcpy:
>> +      // Do shorten memory intrinsics.
>> +      return true;
>> +  }
>> +}
>> +
>>  /// getStoredPointerOperand - Return the pointer that is being written to.
>>  static Value *getStoredPointerOperand(Instruction *I) {
>>   if (StoreInst *SI = dyn_cast<StoreInst>(I))
>> @@ -293,11 +311,24 @@
>>   return false;
>>  }
>> 
>> -/// isCompleteOverwrite - Return true if a store to the 'Later' location
>> +namespace {
>> +  enum OverwriteResult
>> +  {
>> +    OverwriteComplete,
>> +    OverwriteEnd,
>> +    OverwriteUnknown
>> +  };
>> +}
>> +
>> +/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
>>  /// completely overwrites a store to the 'Earlier' location.
>> -static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
>> -                                const AliasAnalysis::Location &Earlier,
>> -                                AliasAnalysis &AA) {
>> +/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
>> +/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
>> +static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
>> +                                   const AliasAnalysis::Location &Earlier,
>> +                                   AliasAnalysis &AA,
>> +                                   int64_t& EarlierOff,
>> +                                   int64_t& LaterOff) {
>>   const Value *P1 = Earlier.Ptr->stripPointerCasts();
>>   const Value *P2 = Later.Ptr->stripPointerCasts();
>> 
>> @@ -311,23 +342,24 @@
>>       // If we have no TargetData information around, then the size of the store
>>       // is inferrable from the pointee type.  If they are the same type, then
>>       // we know that the store is safe.
>> -      if (AA.getTargetData() == 0)
>> -        return Later.Ptr->getType() == Earlier.Ptr->getType();
>> -      return false;
>> +      if (AA.getTargetData() == 0 &&
>> +          Later.Ptr->getType() == Earlier.Ptr->getType())
>> +        return OverwriteComplete;
>> +
>> +      return OverwriteUnknown;
>>     }
>> 
>>     // Make sure that the Later size is >= the Earlier size.
>> -    if (Later.Size < Earlier.Size)
>> -      return false;
>> -    return true;
>> +    if (Later.Size >= Earlier.Size)
>> +      return OverwriteComplete;
>>   }
>> 
>>   // Otherwise, we have to have size information, and the later store has to be
>>   // larger than the earlier one.
>>   if (Later.Size == AliasAnalysis::UnknownSize ||
>>       Earlier.Size == AliasAnalysis::UnknownSize ||
>> -      Later.Size <= Earlier.Size || AA.getTargetData() == 0)
>> -    return false;
>> +      AA.getTargetData() == 0)
>> +    return OverwriteUnknown;
>> 
>>   // Check to see if the later store is to the entire object (either a global,
>>   // an alloca, or a byval argument).  If so, then it clearly overwrites any
>> @@ -340,26 +372,27 @@
>>   // If we can't resolve the same pointers to the same object, then we can't
>>   // analyze them at all.
>>   if (UO1 != UO2)
>> -    return false;
>> +    return OverwriteUnknown;
>> 
>>   // If the "Later" store is to a recognizable object, get its size.
>>   if (isObjectPointerWithTrustworthySize(UO2)) {
>>     uint64_t ObjectSize =
>>       TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
>>     if (ObjectSize == Later.Size)
>> -      return true;
>> +      return OverwriteComplete;
>>   }
>> 
>>   // Okay, we have stores to two completely different pointers.  Try to
>>   // decompose the pointer into a "base + constant_offset" form.  If the base
>>   // pointers are equal, then we can reason about the two stores.
>> -  int64_t EarlierOff = 0, LaterOff = 0;
>> +  EarlierOff = 0;
>> +  LaterOff = 0;
>>   const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
>>   const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
>> 
>>   // If the base pointers still differ, we have two completely different stores.
>>   if (BP1 != BP2)
>> -    return false;
>> +    return OverwriteUnknown;
>> 
>>   // The later store completely overlaps the earlier store if:
>>   //
>> @@ -377,11 +410,24 @@
>>   //
>>   // We have to be careful here as *Off is signed while *.Size is unsigned.
>>   if (EarlierOff >= LaterOff &&
>> +      Later.Size > Earlier.Size &&
>>       uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
>> -    return true;
>> +    return OverwriteComplete;
>> +
>> +  // The other interesting case is if the later store overwrites the end of
>> +  // the earlier store
>> +  //
>> +  //      |--earlier--|
>> +  //                |--   later   --|
>> +  //
>> +  // In this case we may want to trim the size of earlier to avoid generating
>> +  // writes to addresses which will definitely be overwritten later
>> +  if (LaterOff > EarlierOff &&
>> +      LaterOff + Later.Size >= EarlierOff + Earlier.Size)
>> +    return OverwriteEnd;
>> 
>>   // Otherwise, they don't completely overlap.
>> -  return false;
>> +  return OverwriteUnknown;
>>  }
>> 
>>  /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
>> @@ -505,22 +551,52 @@
>>       // If we find a write that is a) removable (i.e., non-volatile), b) is
>>       // completely obliterated by the store to 'Loc', and c) which we know that
>>       // 'Inst' doesn't load from, then we can remove it.
>> -      if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
>> +      if (isRemovable(DepWrite) &&
>>           !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
>> -        DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
>> -              << *DepWrite << "\n  KILLER: " << *Inst << '\n');
>> -
>> -        // Delete the store and now-dead instructions that feed it.
>> -        DeleteDeadInstruction(DepWrite, *MD);
>> -        ++NumFastStores;
>> -        MadeChange = true;
>> +        int64_t InstWriteOffset, DepWriteOffset;
>> +        OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
>> +                                         DepWriteOffset, InstWriteOffset);
>> +        if (OR == OverwriteComplete) {
>> +          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
>> +                << *DepWrite << "\n  KILLER: " << *Inst << '\n');
>> 
>> -        // DeleteDeadInstruction can delete the current instruction in loop
>> -        // cases, reset BBI.
>> -        BBI = Inst;
>> -        if (BBI != BB.begin())
>> -          --BBI;
>> -        break;
>> +          // Delete the store and now-dead instructions that feed it.
>> +          DeleteDeadInstruction(DepWrite, *MD);
>> +          ++NumFastStores;
>> +          MadeChange = true;
>> +
>> +          // DeleteDeadInstruction can delete the current instruction in loop
>> +          // cases, reset BBI.
>> +          BBI = Inst;
>> +          if (BBI != BB.begin())
>> +            --BBI;
>> +          break;
>> +        } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
>> +          // TODO: base this on the target vector size so that if the earlier
>> +          // store was too small to get vector writes anyway then its likely
>> +          // a good idea to shorten it
>> +          // Power of 2 vector writes are probably always a bad idea to optimize
>> +          // as any store/memset/memcpy is likely using vector instructions so
>> +          // shortening it to not vector size is likely to be slower
>> +          MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
>> +          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
>> +          if (llvm::isPowerOf2_64(InstWriteOffset) ||
>> +              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
>> +
>> +            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: "
>> +                  << *DepWrite << "\n  KILLER (offset "
>> +                  << InstWriteOffset << ", "
>> +                  << DepLoc.Size << ")"
>> +                  << *Inst << '\n');
>> +
>> +            Value* DepWriteLength = DepIntrinsic->getLength();
>> +            Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
>> +                                                    InstWriteOffset -
>> +                                                    DepWriteOffset);
>> +            DepIntrinsic->setLength(TrimmedLength);
>> +            MadeChange = true;
>> +          }
>> +        }
>>       }
>> 
>>       // If this is a may-aliased store that is clobbering the store value, we
>> 
>> Added: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll?rev=143630&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll (added)
>> +++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll Thu Nov  3 13:01:56 2011
>> @@ -0,0 +1,78 @@
>> +; RUN: opt < %s -basicaa -dse -S | FileCheck %s
>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>> +
>> +%struct.vec2 = type { <4 x i32>, <4 x i32> }
>> +%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }
>> +
>> + at glob1 = global %struct.vec2 zeroinitializer, align 16
>> + at glob2 = global %struct.vec2plusi zeroinitializer, align 16
>> +
>> +define void @write24to28(i32* nocapture %p) nounwind uwtable ssp {
>> +; CHECK: @write24to28
>> +entry:
>> +  %arrayidx0 = getelementptr inbounds i32* %p, i64 1
>> +  %p3 = bitcast i32* %arrayidx0 to i8*
>> +; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 24, i32 4, i1 false)
>> +  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
>> +  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
>> +  store i32 1, i32* %arrayidx1, align 4
>> +  ret void
>> +}
>> +
>> +define void @write28to32(i32* nocapture %p) nounwind uwtable ssp {
>> +; CHECK: @write28to32
>> +entry:
>> +  %p3 = bitcast i32* %p to i8*
>> +; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
>> +  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
>> +  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
>> +  store i32 1, i32* %arrayidx1, align 4
>> +  ret void
>> +}
>> +
>> +define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp {
>> +; CHECK: @dontwrite28to32memset
>> +entry:
>> +  %p3 = bitcast i32* %p to i8*
>> +; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
>> +  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
>> +  %arrayidx1 = getelementptr inbounds i32* %p, i64 7
>> +  store i32 1, i32* %arrayidx1, align 4
>> +  ret void
>> +}
>> +
>> +define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
>> +; CHECK: @write32to36
>> +entry:
>> +  %0 = bitcast %struct.vec2plusi* %p to i8*
>> +; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false)
>> +  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false)
>> +  %c = getelementptr inbounds %struct.vec2plusi* %p, i64 0, i32 2
>> +  store i32 1, i32* %c, align 4
>> +  ret void
>> +}
>> +
>> +define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp {
>> +; CHECK: @write16to32
>> +entry:
>> +  %0 = bitcast %struct.vec2* %p to i8*
>> +; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false)
>> +  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
>> +  %c = getelementptr inbounds %struct.vec2* %p, i64 0, i32 1
>> +  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
>> +  ret void
>> +}
>> +
>> +define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp {
>> +; CHECK: @dontwrite28to32memcpy
>> +entry:
>> +  %0 = bitcast %struct.vec2* %p to i8*
>> +; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
>> +  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
>> +  %arrayidx1 = getelementptr inbounds %struct.vec2* %p, i64 0, i32 0, i64 7
>> +  store i32 1, i32* %arrayidx1, align 4
>> +  ret void
>> +}
>> +
>> +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
>> +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
>> 
>> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>