[llvm-commits] [llvm] r151620 - in /llvm/trunk: lib/Transforms/Scalar/DeadStoreElimination.cpp test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll

Pete Cooper peter_cooper at apple.com
Mon Feb 27 20:27:10 PST 2012


Author: pete
Date: Mon Feb 27 22:27:10 2012
New Revision: 151620

URL: http://llvm.org/viewvc/llvm-project?rev=151620&view=rev
Log:
DSE: Shorten memset when a later store overwrites the start of it

Added:
    llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp?rev=151620&r1=151619&r2=151620&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/DeadStoreElimination.cpp Mon Feb 27 22:27:10 2012
@@ -259,6 +259,13 @@
   }
 }
 
+
+/// isMemset - Returns true if I is an IntrinsicInst whose ID is Intrinsic::memset
+static bool isMemset(Instruction *I) {
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); // null if not an intrinsic
+  return II && II->getIntrinsicID() == Intrinsic::memset;
+}
+
 /// getStoredPointerOperand - Return the pointer that is being written to.
 static Value *getStoredPointerOperand(Instruction *I) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
@@ -310,14 +317,17 @@
   {
     OverwriteComplete,
     OverwriteEnd,
+    OverwriteStart,
     OverwriteUnknown
   };
 }
 
 /// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
 /// completely overwrites a store to the 'Earlier' location.
-/// 'OverwriteEnd' if the end of the 'Earlier' location is completely 
-/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined
+/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
+/// overwritten by 'Later', 'OverwriteStart' if the start of 'Earlier'
+/// is completely overwritten by 'Later', or 'OverwriteUnknown' if nothing
+/// can be determined.
 static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
                                    const AliasAnalysis::Location &Earlier,
                                    AliasAnalysis &AA,
@@ -418,6 +428,21 @@
       LaterOff < int64_t(EarlierOff + Earlier.Size) &&
       int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
     return OverwriteEnd;
+  
+  // The other interesting case is if the later store overwrites the start of
+  // the earlier store
+  //
+  //                    |--earlier--|
+  //      |--   later   --|
+  //
+  // In this case we may want to trim the size of earlier to avoid generating
+  // writes to addresses which will definitely be overwritten later
+  if (EarlierOff >= LaterOff &&
+      EarlierOff < int64_t(LaterOff + Later.Size) &&
+      int64_t(EarlierOff + Earlier.Size) >= int64_t(LaterOff + Later.Size)) {
+    LaterOff = LaterOff + Later.Size;
+    return OverwriteStart;
+  }
 
   // Otherwise, they don't completely overlap.
   return OverwriteUnknown;
@@ -589,6 +614,45 @@
             DepIntrinsic->setLength(TrimmedLength);
             MadeChange = true;
           }
+        } else if (OR == OverwriteStart && isMemset(DepWrite)) {
+          // TODO: base this on the target vector size so that if the earlier
+          // store was too small to get vector writes anyway then it's likely
+          // a good idea to shorten it.
+          // Power of 2 vector writes are probably always a bad idea to optimize
+          // as any store/memset/memcpy is likely using vector instructions, so
+          // shortening it to a non-vector size is likely to be slower.
+          // TODO: shorten memcpy and memmove by offsetting the source address.
+          MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+          if (llvm::isPowerOf2_64(InstWriteOffset) ||
+              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
+            
+            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW START: "
+                  << *DepWrite << "\n  KILLER (offset " 
+                  << InstWriteOffset << ", " 
+                  << DepWriteOffset << ", " 
+                  << DepLoc.Size << ")"
+                  << *Inst << '\n');
+            
+            Value* DepWriteLength = DepIntrinsic->getLength();
+            Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
+                                                    DepLoc.Size -
+                                                    (InstWriteOffset - 
+                                                    DepWriteOffset));
+            DepIntrinsic->setLength(TrimmedLength);
+            const TargetData *TD = AA->getTargetData();
+            Type *IntPtrTy = TD->getIntPtrType(BB.getContext());
+            Value* Offset = ConstantInt::get(IntPtrTy,
+                                             InstWriteOffset - DepWriteOffset);
+            // Offset the start of the memset with a GEP.  As the memset type is
+            // i8* a GEP will do this without needing to use ptrtoint, etc.
+            Value *Dest = GetElementPtrInst::Create(DepIntrinsic->getRawDest(),
+                                                    Offset,
+                                                    "",
+                                                    DepWrite);
+            DepIntrinsic->setDest(Dest);
+            MadeChange = true;
+          }
         }
       }
 

Added: llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll?rev=151620&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll (added)
+++ llvm/trunk/test/Transforms/DeadStoreElimination/OverwriteStoreStart.ll Mon Feb 27 22:27:10 2012
@@ -0,0 +1,71 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.vec2 = type { <4 x i32>, <4 x i32> }
+%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }
+
+@glob1 = global %struct.vec2 zeroinitializer, align 16
+@glob2 = global %struct.vec2plusi zeroinitializer, align 16
+
+define void @write4to8(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write4to8
+entry:
+  %arrayidx0 = getelementptr inbounds i32* %p, i64 1
+  %p3 = bitcast i32* %arrayidx0 to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 24, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+  %arrayidx1 = getelementptr inbounds i32* %p, i64 1
+  store i32 1, i32* %arrayidx1, align 4
+  ret void
+}
+
+define void @write4to12(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write4to12
+entry:
+%arrayidx0 = getelementptr inbounds i32* %p, i64 1
+%p3 = bitcast i32* %arrayidx0 to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 20, i32 4, i1 false)
+call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+%arrayidx1 = bitcast i32* %arrayidx0 to i64*
+store i64 1, i64* %arrayidx1, align 4
+ret void
+}
+
+define void @write4to8_2(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write4to8_2
+entry:
+%arrayidx0 = getelementptr inbounds i32* %p, i64 1
+%p3 = bitcast i32* %arrayidx0 to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 24, i32 4, i1 false)
+call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+%arrayidx1 = bitcast i32* %p to i64*
+store i64 1, i64* %arrayidx1, align 4
+ret void
+}
+
+define void @dontwrite4to6(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @dontwrite4to6
+entry:
+%arrayidx0 = getelementptr inbounds i32* %p, i64 1
+%p3 = bitcast i32* %arrayidx0 to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+%arrayidx1 = bitcast i32* %arrayidx0 to i16*
+store i16 1, i16* %arrayidx1, align 4
+ret void
+}
+
+define void @write4to8_neg_gep(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write4to8_neg_gep
+entry:
+%arrayidx0 = getelementptr inbounds i32* %p, i64 -1
+%p3 = bitcast i32* %arrayidx0 to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %{{[0-9]+}}, i8 0, i64 24, i32 4, i1 false)
+call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+%neg2 = getelementptr inbounds i32* %p, i64 -2
+%arrayidx1 = bitcast i32* %neg2 to i64*
+store i64 1, i64* %arrayidx1, align 4
+ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind





More information about the llvm-commits mailing list