[llvm-commits] [llvm] r52611 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll

Sun Jun 22 10:46:21 PDT 2008

Author: lattner
Date: Sun Jun 22 12:46:21 2008
New Revision: 52611

URL: http://llvm.org/viewvc/llvm-project?rev=52611&view=rev
Log:
Fix PR2369 by making scalarrepl more careful about promoting
aggregates (structs and arrays).  Its default threshold is to promote
things that are smaller than 128 bytes, which is sane.  However, it is
not sane to do this for things that turn into 128 *registers*.  Add a
cap on the number of registers introduced, defaulting to 128/4=32.


Added:
    llvm/trunk/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=52611&r1=52610&r2=52611&view=diff

==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Sun Jun 22 12:46:21 2008
@@ -178,6 +178,14 @@
   return Changed;
 }
 
+/// getNumSAElements - Return the number of elements in the specified struct
+/// or array.
+static uint64_t getNumSAElements(const Type *T) {
+  if (const StructType *ST = dyn_cast<StructType>(T))
+    return ST->getNumElements();
+  return cast<ArrayType>(T)->getNumElements();
+}
+
 // performScalarRepl - This algorithm is a simple worklist driven algorithm,
 // which runs on all of the malloc/alloca instructions in the function, removing
 // them if they are only used by getelementptr instructions.
@@ -224,7 +232,10 @@
         (isa<StructType>(AI->getAllocatedType()) ||
          isa<ArrayType>(AI->getAllocatedType())) &&
         AI->getAllocatedType()->isSized() &&
-        TD.getABITypeSize(AI->getAllocatedType()) < SRThreshold) {
+        // Do not promote any struct whose size is larger than "128" bytes.
+        TD.getABITypeSize(AI->getAllocatedType()) < SRThreshold &&
+        // Do not promote any struct into more than "32" separate vars.
+        getNumSAElements(AI->getAllocatedType()) < SRThreshold/4) {
       // Check that all of the users of the allocation are capable of being
       // transformed.
       switch (isSafeAllocaToScalarRepl(AI)) {
@@ -672,11 +683,9 @@
       // If this is a memcpy/memmove, emit a GEP of the other element address.
       Value *OtherElt = 0;
       if (OtherPtr) {
-        Value *Idx[2];
-        Idx[0] = Zero;
-        Idx[1] = ConstantInt::get(Type::Int32Ty, i);
+        Value *Idx[2] = { Zero, ConstantInt::get(Type::Int32Ty, i) };
         OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
-                                             OtherPtr->getNameStr()+"."+utostr(i),
+                                           OtherPtr->getNameStr()+"."+utostr(i),
                                              MI);
       }
 

Added: llvm/trunk/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll?rev=52611&view=auto

==============================================================================
--- llvm/trunk/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll (added)
+++ llvm/trunk/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll Sun Jun 22 12:46:21 2008
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | opt -scalarrepl | llvm-dis | grep {call.*mem} 
+; PR2369
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin8"
+
+define void @memtest1(i8* %dst, i8* %src) nounwind  {
+entry:
+	%temp = alloca [100 x i8]		; <[100 x i8]*> [#uses=2]
+	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
+	%temp1 = bitcast [100 x i8]* %temp to i8*		; <i8*> [#uses=1]
+	call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 100, i32 1 )
+	%temp3 = bitcast [100 x i8]* %temp to i8*		; <i8*> [#uses=1]
+	call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 100, i32 1 )
+	ret void
+}
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind