[llvm-commits] [llvm] r128453 - in /llvm/trunk: lib/Transforms/Scalar/ScalarReplAggregates.cpp test/Transforms/ScalarRepl/inline-vector.ll test/Transforms/ScalarRepl/vector_promote.ll

Mon Mar 28 22:19:52 PDT 2011

Author: zwarich
Date: Tue Mar 29 00:19:52 2011
New Revision: 128453

URL: http://llvm.org/viewvc/llvm-project?rev=128453&view=rev
Log:
Do some simple copy propagation through integer loads and stores when promoting
vector types. This helps a lot with inlined functions when using the ARM soft
float ABI. Fixes <rdar://problem/9184212>.

Added:
    llvm/trunk/test/Transforms/ScalarRepl/inline-vector.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
    llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll

Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=128453&r1=128452&r2=128453&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Tue Mar 29 00:19:52 2011
@@ -252,7 +252,7 @@
 
 private:
   bool CanConvertToScalar(Value *V, uint64_t Offset);
-  void MergeInType(const Type *In, uint64_t Offset);
+  void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
   bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
   void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
 
@@ -315,7 +315,8 @@
 ///      large) integer type with extract and insert operations where the loads
 ///      and stores would mutate the memory.  We mark this by setting VectorTy
 ///      to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) {
+void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
+                                      bool IsLoadOrStore) {
   // If we already decided to turn this into a blob of integer memory, there is
   // nothing to be done.
   if (VectorTy && VectorTy->isVoidTy())
@@ -331,10 +332,14 @@
   } else if (In->isFloatTy() || In->isDoubleTy() ||
              (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
               isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
+    // Full width accesses can be ignored, because they can always be turned
+    // into bitcasts.
+    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
+    if (IsLoadOrStore && EltSize == AllocaSize)
+      return;
     // If we're accessing something that could be an element of a vector, see
     // if the implied vector agrees with what we already have and if Offset is
     // compatible with it.
-    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
     if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
         (VectorTy == 0 ||
          cast<VectorType>(VectorTy)->getElementType()
@@ -442,7 +447,7 @@
       if (LI->getType()->isX86_MMXTy())
         return false;
       HadNonMemTransferAccess = true;
-      MergeInType(LI->getType(), Offset);
+      MergeInType(LI->getType(), Offset, true);
       continue;
     }
 
@@ -453,7 +458,7 @@
       if (SI->getOperand(0)->getType()->isX86_MMXTy())
         return false;
       HadNonMemTransferAccess = true;
-      MergeInType(SI->getOperand(0)->getType(), Offset);
+      MergeInType(SI->getOperand(0)->getType(), Offset, true);
       continue;
     }
 
@@ -691,11 +696,11 @@
   // If the result alloca is a vector type, this is either an element
   // access or a bitcast to another vector type of the same size.
   if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
-    if (ToType->isVectorTy()) {
-      unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
-      if (ToTypeSize == AllocaSize)
-        return Builder.CreateBitCast(FromVal, ToType, "tmp");
+    unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
+    if (ToTypeSize == AllocaSize)
+      return Builder.CreateBitCast(FromVal, ToType, "tmp");
 
+    if (ToType->isVectorTy()) {
       assert(isPowerOf2_64(AllocaSize / ToTypeSize) &&
              "Partial vector access of an alloca must have a power-of-2 size "
              "ratio.");

Added: llvm/trunk/test/Transforms/ScalarRepl/inline-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/inline-vector.ll?rev=128453&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ScalarRepl/inline-vector.ll (added)
+++ llvm/trunk/test/Transforms/ScalarRepl/inline-vector.ll Tue Mar 29 00:19:52 2011
@@ -0,0 +1,53 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10.0.0"
+
+%struct.Vector4 = type { float, float, float, float }
+@f.vector = internal constant %struct.Vector4 { float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 }, align 16
+
+; CHECK: define void @f
+; CHECK-NOT: alloca
+; CHECK: phi <4 x float>
+
+define void @f() nounwind ssp {
+entry:
+  %i = alloca i32, align 4
+  %vector = alloca %struct.Vector4, align 16
+  %agg.tmp = alloca %struct.Vector4, align 16
+  %tmp = bitcast %struct.Vector4* %vector to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i32 16, i1 false)
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  store i32 %storemerge, i32* %i, align 4
+  %cmp = icmp slt i32 %storemerge, 1000000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %tmp2 = bitcast %struct.Vector4* %agg.tmp to i8*
+  %tmp3 = bitcast %struct.Vector4* %vector to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
+  %0 = bitcast %struct.Vector4* %agg.tmp to [2 x i64]*
+  %1 = load [2 x i64]* %0, align 16
+  %tmp2.i = extractvalue [2 x i64] %1, 0
+  %tmp3.i = zext i64 %tmp2.i to i128
+  %tmp10.i = bitcast i128 %tmp3.i to <4 x float>
+  %sub.i.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp10.i
+  %2 = bitcast %struct.Vector4* %vector to <4 x float>*
+  store <4 x float> %sub.i.i, <4 x float>* %2, align 16
+  %tmp4 = load i32* %i, align 4
+  %inc = add nsw i32 %tmp4, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %x = getelementptr inbounds %struct.Vector4* %vector, i32 0, i32 0
+  %tmp5 = load float* %x, align 16
+  %conv = fpext float %tmp5 to double
+  %call = call i32 (...)* @printf(double %conv) nounwind
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare i32 @printf(...)

Modified: llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll?rev=128453&r1=128452&r2=128453&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll (original)
+++ llvm/trunk/test/Transforms/ScalarRepl/vector_promote.ll Tue Mar 29 00:19:52 2011
@@ -94,7 +94,7 @@
 	%tmp = load i64* %P
 	ret i64 %tmp
 ; CHECK: @test6
-; CHECK: bitcast <2 x float> %X to <1 x i64>
+; CHECK: bitcast <2 x float> %X to i64
 ; CHECK: ret i64
 }