[llvm] r178912 - Disable the optimization about promoting vector-element-access with symbolic index.

Fri Apr 5 14:07:08 PDT 2013

Author: shuxin_yang
Date: Fri Apr  5 16:07:08 2013
New Revision: 178912

URL: http://llvm.org/viewvc/llvm-project?rev=178912&view=rev
Log:
Disable the optimization about promoting vector-element-access with symbolic index.

This optimization is unstable at this moment; it 
  1) block us on a very important application
  2) PR15200
  3) test6 and test7 in test/Transforms/ScalarRepl/dynamic-vector-gep.ll
     (the CHECK command compare the output against wrong result)

   I personally believe this optimization should not have any impact on the
autovectorized code, as auto-vectorizer is supposed to put gather/scatter
in a "right" way.  Although in theory downstream optimizaters might reveal 
some gather/scatter optimization opportunities, the chance is quite slim.

   For the hand-crafted vectorizing code, in term of redundancy elimination,
load-CSE, copy-propagation and DSE can collectively achieve the same result,
but in much simpler way. On the other hand, these optimizers are able to 
improve the code in a incremental way; in contrast, SROA is sort of all-or-none
approach. However, SROA might slighly win in stack size, as it tries to figure 
out a stretch of memory tightenly cover the area accessed by the dynamic index.

 rdar://13174884
 PR15200


Removed:
    llvm/trunk/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp?rev=178912&r1=178911&r2=178912&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/ScalarReplAggregates.cpp Fri Apr  5 16:07:08 2013
@@ -1724,17 +1724,8 @@ void SROA::isSafeGEP(GetElementPtrInst *
       continue;
 
     ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
-    if (!IdxVal) {
-      // Non constant GEPs are only a problem on arrays, structs, and pointers
-      // Vectors can be dynamically indexed.
-      // FIXME: Add support for dynamic indexing on arrays.  This should be
-      // ok on any subarrays of the alloca array, eg, a[0][i] is ok, but a[i][0]
-      // isn't.
-      if (!(*GEPIt)->isVectorTy())
-        return MarkUnsafe(Info, GEPI);
-      NonConstant = true;
-      NonConstantIdxSize = TD->getTypeAllocSize(*GEPIt);
-    }
+    if (!IdxVal)
+      return MarkUnsafe(Info, GEPI);
   }
 
   // Compute the offset due to this GEP and check if the alloca has a

Removed: llvm/trunk/test/Transforms/ScalarRepl/dynamic-vector-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/ScalarRepl/dynamic-vector-gep.ll?rev=178911&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/ScalarRepl/dynamic-vector-gep.ll (original)
+++ llvm/trunk/test/Transforms/ScalarRepl/dynamic-vector-gep.ll (removed)
@@ -1,167 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-; CHECK: @test1
-; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
-; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
-; CHECK: memset
-; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
-
-; Split the array but don't replace the memset with an insert
-; element as its not a constant offset.
-; The load, however, can be replaced with an extract element.
-define float @test1(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
-  %cast = bitcast float* %ptr1 to i8*
-  call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false)
-  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: @test2
-; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2
-
-; Do SROA on the array when it has dynamic vector reads and writes.
-define float @test2(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test3
-; CHECK: %0 = alloca [4 x <4 x float>]
-; CHECK-NOT: alloca
-
-; Don't do SROA on a dynamically indexed vector when it spans
-; more than one array element of the alloca array it is within.
-define float @test3(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
-  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test4
-; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <16 x float> %0, i32 %idx2
-
-; Don't do SROA on a dynamically indexed vector when it spans
-; more than one array element of the alloca array it is within.
-; However, unlike test3, the store is on the vector type
-; so SROA will convert the large alloca into the large vector
-; type and do all accesses with insert/extract element
-define float @test4(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
-  store <16 x float> zeroinitializer, <16 x float>* %bigvec
-  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: @test5
-; CHECK: %0 = alloca [4 x <4 x float>]
-; CHECK-NOT: alloca
-
-; Don't do SROA as the is a second dynamically indexed array
-; which may span multiple elements of the alloca.
-define float @test5(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca [4 x <4 x float>]
-  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
-  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
-  %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]*
-  %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr4
-  ret float %ret
-}
-
-; CHECK: test6
-; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
-
-%vector.pair = type { %vector.anon, %vector.anon }
-%vector.anon = type { %vector }
-%vector = type { <4 x float> }
-
-; Dynamic GEPs on vectors were crashing when the vector was inside a struct
-; as the new GEP for the new alloca might not include all the indices from
-; the original GEP, just the indices it needs to get to the correct offset of
-; some type, not necessarily the dynamic vector.
-; This test makes sure we don't have this crash.
-define float @test6(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca %vector.pair
-  store %vector.pair zeroinitializer, %vector.pair* %0
-  %ptr1 = getelementptr %vector.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr %vector.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test7
-; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
-; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2
-
-%array.pair = type { [2 x %array.anon], %array.anon }
-%array.anon = type { [2 x %vector] }
-
-; This is the same as test6 and tests the same crash, but on arrays.
-define float @test7(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca %array.pair
-  store %array.pair zeroinitializer, %array.pair* %0
-  %ptr1 = getelementptr %array.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
-  store float 1.0, float* %ptr1
-  %ptr2 = getelementptr %array.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 %idx2
-  %ret = load float* %ptr2
-  ret float %ret
-}
-
-; CHECK: test8
-; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
-; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
-; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
-; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]
-
-; Do SROA on the vector when it has dynamic vector reads and writes
-; from a non-zero offset.
-define float @test8(i32 %idx1, i32 %idx2) {
-entry:
-  %0 = alloca <4 x float>
-  store <4 x float> zeroinitializer, <4 x float>* %0
-  %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1
-  %ptr2 = bitcast float* %ptr1 to <3 x float>*
-  %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1
-  store float 1.0, float* %ptr3
-  %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2
-  %ptr5 = bitcast float* %ptr4 to <2 x float>*
-  %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2
-  %ret = load float* %ptr6
-  ret float %ret
-}
-
-declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)