[llvm-commits] [polly] r154586 - in /polly/trunk: lib/CodeGen/CodeGeneration.cpp test/CodeGen/simple_vec_call.ll test/CodeGen/simple_vec_call_2.ll test/CodeGen/simple_vec_impossible.ll

Thu Apr 12 03:46:55 PDT 2012

Author: grosser
Date: Thu Apr 12 05:46:55 2012
New Revision: 154586

URL: http://llvm.org/viewvc/llvm-project?rev=154586&view=rev
Log:
CodeGen: Generate scalar code if vector instructions cannot be generated

This fixes two crashes that appeared in case of:
  - A load of a non vectorizable type (e.g. float**)
  - An instruction that is not vectorizable (e.g. call)

Added:
    polly/trunk/test/CodeGen/simple_vec_call.ll
    polly/trunk/test/CodeGen/simple_vec_call_2.ll
    polly/trunk/test/CodeGen/simple_vec_impossible.ll
Modified:
    polly/trunk/lib/CodeGen/CodeGeneration.cpp

Modified: polly/trunk/lib/CodeGen/CodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/CodeGeneration.cpp?rev=154586&r1=154585&r2=154586&view=diff
==============================================================================

--- polly/trunk/lib/CodeGen/CodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/CodeGeneration.cpp Thu Apr 12 05:46:55 2012
@@ -569,6 +569,12 @@
   void copyStore(const StoreInst *Store, ValueMapT &VectorMap,
                  VectorValueMapT &ScalarMaps);
 
+  void copyInstScalarized(const Instruction *Inst, ValueMapT &VectorMap,
+                          VectorValueMapT &ScalarMaps);
+
+  bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
+                           VectorValueMapT &ScalarMaps);
+
   bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
 
   void copyInstruction(const Instruction *Inst, ValueMapT &VectorMap,
@@ -680,18 +686,16 @@
 void VectorBlockGenerator::generateLoad(const LoadInst *Load,
                                         ValueMapT &VectorMap,
                                         VectorValueMapT &ScalarMaps) {
-  Value *NewLoad;
-
-  if (GroupedUnrolling) {
+  if (GroupedUnrolling || !VectorType::isValidElementType(Load->getType())) {
     for (int i = 0; i < getVectorWidth(); i++)
       ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
                                                GlobalMaps[i]);
-
     return;
   }
 
   MemoryAccess &Access = Statement.getAccessFor(Load);
 
+  Value *NewLoad;
   if (Access.isStrideZero(isl_set_copy(Domain)))
     NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
   else if (Access.isStrideOne(isl_set_copy(Domain)))
@@ -772,6 +776,63 @@
   return false;
 }
 
+bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
+                                               ValueMapT &VectorMap,
+                                               VectorValueMapT &ScalarMaps) {
+  bool HasVectorOperand = false;
+  int VectorWidth = getVectorWidth();
+
+  for (Instruction::const_op_iterator OI = Inst->op_begin(),
+       OE = Inst->op_end(); OI != OE; ++OI) {
+    ValueMapT::iterator VecOp = VectorMap.find(*OI);
+
+    if (VecOp == VectorMap.end())
+      continue;
+
+    HasVectorOperand = true;
+    Value *NewVector = VecOp->second;
+
+    for (int i = 0; i < VectorWidth; ++i) {
+      ValueMapT &SM = ScalarMaps[i];
+
+      // If there is one scalar extracted, all scalar elements should have
+      // already been extracted by the code here. So no need to check for the
+      // existence of all of them.
+      if (SM.count(*OI))
+        break;
+
+      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
+    }
+  }
+
+  return HasVectorOperand;
+}
+
+void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
+                                              ValueMapT &VectorMap,
+                                              VectorValueMapT &ScalarMaps) {
+  bool HasVectorOperand;
+  int VectorWidth = getVectorWidth();
+
+  HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
+
+  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
+    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
+
+  if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
+    return;
+
+  // Make the result available as vector value.
+  VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
+  Value *Vector = UndefValue::get(VectorType);
+
+  for (int i = 0; i < VectorWidth; i++)
+    Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
+                                         Builder.getInt32(i));
+
+  VectorMap[Inst] = Vector;
+}
+
 int VectorBlockGenerator::getVectorWidth() {
   return GlobalMaps.size();
 }
@@ -805,11 +866,11 @@
       return;
     }
 
-    llvm_unreachable("Cannot issue vector code for this instruction");
+    // Fallthrough: We generate scalar instructions if we don't know how to
+    // generate vector code.
   }
 
-  for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
-    copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
+  copyInstScalarized(Inst, VectorMap, ScalarMaps);
 }
 
 void VectorBlockGenerator::copyBB() {

Added: polly/trunk/test/CodeGen/simple_vec_call.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/CodeGen/simple_vec_call.ll?rev=154586&view=auto
==============================================================================
--- polly/trunk/test/CodeGen/simple_vec_call.ll (added)
+++ polly/trunk/test/CodeGen/simple_vec_call.ll Thu Apr 12 05:46:55 2012
@@ -0,0 +1,43 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -S %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+declare float @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+  br label %body
+
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+  %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+  %value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+  %result = tail call float @foo(float %value) nounwind
+  store float %result, float* %scevgep, align 4
+  %indvar_next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar_next, 4
+  br i1 %exitcond, label %return, label %body
+
+return:
+  ret void
+}
+
+; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
+; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
+; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
+; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
+; CHECK: %p_result = tail call float @foo(float %0) nounwind
+; CHECK: %p_result4 = tail call float @foo(float %1) nounwind
+; CHECK: %p_result5 = tail call float @foo(float %2) nounwind
+; CHECK: %p_result6 = tail call float @foo(float %3) nounwind
+; CHECK: %4 = insertelement <4 x float> undef, float %p_result, i32 0
+; CHECK: %5 = insertelement <4 x float> %4, float %p_result4, i32 1
+; CHECK: %6 = insertelement <4 x float> %5, float %p_result5, i32 2
+; CHECK: %7 = insertelement <4 x float> %6, float %p_result6, i32 3
+; CHECK: %vector_ptr = bitcast float* %p_scevgep to <4 x float>*
+; CHECK: store <4 x float> %7, <4 x float>* %vector_ptr, align 8

Added: polly/trunk/test/CodeGen/simple_vec_call_2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/CodeGen/simple_vec_call_2.ll?rev=154586&view=auto
==============================================================================
--- polly/trunk/test/CodeGen/simple_vec_call_2.ll (added)
+++ polly/trunk/test/CodeGen/simple_vec_call_2.ll Thu Apr 12 05:46:55 2012
@@ -0,0 +1,45 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float**] zeroinitializer, align 16
+
+declare float** @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+  br label %body
+
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+  %scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
+  %value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+  %result = tail call float** @foo(float %value) nounwind
+  store float** %result, float*** %scevgep, align 4
+  %indvar_next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar_next, 4
+  br i1 %exitcond, label %return, label %body
+
+return:
+  ret void
+}
+
+; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
+; CHECK: %p_scevgep1 = getelementptr [1024 x float**]* @B, i64 0, i64 1
+; CHECK: %p_scevgep2 = getelementptr [1024 x float**]* @B, i64 0, i64 2
+; CHECK: %p_scevgep3 = getelementptr [1024 x float**]* @B, i64 0, i64 3
+; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
+; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
+; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
+; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
+; CHECK: %p_result = tail call float** @foo(float %0) nounwind
+; CHECK: %p_result4 = tail call float** @foo(float %1) nounwind
+; CHECK: %p_result5 = tail call float** @foo(float %2) nounwind
+; CHECK: %p_result6 = tail call float** @foo(float %3) nounwind
+; CHECK: store float** %p_result, float*** %p_scevgep, align 4
+; CHECK: store float** %p_result4, float*** %p_scevgep1, align 4
+; CHECK: store float** %p_result5, float*** %p_scevgep2, align 4
+; CHECK: store float** %p_result6, float*** %p_scevgep3, align 4

Added: polly/trunk/test/CodeGen/simple_vec_impossible.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/CodeGen/simple_vec_impossible.ll?rev=154586&view=auto
==============================================================================
--- polly/trunk/test/CodeGen/simple_vec_impossible.ll (added)
+++ polly/trunk/test/CodeGen/simple_vec_impossible.ll Thu Apr 12 05:46:55 2012
@@ -0,0 +1,38 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -S %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float**] zeroinitializer, align 16
+@B = common global [1024 x float**] zeroinitializer, align 16
+
+declare float @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+  br label %body
+
+body:
+  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+  %scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
+  %value = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0), align 16
+  store float** %value, float*** %scevgep, align 4
+  %indvar_next = add i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar_next, 4
+  br i1 %exitcond, label %return, label %body
+
+return:
+  ret void
+}
+
+; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
+; CHECK: %p_scevgep1 = getelementptr [1024 x float**]* @B, i64 0, i64 1
+; CHECK: %p_scevgep2 = getelementptr [1024 x float**]* @B, i64 0, i64 2
+; CHECK: %p_scevgep3 = getelementptr [1024 x float**]* @B, i64 0, i64 3
+; CHECK: %value_p_scalar_ = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
+; CHECK: %value_p_scalar_4 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
+; CHECK: %value_p_scalar_5 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
+; CHECK: %value_p_scalar_6 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
+; CHECK: store float** %value_p_scalar_, float*** %p_scevgep, align 4
+; CHECK: store float** %value_p_scalar_4, float*** %p_scevgep1, align 4
+; CHECK: store float** %value_p_scalar_5, float*** %p_scevgep2, align 4
+; CHECK: store float** %value_p_scalar_6, float*** %p_scevgep3, align 4