[llvm-commits] [polly] r154586 - in /polly/trunk: lib/CodeGen/CodeGeneration.cpp test/CodeGen/simple_vec_call.ll test/CodeGen/simple_vec_call_2.ll test/CodeGen/simple_vec_impossible.ll
Tobias Grosser
grosser at fim.uni-passau.de
Thu Apr 12 03:46:55 PDT 2012
Author: grosser
Date: Thu Apr 12 05:46:55 2012
New Revision: 154586
URL: http://llvm.org/viewvc/llvm-project?rev=154586&view=rev
Log:
CodeGen: Generate scalar code if vector instructions cannot be generated
This fixes two crashes that appeared in case of:
- A load of a non vectorizable type (e.g. float**)
- An instruction that is not vectorizable (e.g. call)
Added:
polly/trunk/test/CodeGen/simple_vec_call.ll
polly/trunk/test/CodeGen/simple_vec_call_2.ll
polly/trunk/test/CodeGen/simple_vec_impossible.ll
Modified:
polly/trunk/lib/CodeGen/CodeGeneration.cpp
Modified: polly/trunk/lib/CodeGen/CodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/CodeGeneration.cpp?rev=154586&r1=154585&r2=154586&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/CodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/CodeGeneration.cpp Thu Apr 12 05:46:55 2012
@@ -569,6 +569,12 @@
void copyStore(const StoreInst *Store, ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps);
+ void copyInstScalarized(const Instruction *Inst, ValueMapT &VectorMap,
+ VectorValueMapT &ScalarMaps);
+
+ bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
+ VectorValueMapT &ScalarMaps);
+
bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
void copyInstruction(const Instruction *Inst, ValueMapT &VectorMap,
@@ -680,18 +686,16 @@
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps) {
- Value *NewLoad;
-
- if (GroupedUnrolling) {
+ if (GroupedUnrolling || !VectorType::isValidElementType(Load->getType())) {
for (int i = 0; i < getVectorWidth(); i++)
ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
GlobalMaps[i]);
-
return;
}
MemoryAccess &Access = Statement.getAccessFor(Load);
+ Value *NewLoad;
if (Access.isStrideZero(isl_set_copy(Domain)))
NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
else if (Access.isStrideOne(isl_set_copy(Domain)))
@@ -772,6 +776,63 @@
return false;
}
+/// Make the lanes of every vectorized operand of Inst available as scalars.
+///
+/// For each operand of Inst that has an entry in VectorMap, one extractelement
+/// per vector lane is emitted and recorded in the ScalarMaps entry of that
+/// lane, unless a scalar for that operand is already recorded there.
+///
+/// @param Inst       The instruction whose operands are inspected.
+/// @param VectorMap  Looked up to find the vector value of an operand.
+/// @param ScalarMaps Indexed by lane; receives the extracted scalar values.
+/// @return True if at least one operand of Inst was found in VectorMap.
+bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
+ ValueMapT &VectorMap,
+ VectorValueMapT &ScalarMaps) {
+ bool HasVectorOperand = false;
+ int VectorWidth = getVectorWidth();
+
+ for (Instruction::const_op_iterator OI = Inst->op_begin(),
+ OE = Inst->op_end(); OI != OE; ++OI) {
+ ValueMapT::iterator VecOp = VectorMap.find(*OI);
+
+ // Operands without a vector value need no extraction.
+ if (VecOp == VectorMap.end())
+ continue;
+
+ HasVectorOperand = true;
+ Value *NewVector = VecOp->second;
+
+ for (int i = 0; i < VectorWidth; ++i) {
+ ValueMapT &SM = ScalarMaps[i];
+
+ // If there is one scalar extracted, all scalar elements should have
+ // already been extracted by the code here. So no need to check for the
+ // existence of all of them.
+ if (SM.count(*OI))
+ break;
+
+ SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
+ }
+ }
+
+ return HasVectorOperand;
+}
+
+/// Generate Inst lane by lane instead of as a single vector instruction.
+///
+/// Vector operands are first split into per-lane scalars by
+/// extractScalarValues(); then copyInstScalar() is called once for every lane.
+/// If Inst had a vector operand and its type is a valid vector element type,
+/// the per-lane results are packed into a new vector that is recorded in
+/// VectorMap[Inst].
+///
+/// @param Inst       The instruction to copy.
+/// @param VectorMap  Vector values of operands; may receive a vector for Inst.
+/// @param ScalarMaps Indexed by lane; scalar values used by each lane's copy.
+void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
+ ValueMapT &VectorMap,
+ VectorValueMapT &ScalarMaps) {
+ bool HasVectorOperand;
+ int VectorWidth = getVectorWidth();
+
+ HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
+
+ for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
+ copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
+
+ // No vector is built when the result type cannot be a vector element or
+ // when none of the operands was a vector value.
+ if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
+ return;
+
+ // Make the result available as vector value.
+ VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
+ Value *Vector = UndefValue::get(VectorType);
+
+ // Start from undef and insert the scalar result of each lane at its index.
+ for (int i = 0; i < VectorWidth; i++)
+ Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
+ Builder.getInt32(i));
+
+ VectorMap[Inst] = Vector;
+}
+
/// Return the vector width, which is the number of entries in GlobalMaps.
int VectorBlockGenerator::getVectorWidth() {
return GlobalMaps.size();
}
@@ -805,11 +866,11 @@
return;
}
- llvm_unreachable("Cannot issue vector code for this instruction");
+ // Fallthrough: We generate scalar instructions if we don't know how to
+ // generate vector code.
}
- for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
- copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
+ copyInstScalarized(Inst, VectorMap, ScalarMaps);
}
void VectorBlockGenerator::copyBB() {
Added: polly/trunk/test/CodeGen/simple_vec_call.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/CodeGen/simple_vec_call.ll?rev=154586&view=auto
==============================================================================
--- polly/trunk/test/CodeGen/simple_vec_call.ll (added)
+++ polly/trunk/test/CodeGen/simple_vec_call.ll Thu Apr 12 05:46:55 2012
@@ -0,0 +1,43 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -S %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+declare float @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+ br label %body
+
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+ %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
+ %value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+ %result = tail call float @foo(float %value) nounwind
+ store float %result, float* %scevgep, align 4
+ %indvar_next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar_next, 4
+ br i1 %exitcond, label %return, label %body
+
+return:
+ ret void
+}
+
+; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
+; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
+; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
+; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
+; CHECK: %p_result = tail call float @foo(float %0) nounwind
+; CHECK: %p_result4 = tail call float @foo(float %1) nounwind
+; CHECK: %p_result5 = tail call float @foo(float %2) nounwind
+; CHECK: %p_result6 = tail call float @foo(float %3) nounwind
+; CHECK: %4 = insertelement <4 x float> undef, float %p_result, i32 0
+; CHECK: %5 = insertelement <4 x float> %4, float %p_result4, i32 1
+; CHECK: %6 = insertelement <4 x float> %5, float %p_result5, i32 2
+; CHECK: %7 = insertelement <4 x float> %6, float %p_result6, i32 3
+; CHECK: %vector_ptr = bitcast float* %p_scevgep to <4 x float>*
+; CHECK: store <4 x float> %7, <4 x float>* %vector_ptr, align 8
Added: polly/trunk/test/CodeGen/simple_vec_call_2.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/CodeGen/simple_vec_call_2.ll?rev=154586&view=auto
==============================================================================
--- polly/trunk/test/CodeGen/simple_vec_call_2.ll (added)
+++ polly/trunk/test/CodeGen/simple_vec_call_2.ll Thu Apr 12 05:46:55 2012
@@ -0,0 +1,45 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float**] zeroinitializer, align 16
+
+declare float** @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+ br label %body
+
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+ %scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
+ %value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+ %result = tail call float** @foo(float %value) nounwind
+ store float** %result, float*** %scevgep, align 4
+ %indvar_next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar_next, 4
+ br i1 %exitcond, label %return, label %body
+
+return:
+ ret void
+}
+
+; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
+; CHECK: %p_scevgep1 = getelementptr [1024 x float**]* @B, i64 0, i64 1
+; CHECK: %p_scevgep2 = getelementptr [1024 x float**]* @B, i64 0, i64 2
+; CHECK: %p_scevgep3 = getelementptr [1024 x float**]* @B, i64 0, i64 3
+; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
+; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
+; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
+; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
+; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
+; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
+; CHECK: %p_result = tail call float** @foo(float %0) nounwind
+; CHECK: %p_result4 = tail call float** @foo(float %1) nounwind
+; CHECK: %p_result5 = tail call float** @foo(float %2) nounwind
+; CHECK: %p_result6 = tail call float** @foo(float %3) nounwind
+; CHECK: store float** %p_result, float*** %p_scevgep, align 4
+; CHECK: store float** %p_result4, float*** %p_scevgep1, align 4
+; CHECK: store float** %p_result5, float*** %p_scevgep2, align 4
+; CHECK: store float** %p_result6, float*** %p_scevgep3, align 4
Added: polly/trunk/test/CodeGen/simple_vec_impossible.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/CodeGen/simple_vec_impossible.ll?rev=154586&view=auto
==============================================================================
--- polly/trunk/test/CodeGen/simple_vec_impossible.ll (added)
+++ polly/trunk/test/CodeGen/simple_vec_impossible.ll Thu Apr 12 05:46:55 2012
@@ -0,0 +1,38 @@
+; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -S %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global [1024 x float**] zeroinitializer, align 16
+@B = common global [1024 x float**] zeroinitializer, align 16
+
+declare float @foo(float) readnone
+
+define void @simple_vec_call() nounwind {
+entry:
+ br label %body
+
+body:
+ %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
+ %scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
+ %value = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0), align 16
+ store float** %value, float*** %scevgep, align 4
+ %indvar_next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar_next, 4
+ br i1 %exitcond, label %return, label %body
+
+return:
+ ret void
+}
+
+; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
+; CHECK: %p_scevgep1 = getelementptr [1024 x float**]* @B, i64 0, i64 1
+; CHECK: %p_scevgep2 = getelementptr [1024 x float**]* @B, i64 0, i64 2
+; CHECK: %p_scevgep3 = getelementptr [1024 x float**]* @B, i64 0, i64 3
+; CHECK: %value_p_scalar_ = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
+; CHECK: %value_p_scalar_4 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
+; CHECK: %value_p_scalar_5 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
+; CHECK: %value_p_scalar_6 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
+; CHECK: store float** %value_p_scalar_, float*** %p_scevgep, align 4
+; CHECK: store float** %value_p_scalar_4, float*** %p_scevgep1, align 4
+; CHECK: store float** %value_p_scalar_5, float*** %p_scevgep2, align 4
+; CHECK: store float** %value_p_scalar_6, float*** %p_scevgep3, align 4
More information about the llvm-commits
mailing list