[llvm-commits] [llvm] r123161 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineCalls.cpp test/Transforms/InstCombine/vec_demanded_elts.ll
Chandler Carruth
chandlerc at gmail.com
Sun Jan 9 23:19:38 PST 2011
Author: chandlerc
Date: Mon Jan 10 01:19:37 2011
New Revision: 123161
URL: http://llvm.org/viewvc/llvm-project?rev=123161&view=rev
Log:
Teach instcombine about the element dependencies of the rest of the SSE
and SSE2 conversion intrinsics. Reviewed by Nick.
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=123161&r1=123160&r2=123161&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Mon Jan 10 01:19:37 2011
@@ -540,9 +540,16 @@
return new StoreInst(II->getArgOperand(1), Ptr);
}
break;
-
- case Intrinsic::x86_sse_cvttss2si: {
- // These intrinsics only demands the 0th element of its input vector. If
+
+ case Intrinsic::x86_sse_cvtss2si:
+ case Intrinsic::x86_sse_cvtss2si64:
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvtsd2si:
+ case Intrinsic::x86_sse2_cvtsd2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64: {
+ // These intrinsics only demand the 0th element of their input vectors. If
// we can simplify the input based on that, do so now.
unsigned VWidth =
cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
@@ -555,7 +562,7 @@
}
break;
}
-
+
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
Modified: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll?rev=123161&r1=123160&r2=123161&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll Mon Jan 10 01:19:37 2011
@@ -36,6 +36,54 @@
ret i32 %tmp21
}
+define i64 @test3(float %f, double %d) {
+; CHECK: @test3
+; CHECK-NOT: insertelement {{.*}} 0.00
+; CHECK: ret
+entry:
+ %v00 = insertelement <4 x float> undef, float %f, i32 0
+ %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
+ %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
+ %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
+ %tmp0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %v03)
+ %v10 = insertelement <4 x float> undef, float %f, i32 0
+ %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
+ %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
+ %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
+ %tmp1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %v13)
+ %v20 = insertelement <4 x float> undef, float %f, i32 0
+ %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
+ %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
+ %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
+ %tmp2 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v23)
+ %v30 = insertelement <4 x float> undef, float %f, i32 0
+ %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
+ %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
+ %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
+ %tmp3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %v33)
+ %v40 = insertelement <2 x double> undef, double %d, i32 0
+ %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
+ %tmp4 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %v41)
+ %v50 = insertelement <2 x double> undef, double %d, i32 0
+ %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
+ %tmp5 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %v51)
+ %v60 = insertelement <2 x double> undef, double %d, i32 0
+ %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
+ %tmp6 = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %v61)
+ %v70 = insertelement <2 x double> undef, double %d, i32 0
+ %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
+ %tmp7 = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %v71)
+ %tmp8 = add i32 %tmp0, %tmp2
+ %tmp9 = add i32 %tmp4, %tmp6
+ %tmp10 = add i32 %tmp8, %tmp9
+ %tmp11 = sext i32 %tmp10 to i64
+ %tmp12 = add i64 %tmp1, %tmp3
+ %tmp13 = add i64 %tmp5, %tmp7
+ %tmp14 = add i64 %tmp12, %tmp13
+ %tmp15 = add i64 %tmp11, %tmp14
+ ret i64 %tmp15
+}
+
define void @get_image() nounwind {
; CHECK: @get_image
; CHECK-NOT: extractelement
@@ -80,4 +128,11 @@
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>)
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>)
declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
More information about the llvm-commits mailing list.