[llvm] r177334 - ARM cost model: Make some vector integer to float casts cheaper
Arnold Schwaighofer
aschwaighofer at apple.com
Mon Mar 18 15:47:09 PDT 2013
Author: arnolds
Date: Mon Mar 18 17:47:09 2013
New Revision: 177334
URL: http://llvm.org/viewvc/llvm-project?rev=177334&view=rev
Log:
ARM cost model: Make some vector integer to float casts cheaper
The default logic marks them as too expensive.
For example, before this patch we estimated:
cost of 16 for instruction: %r = uitofp <4 x i16> %v0 to <4 x float>
While this translates to:
vmovl.u16 q8, d16
vcvt.f32.u32 q8, q8
All other costs are left to the values assigned by the fallback logic. These
costs are mostly reasonable in the sense that they get progressively more
expensive as the instruction sequences emitted get longer.
radar://13445992
Modified:
llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
llvm/trunk/test/Analysis/CostModel/ARM/cast.ll
Modified: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp?rev=177334&r1=177333&r2=177334&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp Mon Mar 18 17:47:09 2013
@@ -222,6 +222,28 @@ unsigned ARMTTI::getCastInstrCost(unsign
// Vector float <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
+
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
+ { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+ { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
+
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
@@ -232,6 +254,14 @@ unsigned ARMTTI::getCastInstrCost(unsign
// Vector double <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
Modified: llvm/trunk/test/Analysis/CostModel/ARM/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/ARM/cast.ll?rev=177334&r1=177333&r2=177334&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/ARM/cast.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/ARM/cast.ll Mon Mar 18 17:47:09 2013
@@ -359,6 +359,174 @@ define i32 @casts() {
; CHECK: cost of 192 {{.*}} fptosi
%r169 = fptosi <16 x double> undef to <16 x i64>
+ ; CHECK: cost of 8 {{.*}} uitofp
+ %r170 = uitofp <2 x i1> undef to <2 x float>
+ ; CHECK: cost of 8 {{.*}} sitofp
+ %r171 = sitofp <2 x i1> undef to <2 x float>
+ ; CHECK: cost of 3 {{.*}} uitofp
+ %r172 = uitofp <2 x i8> undef to <2 x float>
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %r173 = sitofp <2 x i8> undef to <2 x float>
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r174 = uitofp <2 x i16> undef to <2 x float>
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r175 = sitofp <2 x i16> undef to <2 x float>
+ ; CHECK: cost of 1 {{.*}} uitofp
+ %r176 = uitofp <2 x i32> undef to <2 x float>
+ ; CHECK: cost of 1 {{.*}} sitofp
+ %r177 = sitofp <2 x i32> undef to <2 x float>
+ ; CHECK: cost of 24 {{.*}} uitofp
+ %r178 = uitofp <2 x i64> undef to <2 x float>
+ ; CHECK: cost of 24 {{.*}} sitofp
+ %r179 = sitofp <2 x i64> undef to <2 x float>
+
+ ; CHECK: cost of 8 {{.*}} uitofp
+ %r180 = uitofp <2 x i1> undef to <2 x double>
+ ; CHECK: cost of 8 {{.*}} sitofp
+ %r181 = sitofp <2 x i1> undef to <2 x double>
+ ; CHECK: cost of 4 {{.*}} uitofp
+ %r182 = uitofp <2 x i8> undef to <2 x double>
+ ; CHECK: cost of 4 {{.*}} sitofp
+ %r183 = sitofp <2 x i8> undef to <2 x double>
+ ; CHECK: cost of 3 {{.*}} uitofp
+ %r184 = uitofp <2 x i16> undef to <2 x double>
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %r185 = sitofp <2 x i16> undef to <2 x double>
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r186 = uitofp <2 x i32> undef to <2 x double>
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r187 = sitofp <2 x i32> undef to <2 x double>
+ ; CHECK: cost of 24 {{.*}} uitofp
+ %r188 = uitofp <2 x i64> undef to <2 x double>
+ ; CHECK: cost of 24 {{.*}} sitofp
+ %r189 = sitofp <2 x i64> undef to <2 x double>
+
+ ; CHECK: cost of 3 {{.*}} uitofp
+ %r190 = uitofp <4 x i1> undef to <4 x float>
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %r191 = sitofp <4 x i1> undef to <4 x float>
+ ; CHECK: cost of 3 {{.*}} uitofp
+ %r192 = uitofp <4 x i8> undef to <4 x float>
+ ; CHECK: cost of 3 {{.*}} sitofp
+ %r193 = sitofp <4 x i8> undef to <4 x float>
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r194 = uitofp <4 x i16> undef to <4 x float>
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r195 = sitofp <4 x i16> undef to <4 x float>
+ ; CHECK: cost of 1 {{.*}} uitofp
+ %r196 = uitofp <4 x i32> undef to <4 x float>
+ ; CHECK: cost of 1 {{.*}} sitofp
+ %r197 = sitofp <4 x i32> undef to <4 x float>
+ ; CHECK: cost of 48 {{.*}} uitofp
+ %r198 = uitofp <4 x i64> undef to <4 x float>
+ ; CHECK: cost of 48 {{.*}} sitofp
+ %r199 = sitofp <4 x i64> undef to <4 x float>
+
+ ; CHECK: cost of 16 {{.*}} uitofp
+ %r200 = uitofp <4 x i1> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} sitofp
+ %r201 = sitofp <4 x i1> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} uitofp
+ %r202 = uitofp <4 x i8> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} sitofp
+ %r203 = sitofp <4 x i8> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} uitofp
+ %r204 = uitofp <4 x i16> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} sitofp
+ %r205 = sitofp <4 x i16> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} uitofp
+ %r206 = uitofp <4 x i32> undef to <4 x double>
+ ; CHECK: cost of 16 {{.*}} sitofp
+ %r207 = sitofp <4 x i32> undef to <4 x double>
+ ; CHECK: cost of 48 {{.*}} uitofp
+ %r208 = uitofp <4 x i64> undef to <4 x double>
+ ; CHECK: cost of 48 {{.*}} sitofp
+ %r209 = sitofp <4 x i64> undef to <4 x double>
+
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r210 = uitofp <8 x i1> undef to <8 x float>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r211 = sitofp <8 x i1> undef to <8 x float>
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r212 = uitofp <8 x i8> undef to <8 x float>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r213 = sitofp <8 x i8> undef to <8 x float>
+ ; CHECK: cost of 4 {{.*}} uitofp
+ %r214 = uitofp <8 x i16> undef to <8 x float>
+ ; CHECK: cost of 4 {{.*}} sitofp
+ %r215 = sitofp <8 x i16> undef to <8 x float>
+ ; CHECK: cost of 2 {{.*}} uitofp
+ %r216 = uitofp <8 x i32> undef to <8 x float>
+ ; CHECK: cost of 2 {{.*}} sitofp
+ %r217 = sitofp <8 x i32> undef to <8 x float>
+ ; CHECK: cost of 96 {{.*}} uitofp
+ %r218 = uitofp <8 x i64> undef to <8 x float>
+ ; CHECK: cost of 96 {{.*}} sitofp
+ %r219 = sitofp <8 x i64> undef to <8 x float>
+
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r220 = uitofp <8 x i1> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r221 = sitofp <8 x i1> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r222 = uitofp <8 x i8> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r223 = sitofp <8 x i8> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r224 = uitofp <8 x i16> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r225 = sitofp <8 x i16> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} uitofp
+ %r226 = uitofp <8 x i16> undef to <8 x double>
+ ; CHECK: cost of 32 {{.*}} sitofp
+ %r227 = sitofp <8 x i16> undef to <8 x double>
+ ; CHECK: cost of 96 {{.*}} uitofp
+ %r228 = uitofp <8 x i64> undef to <8 x double>
+ ; CHECK: cost of 96 {{.*}} sitofp
+ %r229 = sitofp <8 x i64> undef to <8 x double>
+
+ ; CHECK: cost of 64 {{.*}} uitofp
+ %r230 = uitofp <16 x i1> undef to <16 x float>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r231 = sitofp <16 x i1> undef to <16 x float>
+ ; CHECK: cost of 64 {{.*}} uitofp
+ %r232 = uitofp <16 x i8> undef to <16 x float>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r233 = sitofp <16 x i8> undef to <16 x float>
+ ; CHECK: cost of 8 {{.*}} uitofp
+ %r234 = uitofp <16 x i16> undef to <16 x float>
+ ; CHECK: cost of 8 {{.*}} sitofp
+ %r235 = sitofp <16 x i16> undef to <16 x float>
+ ; CHECK: cost of 4 {{.*}} uitofp
+ %r236 = uitofp <16 x i32> undef to <16 x float>
+ ; CHECK: cost of 4 {{.*}} sitofp
+ %r237 = sitofp <16 x i32> undef to <16 x float>
+ ; CHECK: cost of 192 {{.*}} uitofp
+ %r238 = uitofp <16 x i64> undef to <16 x float>
+ ; CHECK: cost of 192 {{.*}} sitofp
+ %r239 = sitofp <16 x i64> undef to <16 x float>
+
+ ; CHECK: cost of 64 {{.*}} uitofp
+ %r240 = uitofp <16 x i1> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r241 = sitofp <16 x i1> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} uitofp
+ %r242 = uitofp <16 x i8> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r243 = sitofp <16 x i8> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} uitofp
+ %r244 = uitofp <16 x i16> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r245 = sitofp <16 x i16> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} uitofp
+ %r246 = uitofp <16 x i16> undef to <16 x double>
+ ; CHECK: cost of 64 {{.*}} sitofp
+ %r247 = sitofp <16 x i16> undef to <16 x double>
+ ; CHECK: cost of 192 {{.*}} uitofp
+ %r248 = uitofp <16 x i64> undef to <16 x double>
+ ; CHECK: cost of 192 {{.*}} sitofp
+ %r249 = sitofp <16 x i64> undef to <16 x double>
+
;CHECK: cost of 0 {{.*}} ret
ret i32 undef
}
More information about the llvm-commits
mailing list