[llvm] r275106 - [X86] Make some cast costs more precise
Michael Kuperstein via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 11 14:39:45 PDT 2016
Author: mkuper
Date: Mon Jul 11 16:39:44 2016
New Revision: 275106
URL: http://llvm.org/viewvc/llvm-project?rev=275106&view=rev
Log:
[X86] Make some cast costs more precise
Make some AVX and AVX512 cast costs more precise.
Based on part of a patch by Elena Demikhovsky (D15604).
Differential Revision: http://reviews.llvm.org/D22064
Modified:
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/trunk/test/Analysis/CostModel/X86/cast.ll
llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll
llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll
llvm/trunk/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=275106&r1=275105&r2=275106&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Mon Jul 11 16:39:44 2016
@@ -547,6 +547,9 @@ int X86TTIImpl::getCastInstrCost(unsigne
{ ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 },
};
+ // TODO: For AVX512DQ + AVX512VL, we also have cheap casts for 128-bit and
+ // 256-bit wide vectors.
+
static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
@@ -577,6 +580,8 @@ int X86TTIImpl::getCastInstrCost(unsigne
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 },
+ { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 },
@@ -591,11 +596,13 @@ int X86TTIImpl::getCastInstrCost(unsigne
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 5 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 },
@@ -685,6 +692,7 @@ int X86TTIImpl::getCastInstrCost(unsigne
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 6 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 },
@@ -693,9 +701,11 @@ int X86TTIImpl::getCastInstrCost(unsigne
// here. We have roughly 10 instructions per scalar element.
// Multiply that by the vector width.
// FIXME: remove that when PR19268 is fixed.
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
- { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 4*10 },
-
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 10 },
+ { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 20 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 13 },
+
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 7 },
// This node is expanded into scalarized operations but BasicTTI is overly
@@ -705,6 +715,9 @@ int X86TTIImpl::getCastInstrCost(unsigne
// should be factored in too. Inflating the cost per element by 1.
{ ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 8*4 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 },
+
+ { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 },
+ { ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 },
};
static const TypeConversionCostTblEntry SSE41ConversionTbl[] = {
Modified: llvm/trunk/test/Analysis/CostModel/X86/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/cast.ll?rev=275106&r1=275105&r2=275106&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/cast.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/cast.ll Mon Jul 11 16:39:44 2016
@@ -238,21 +238,21 @@ define void @uitofp8(<8 x i1> %a, <8 x i
define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) {
;CHECK-LABEL: for function 'fp_conv'
- ; CHECK-AVX512: cost of 1 {{.*}} fpext
- %A1 = fpext <8 x float> %a to <8 x double>
+ ; CHECK: cost of 1 {{.*}} %A1 = fpext
+ %A1 = fpext <4 x float> %c to <4 x double>
- ; CHECK-AVX512: cost of 1 {{.*}} fpext
- %A2 = fpext <4 x float> %c to <4 x double>
+ ; CHECK-AVX: cost of 3 {{.*}} %A2 = fpext
+ ; CHECK-AVX2: cost of 3 {{.*}} %A2 = fpext
+ ; CHECK-AVX512: cost of 1 {{.*}} %A2 = fpext
+ %A2 = fpext <8 x float> %a to <8 x double>
- ; CHECK-AVX2: cost of 3 {{.*}} %A3 = fpext
- ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
- %A3 = fpext <8 x float> %a to <8 x double>
+ ; CHECK: cost of 1 {{.*}} %A3 = fptrunc
+ %A3 = fptrunc <4 x double> undef to <4 x float>
+ ; CHECK-AVX: cost of 3 {{.*}} %A4 = fptrunc
; CHECK-AVX2: cost of 3 {{.*}} %A4 = fptrunc
; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
%A4 = fptrunc <8 x double> undef to <8 x float>
- ; CHECK-AVX512: cost of 1 {{.*}} %A5 = fptrunc
- %A5 = fptrunc <4 x double> undef to <4 x float>
ret void
}
Modified: llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll?rev=275106&r1=275105&r2=275106&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll Mon Jul 11 16:39:44 2016
@@ -264,13 +264,13 @@ define <4 x double> @sitofpv4i64v4double
; SSE2: cost of 40 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv4i64v4double
- ; AVX1: cost of 10 {{.*}} sitofp
+ ; AVX1: cost of 13 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv4i64v4double
- ; AVX2: cost of 10 {{.*}} sitofp
+ ; AVX2: cost of 13 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv4i64v4double
- ; AVX512F: cost of 10 {{.*}} sitofp
+ ; AVX512F: cost of 13 {{.*}} sitofp
%1 = sitofp <4 x i64> %a to <4 x double>
ret <4 x double> %1
}
@@ -280,10 +280,10 @@ define <8 x double> @sitofpv8i64v8double
; SSE2: cost of 80 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv8i64v8double
- ; AVX1: cost of 21 {{.*}} sitofp
+ ; AVX1: cost of 27 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv8i64v8double
- ; AVX2: cost of 21 {{.*}} sitofp
+ ; AVX2: cost of 27 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv8i64v8double
; AVX512F: cost of 22 {{.*}} sitofp
@@ -296,10 +296,10 @@ define <16 x double> @sitofpv16i64v16dou
; SSE2: cost of 160 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv16i64v16double
- ; AVX1: cost of 43 {{.*}} sitofp
+ ; AVX1: cost of 55 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv16i64v16double
- ; AVX2: cost of 43 {{.*}} sitofp
+ ; AVX2: cost of 55 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv16i64v16double
; AVX512F: cost of 45 {{.*}} sitofp
@@ -312,10 +312,10 @@ define <32 x double> @sitofpv32i64v32dou
; SSE2: cost of 320 {{.*}} sitofp
;
; AVX1-LABEL: sitofpv32i64v32double
- ; AVX1: cost of 87 {{.*}} sitofp
+ ; AVX1: cost of 111 {{.*}} sitofp
;
; AVX2-LABEL: sitofpv32i64v32double
- ; AVX2: cost of 87 {{.*}} sitofp
+ ; AVX2: cost of 111 {{.*}} sitofp
;
; AVX512F-LABEL: sitofpv32i64v32double
; AVX512F: cost of 91 {{.*}} sitofp
Modified: llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll?rev=275106&r1=275105&r2=275106&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll Mon Jul 11 16:39:44 2016
@@ -169,13 +169,13 @@ define <2 x double> @uitofpv2i32v2double
; SSE2: cost of 20 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv2i32v2double
- ; AVX1: cost of 4 {{.*}} uitofp
+ ; AVX1: cost of 6 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv2i32v2double
- ; AVX2: cost of 4 {{.*}} uitofp
+ ; AVX2: cost of 6 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv2i32v2double
- ; AVX512F: cost of 4 {{.*}} uitofp
+ ; AVX512F: cost of 1 {{.*}} uitofp
%1 = uitofp <2 x i32> %a to <2 x double>
ret <2 x double> %1
}
@@ -249,10 +249,10 @@ define <2 x double> @uitofpv2i64v2double
; SSE2: cost of 20 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv2i64v2double
- ; AVX1: cost of 20 {{.*}} uitofp
+ ; AVX1: cost of 10 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv2i64v2double
- ; AVX2: cost of 20 {{.*}} uitofp
+ ; AVX2: cost of 10 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv2i64v2double
; AVX512F: cost of 5 {{.*}} uitofp
@@ -268,10 +268,10 @@ define <4 x double> @uitofpv4i64v4double
; SSE2: cost of 40 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv4i64v4double
- ; AVX1: cost of 40 {{.*}} uitofp
+ ; AVX1: cost of 20 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv4i64v4double
- ; AVX2: cost of 40 {{.*}} uitofp
+ ; AVX2: cost of 20 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv4i64v4double
; AVX512F: cost of 12 {{.*}} uitofp
@@ -287,10 +287,10 @@ define <8 x double> @uitofpv8i64v8double
; SSE2: cost of 80 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv8i64v8double
- ; AVX1: cost of 81 {{.*}} uitofp
+ ; AVX1: cost of 41 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv8i64v8double
- ; AVX2: cost of 81 {{.*}} uitofp
+ ; AVX2: cost of 41 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv8i64v8double
; AVX512F: cost of 26 {{.*}} uitofp
@@ -306,10 +306,10 @@ define <16 x double> @uitofpv16i64v16dou
; SSE2: cost of 160 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv16i64v16double
- ; AVX1: cost of 163 {{.*}} uitofp
+ ; AVX1: cost of 83 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv16i64v16double
- ; AVX2: cost of 163 {{.*}} uitofp
+ ; AVX2: cost of 83 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv16i64v16double
; AVX512F: cost of 53 {{.*}} uitofp
@@ -325,10 +325,10 @@ define <32 x double> @uitofpv32i64v32dou
; SSE2: cost of 320 {{.*}} uitofp
;
; AVX1-LABEL: uitofpv32i64v32double
- ; AVX1: cost of 327 {{.*}} uitofp
+ ; AVX1: cost of 167 {{.*}} uitofp
;
; AVX2-LABEL: uitofpv32i64v32double
- ; AVX2: cost of 327 {{.*}} uitofp
+ ; AVX2: cost of 167 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv32i64v32double
; AVX512F: cost of 107 {{.*}} uitofp
@@ -590,7 +590,7 @@ define <2 x float> @uitofpv2i64v2float(<
; AVX2: cost of 4 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv2i64v2float
- ; AVX512F: cost of 4 {{.*}} uitofp
+ ; AVX512F: cost of 5 {{.*}} uitofp
%1 = uitofp <2 x i64> %a to <2 x float>
ret <2 x float> %1
}
@@ -622,7 +622,7 @@ define <8 x float> @uitofpv8i64v8float(<
; AVX2: cost of 21 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv8i64v8float
- ; AVX512F: cost of 22 {{.*}} uitofp
+ ; AVX512F: cost of 26 {{.*}} uitofp
%1 = uitofp <8 x i64> %a to <8 x float>
ret <8 x float> %1
}
@@ -638,7 +638,7 @@ define <16 x float> @uitofpv16i64v16floa
; AVX2: cost of 43 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv16i64v16float
- ; AVX512F: cost of 45 {{.*}} uitofp
+ ; AVX512F: cost of 53 {{.*}} uitofp
%1 = uitofp <16 x i64> %a to <16 x float>
ret <16 x float> %1
}
@@ -654,7 +654,7 @@ define <32 x float> @uitofpv32i64v32floa
; AVX2: cost of 87 {{.*}} uitofp
;
; AVX512F-LABEL: uitofpv32i64v32float
- ; AVX512F: cost of 91 {{.*}} uitofp
+ ; AVX512F: cost of 107 {{.*}} uitofp
%1 = uitofp <32 x i64> %a to <32 x float>
ret <32 x float> %1
}
Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll?rev=275106&r1=275105&r2=275106&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll Mon Jul 11 16:39:44 2016
@@ -5,8 +5,8 @@ target datalayout = "e-p:64:64:64-i1:8:8
target triple = "x86_64-apple-macosx10.8.0"
-; CHECK: cost of 20 for VF 2 For instruction: %conv = uitofp i64 %tmp to double
-; CHECK: cost of 40 for VF 4 For instruction: %conv = uitofp i64 %tmp to double
+; CHECK: cost of 10 for VF 2 For instruction: %conv = uitofp i64 %tmp to double
+; CHECK: cost of 20 for VF 4 For instruction: %conv = uitofp i64 %tmp to double
define void @uint64_to_double_cost(i64* noalias nocapture %a, double* noalias nocapture readonly %b) nounwind {
entry:
br label %for.body
More information about the llvm-commits mailing list