[llvm] r275106 - [X86] Make some cast costs more precise

Michael Kuperstein via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 11 14:39:45 PDT 2016


Author: mkuper
Date: Mon Jul 11 16:39:44 2016
New Revision: 275106

URL: http://llvm.org/viewvc/llvm-project?rev=275106&view=rev
Log:
[X86] Make some cast costs more precise

Make some AVX and AVX512 cast costs more precise.
Based on part of a patch by Elena Demikhovsky (D15604).

Differential Revision: http://reviews.llvm.org/D22064

Modified:
    llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/trunk/test/Analysis/CostModel/X86/cast.ll
    llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll
    llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll
    llvm/trunk/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll

Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=275106&r1=275105&r2=275106&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Mon Jul 11 16:39:44 2016
@@ -547,6 +547,9 @@ int X86TTIImpl::getCastInstrCost(unsigne
     { ISD::FP_TO_UINT,  MVT::v8i64, MVT::v8f64, 1 },
   };
 
+  // TODO: For AVX512DQ + AVX512VL, we also have cheap casts for 128-bit and
+  // 256-bit wide vectors.
+
   static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {
     { ISD::FP_EXTEND, MVT::v8f64,   MVT::v8f32,  1 },
     { ISD::FP_EXTEND, MVT::v8f64,   MVT::v16f32, 3 },
@@ -577,6 +580,8 @@ int X86TTIImpl::getCastInstrCost(unsigne
     { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i16, 2 },
     { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i32, 1 },
     { ISD::SINT_TO_FP,  MVT::v8f64,  MVT::v8i32,  1 },
+    { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i64, 26 },
+    { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i64, 26 },
 
     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i1,   4 },
     { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i1,  3 },
@@ -591,11 +596,13 @@ int X86TTIImpl::getCastInstrCost(unsigne
     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i16,  2 },
     { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i16, 2 },
     { ISD::UINT_TO_FP,  MVT::v2f32,  MVT::v2i32,  2 },
+    { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v2i32,  1 },    
     { ISD::UINT_TO_FP,  MVT::v4f32,  MVT::v4i32,  1 },
     { ISD::UINT_TO_FP,  MVT::v4f64,  MVT::v4i32,  1 },
     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i32,  1 },
     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i32,  1 },
     { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i32, 1 },
+    { ISD::UINT_TO_FP,  MVT::v2f32,  MVT::v2i64,  5 },    
     { ISD::UINT_TO_FP,  MVT::v2f64,  MVT::v2i64,  5 },
     { ISD::UINT_TO_FP,  MVT::v4f64,  MVT::v4i64, 12 },
     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i64, 26 },
@@ -685,6 +692,7 @@ int X86TTIImpl::getCastInstrCost(unsigne
     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
     { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i16, 2 },
     { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i16, 5 },
+    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 6 },
     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i32, 6 },
     { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i32, 6 },
     { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i32, 9 },
@@ -693,9 +701,11 @@ int X86TTIImpl::getCastInstrCost(unsigne
     // here. We have roughly 10 instructions per scalar element.
     // Multiply that by the vector width.
     // FIXME: remove that when PR19268 is fixed.
-    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i64, 2*10 },
-    { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i64, 4*10 },
-
+    { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i64, 10 },
+    { ISD::UINT_TO_FP,  MVT::v4f64, MVT::v4i64, 20 },
+    { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i64, 13 },
+    { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i64, 13 },
+    
     { ISD::FP_TO_SINT,  MVT::v4i8,  MVT::v4f32, 1 },
     { ISD::FP_TO_SINT,  MVT::v8i8,  MVT::v8f32, 7 },
     // This node is expanded into scalarized operations but BasicTTI is overly
@@ -705,6 +715,9 @@ int X86TTIImpl::getCastInstrCost(unsigne
     // should be factored in too.  Inflating the cost per element by 1.
     { ISD::FP_TO_UINT,  MVT::v8i32, MVT::v8f32, 8*4 },
     { ISD::FP_TO_UINT,  MVT::v4i32, MVT::v4f64, 4*4 },
+
+    { ISD::FP_EXTEND,   MVT::v4f64,  MVT::v4f32,  1 },
+    { ISD::FP_ROUND,    MVT::v4f32,  MVT::v4f64,  1 },
   };
 
   static const TypeConversionCostTblEntry SSE41ConversionTbl[] = {

Modified: llvm/trunk/test/Analysis/CostModel/X86/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/cast.ll?rev=275106&r1=275105&r2=275106&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/cast.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/cast.ll Mon Jul 11 16:39:44 2016
@@ -238,21 +238,21 @@ define void @uitofp8(<8 x i1> %a, <8 x i
 
 define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) {
 ;CHECK-LABEL: for function 'fp_conv'
-  ; CHECK-AVX512: cost of 1 {{.*}} fpext
-  %A1 = fpext <8 x float> %a to <8 x double>
+  ; CHECK: cost of 1 {{.*}} %A1 = fpext
+  %A1 = fpext <4 x float> %c to <4 x double>
 
-  ; CHECK-AVX512: cost of 1 {{.*}} fpext
-  %A2 = fpext <4 x float> %c to <4 x double>
+  ; CHECK-AVX:    cost of 3 {{.*}} %A2 = fpext
+  ; CHECK-AVX2:   cost of 3 {{.*}} %A2 = fpext
+  ; CHECK-AVX512: cost of 1 {{.*}} %A2 = fpext
+  %A2 = fpext <8 x float> %a to <8 x double>
 
-  ; CHECK-AVX2:   cost of 3 {{.*}} %A3 = fpext
-  ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
-  %A3 = fpext <8 x float> %a to <8 x double>
+  ; CHECK: cost of 1 {{.*}} %A3 = fptrunc
+  %A3 = fptrunc <4 x double> undef to <4 x float>
 
+  ; CHECK-AVX:    cost of 3 {{.*}} %A4 = fptrunc
   ; CHECK-AVX2:   cost of 3 {{.*}} %A4 = fptrunc
   ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
   %A4 = fptrunc <8 x double> undef to <8 x float>
 
-  ; CHECK-AVX512: cost of 1 {{.*}} %A5 = fptrunc
-  %A5 = fptrunc <4 x double> undef to <4 x float>
   ret void
 }

Modified: llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll?rev=275106&r1=275105&r2=275106&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/sitofp.ll Mon Jul 11 16:39:44 2016
@@ -264,13 +264,13 @@ define <4 x double> @sitofpv4i64v4double
   ; SSE2: cost of 40 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv4i64v4double
-  ; AVX1: cost of 10 {{.*}} sitofp
+  ; AVX1: cost of 13 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv4i64v4double
-  ; AVX2: cost of 10 {{.*}} sitofp
+  ; AVX2: cost of 13 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv4i64v4double
-  ; AVX512F: cost of 10 {{.*}} sitofp
+  ; AVX512F: cost of 13 {{.*}} sitofp
   %1 = sitofp <4 x i64> %a to <4 x double>
   ret <4 x double> %1
 }
@@ -280,10 +280,10 @@ define <8 x double> @sitofpv8i64v8double
   ; SSE2: cost of 80 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv8i64v8double
-  ; AVX1: cost of 21 {{.*}} sitofp
+  ; AVX1: cost of 27 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv8i64v8double
-  ; AVX2: cost of 21 {{.*}} sitofp
+  ; AVX2: cost of 27 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv8i64v8double
   ; AVX512F: cost of 22 {{.*}} sitofp
@@ -296,10 +296,10 @@ define <16 x double> @sitofpv16i64v16dou
   ; SSE2: cost of 160 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv16i64v16double
-  ; AVX1: cost of 43 {{.*}} sitofp
+  ; AVX1: cost of 55 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv16i64v16double
-  ; AVX2: cost of 43 {{.*}} sitofp
+  ; AVX2: cost of 55 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv16i64v16double
   ; AVX512F: cost of 45 {{.*}} sitofp
@@ -312,10 +312,10 @@ define <32 x double> @sitofpv32i64v32dou
   ; SSE2: cost of 320 {{.*}} sitofp
   ;
   ; AVX1-LABEL: sitofpv32i64v32double
-  ; AVX1: cost of 87 {{.*}} sitofp
+  ; AVX1: cost of 111 {{.*}} sitofp
   ;
   ; AVX2-LABEL: sitofpv32i64v32double
-  ; AVX2: cost of 87 {{.*}} sitofp
+  ; AVX2: cost of 111 {{.*}} sitofp
   ;
   ; AVX512F-LABEL: sitofpv32i64v32double
   ; AVX512F: cost of 91 {{.*}} sitofp

Modified: llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll?rev=275106&r1=275105&r2=275106&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/uitofp.ll Mon Jul 11 16:39:44 2016
@@ -169,13 +169,13 @@ define <2 x double> @uitofpv2i32v2double
   ; SSE2: cost of 20 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv2i32v2double
-  ; AVX1: cost of 4 {{.*}} uitofp
+  ; AVX1: cost of 6 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv2i32v2double
-  ; AVX2: cost of 4 {{.*}} uitofp
+  ; AVX2: cost of 6 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv2i32v2double
-  ; AVX512F: cost of 4 {{.*}} uitofp
+  ; AVX512F: cost of 1 {{.*}} uitofp
   %1 = uitofp <2 x i32> %a to <2 x double>
   ret <2 x double> %1
 }
@@ -249,10 +249,10 @@ define <2 x double> @uitofpv2i64v2double
   ; SSE2: cost of 20 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv2i64v2double
-  ; AVX1: cost of 20 {{.*}} uitofp
+  ; AVX1: cost of 10 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv2i64v2double
-  ; AVX2: cost of 20 {{.*}} uitofp
+  ; AVX2: cost of 10 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv2i64v2double
   ; AVX512F: cost of 5 {{.*}} uitofp
@@ -268,10 +268,10 @@ define <4 x double> @uitofpv4i64v4double
   ; SSE2: cost of 40 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv4i64v4double
-  ; AVX1: cost of 40 {{.*}} uitofp
+  ; AVX1: cost of 20 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv4i64v4double
-  ; AVX2: cost of 40 {{.*}} uitofp
+  ; AVX2: cost of 20 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv4i64v4double
   ; AVX512F: cost of 12 {{.*}} uitofp
@@ -287,10 +287,10 @@ define <8 x double> @uitofpv8i64v8double
   ; SSE2: cost of 80 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv8i64v8double
-  ; AVX1: cost of 81 {{.*}} uitofp
+  ; AVX1: cost of 41 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv8i64v8double
-  ; AVX2: cost of 81 {{.*}} uitofp
+  ; AVX2: cost of 41 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv8i64v8double
   ; AVX512F: cost of 26 {{.*}} uitofp
@@ -306,10 +306,10 @@ define <16 x double> @uitofpv16i64v16dou
   ; SSE2: cost of 160 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv16i64v16double
-  ; AVX1: cost of 163 {{.*}} uitofp
+  ; AVX1: cost of 83 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv16i64v16double
-  ; AVX2: cost of 163 {{.*}} uitofp
+  ; AVX2: cost of 83 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i64v16double
   ; AVX512F: cost of 53 {{.*}} uitofp
@@ -325,10 +325,10 @@ define <32 x double> @uitofpv32i64v32dou
   ; SSE2: cost of 320 {{.*}} uitofp
   ;
   ; AVX1-LABEL: uitofpv32i64v32double
-  ; AVX1: cost of 327 {{.*}} uitofp
+  ; AVX1: cost of 167 {{.*}} uitofp
   ;
   ; AVX2-LABEL: uitofpv32i64v32double
-  ; AVX2: cost of 327 {{.*}} uitofp
+  ; AVX2: cost of 167 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i64v32double
   ; AVX512F: cost of 107 {{.*}} uitofp
@@ -590,7 +590,7 @@ define <2 x float> @uitofpv2i64v2float(<
   ; AVX2: cost of 4 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv2i64v2float
-  ; AVX512F: cost of 4 {{.*}} uitofp
+  ; AVX512F: cost of 5 {{.*}} uitofp
   %1 = uitofp <2 x i64> %a to <2 x float>
   ret <2 x float> %1
 }
@@ -622,7 +622,7 @@ define <8 x float> @uitofpv8i64v8float(<
   ; AVX2: cost of 21 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv8i64v8float
-  ; AVX512F: cost of 22 {{.*}} uitofp
+  ; AVX512F: cost of 26 {{.*}} uitofp
   %1 = uitofp <8 x i64> %a to <8 x float>
   ret <8 x float> %1
 }
@@ -638,7 +638,7 @@ define <16 x float> @uitofpv16i64v16floa
   ; AVX2: cost of 43 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv16i64v16float
-  ; AVX512F: cost of 45 {{.*}} uitofp
+  ; AVX512F: cost of 53 {{.*}} uitofp
   %1 = uitofp <16 x i64> %a to <16 x float>
   ret <16 x float> %1
 }
@@ -654,7 +654,7 @@ define <32 x float> @uitofpv32i64v32floa
   ; AVX2: cost of 87 {{.*}} uitofp
   ;
   ; AVX512F-LABEL: uitofpv32i64v32float
-  ; AVX512F: cost of 91 {{.*}} uitofp
+  ; AVX512F: cost of 107 {{.*}} uitofp
   %1 = uitofp <32 x i64> %a to <32 x float>
   ret <32 x float> %1
 }

Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll?rev=275106&r1=275105&r2=275106&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll Mon Jul 11 16:39:44 2016
@@ -5,8 +5,8 @@ target datalayout = "e-p:64:64:64-i1:8:8
 target triple = "x86_64-apple-macosx10.8.0"
 
 
-; CHECK: cost of 20 for VF 2 For instruction:   %conv = uitofp i64 %tmp to double
-; CHECK: cost of 40 for VF 4 For instruction:   %conv = uitofp i64 %tmp to double
+; CHECK: cost of 10 for VF 2 For instruction:   %conv = uitofp i64 %tmp to double
+; CHECK: cost of 20 for VF 4 For instruction:   %conv = uitofp i64 %tmp to double
 define void @uint64_to_double_cost(i64* noalias nocapture %a, double* noalias nocapture readonly %b) nounwind {
 entry:
   br label %for.body




More information about the llvm-commits mailing list