[llvm] 5eff75d - [X86][CostModel] Improve costs for fp_to_uint/fp_to_sint for vXi8/vXi16/v2i32 results.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 27 10:35:47 PDT 2020


Author: Craig Topper
Date: 2020-04-27T10:35:15-07:00
New Revision: 5eff75d86a4fc72e70ee29599e32778a0e534ca5

URL: https://github.com/llvm/llvm-project/commit/5eff75d86a4fc72e70ee29599e32778a0e534ca5
DIFF: https://github.com/llvm/llvm-project/commit/5eff75d86a4fc72e70ee29599e32778a0e534ca5.diff

LOG: [X86][CostModel] Improve costs for fp_to_uint/fp_to_sint for vXi8/vXi16/v2i32 results.

Differential Revision: https://reviews.llvm.org/D78893

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/X86/fptosi.ll
    llvm/test/Analysis/CostModel/X86/fptoui.ll
    llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
    llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index ca2bf9a05fa0..e3cbf1237d2e 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1500,12 +1500,17 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     { ISD::UINT_TO_FP,  MVT::v8f32,  MVT::v8i64, 26 },
     { ISD::UINT_TO_FP,  MVT::v8f64,  MVT::v8i64,  5 },
 
+    { ISD::FP_TO_SINT,  MVT::v8i8,   MVT::v8f64,  3 },
+    { ISD::FP_TO_SINT,  MVT::v8i16,  MVT::v8f64,  3 },
+    { ISD::FP_TO_SINT,  MVT::v16i8,  MVT::v16f32, 3 },
+    { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v16f32, 3 },
+
     { ISD::FP_TO_UINT,  MVT::v8i32,  MVT::v8f64,  1 },
-    { ISD::FP_TO_UINT,  MVT::v8i16,  MVT::v8f64,  2 },
-    { ISD::FP_TO_UINT,  MVT::v8i8,   MVT::v8f64,  2 },
+    { ISD::FP_TO_UINT,  MVT::v8i16,  MVT::v8f64,  3 },
+    { ISD::FP_TO_UINT,  MVT::v8i8,   MVT::v8f64,  3 },
     { ISD::FP_TO_UINT,  MVT::v16i32, MVT::v16f32, 1 },
-    { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v16f32, 2 },
-    { ISD::FP_TO_UINT,  MVT::v16i8,  MVT::v16f32, 2 },
+    { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v16f32, 3 },
+    { ISD::FP_TO_UINT,  MVT::v16i8,  MVT::v16f32, 3 },
   };
 
   static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] {
@@ -1605,6 +1610,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
 
     { ISD::UINT_TO_FP,  MVT::f32,    MVT::i64,    1 },
     { ISD::UINT_TO_FP,  MVT::f64,    MVT::i64,    1 },
+
+    { ISD::FP_TO_SINT,  MVT::v8i8,   MVT::v8f32,  3 },
+    { ISD::FP_TO_UINT,  MVT::v8i8,   MVT::v8f32,  3 },
+
     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f32,    1 },
     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f64,    1 },
 
@@ -1717,8 +1726,15 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i64, 13 },
     { ISD::SINT_TO_FP,  MVT::v4f64, MVT::v4i64, 13 },
 
-    { ISD::FP_TO_SINT,  MVT::v4i8,  MVT::v4f32, 1 },
-    { ISD::FP_TO_SINT,  MVT::v8i8,  MVT::v8f32, 7 },
+    { ISD::FP_TO_SINT,  MVT::v8i8,  MVT::v8f32, 4 },
+    { ISD::FP_TO_SINT,  MVT::v4i8,  MVT::v4f64, 3 },
+    { ISD::FP_TO_SINT,  MVT::v4i16, MVT::v4f64, 2 },
+    { ISD::FP_TO_SINT,  MVT::v8i16, MVT::v8f32, 3 },
+
+    { ISD::FP_TO_UINT,  MVT::v4i8,  MVT::v4f64, 3 },
+    { ISD::FP_TO_UINT,  MVT::v4i16, MVT::v4f64, 2 },
+    { ISD::FP_TO_UINT,  MVT::v8i8,  MVT::v8f32, 4 },
+    { ISD::FP_TO_UINT,  MVT::v8i16, MVT::v8f32, 3 },
     // This node is expanded into scalarized operations but BasicTTI is overly
     // optimistic estimating its cost.  It computes 3 per element (one
     // vector-extract, one scalar conversion and one vector-insert).  The
@@ -1769,6 +1785,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
 
     { ISD::UINT_TO_FP,  MVT::f32,    MVT::i64,    4 },
     { ISD::UINT_TO_FP,  MVT::f64,    MVT::i64,    4 },
+
+    { ISD::FP_TO_SINT,  MVT::v2i8,   MVT::v2f32,  3 },
+    { ISD::FP_TO_SINT,  MVT::v2i8,   MVT::v2f64,  3 },
+
+    { ISD::FP_TO_UINT,  MVT::v2i8,   MVT::v2f32,  3 },
+    { ISD::FP_TO_UINT,  MVT::v2i8,   MVT::v2f64,  3 },
+    { ISD::FP_TO_UINT,  MVT::v4i16,  MVT::v4f32,  2 },
   };
 
   static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
@@ -1794,16 +1817,26 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 6 },
     { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
 
+    { ISD::FP_TO_SINT,  MVT::v2i8,   MVT::v2f32,  4 },
+    { ISD::FP_TO_SINT,  MVT::v2i16,  MVT::v2f32,  2 },
+    { ISD::FP_TO_SINT,  MVT::v4i8,   MVT::v4f32,  3 },
     { ISD::FP_TO_SINT,  MVT::v4i16,  MVT::v4f32,  2 },
     { ISD::FP_TO_SINT,  MVT::v2i16,  MVT::v2f64,  2 },
+    { ISD::FP_TO_SINT,  MVT::v2i8,   MVT::v2f64,  4 },
 
-    { ISD::FP_TO_SINT,  MVT::v2i32,  MVT::v2f64,  3 },
+    { ISD::FP_TO_SINT,  MVT::v2i32,  MVT::v2f64,  1 },
 
     { ISD::UINT_TO_FP,  MVT::f32,    MVT::i64,    6 },
     { ISD::UINT_TO_FP,  MVT::f64,    MVT::i64,    6 },
 
     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f32,    4 },
     { ISD::FP_TO_UINT,  MVT::i64,    MVT::f64,    4 },
+    { ISD::FP_TO_UINT,  MVT::v2i8,   MVT::v2f32,  4 },
+    { ISD::FP_TO_UINT,  MVT::v2i8,   MVT::v2f64,  4 },
+    { ISD::FP_TO_UINT,  MVT::v4i8,   MVT::v4f32,  3 },
+    { ISD::FP_TO_UINT,  MVT::v2i16,  MVT::v2f32,  2 },
+    { ISD::FP_TO_UINT,  MVT::v2i16,  MVT::v2f64,  2 },
+    { ISD::FP_TO_UINT,  MVT::v4i16,  MVT::v4f32,  4 },
 
     { ISD::ZERO_EXTEND, MVT::v4i16,  MVT::v4i8,   1 },
     { ISD::SIGN_EXTEND, MVT::v4i16,  MVT::v4i8,   6 },

diff  --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll
index c30041af018c..adc294f1cbec 100644
--- a/llvm/test/Analysis/CostModel/X86/fptosi.ll
+++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll
@@ -63,21 +63,21 @@ define i32 @fptosi_double_i64(i32 %arg) {
 define i32 @fptosi_double_i32(i32 %arg) {
 ; SSE-LABEL: 'fptosi_double_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptosi_double_i32'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptosi_double_i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
@@ -100,15 +100,15 @@ define i32 @fptosi_double_i16(i32 %arg) {
 ; AVX-LABEL: 'fptosi_double_i16'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptosi_double_i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I16 = fptosi double undef to i16
@@ -121,37 +121,37 @@ define i32 @fptosi_double_i16(i32 %arg) {
 define i32 @fptosi_double_i8(i32 %arg) {
 ; SSE2-LABEL: 'fptosi_double_i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptosi_double_i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptosi_double_i8'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptosi_double_i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fptosi_double_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I8 = fptosi double undef to i8
@@ -252,45 +252,29 @@ define i32 @fptosi_float_i32(i32 %arg) {
 }
 
 define i32 @fptosi_float_i16(i32 %arg) {
-; SSE2-LABEL: 'fptosi_float_i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'fptosi_float_i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE-LABEL: 'fptosi_float_i16'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptosi_float_i16'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptosi_float_i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'fptosi_float_i16'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I16 = fptosi float undef to i16
   %V2I16 = fptosi <2 x float> undef to <2 x i16>
@@ -303,42 +287,42 @@ define i32 @fptosi_float_i16(i32 %arg) {
 define i32 @fptosi_float_i8(i32 %arg) {
 ; SSE2-LABEL: 'fptosi_float_i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptosi_float_i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptosi_float_i8'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptosi_float_i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fptosi_float_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 87 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I8 = fptosi float undef to i8

diff  --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll
index 357b5fd864d8..9cfd280fa36f 100644
--- a/llvm/test/Analysis/CostModel/X86/fptoui.ll
+++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll
@@ -104,40 +104,26 @@ define i32 @fptoui_double_i32(i32 %arg) {
 }
 
 define i32 @fptoui_double_i16(i32 %arg) {
-; SSE2-LABEL: 'fptoui_double_i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SSE42-LABEL: 'fptoui_double_i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
+; SSE-LABEL: 'fptoui_double_i16'
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptoui_double_i16'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptoui_double_i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
-;
-; SLM-LABEL: 'fptoui_double_i16'
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I16 = fptoui double undef to i16
   %V2I16 = fptoui <2 x double> undef to <2 x i16>
@@ -149,37 +135,37 @@ define i32 @fptoui_double_i16(i32 %arg) {
 define i32 @fptoui_double_i8(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_double_i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptoui_double_i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptoui_double_i8'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptoui_double_i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fptoui_double_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I8 = fptoui double undef to i8
@@ -298,42 +284,42 @@ define i32 @fptoui_float_i32(i32 %arg) {
 define i32 @fptoui_float_i16(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_float_i16'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptoui_float_i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptoui_float_i16'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptoui_float_i16'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fptoui_float_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 87 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I16 = fptoui float undef to i16
@@ -347,42 +333,42 @@ define i32 @fptoui_float_i16(i32 %arg) {
 define i32 @fptoui_float_i8(i32 %arg) {
 ; SSE2-LABEL: 'fptoui_float_i8'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fptoui_float_i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fptoui_float_i8'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fptoui_float_i8'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fptoui_float_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 43 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 87 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I8 = fptoui float undef to i8

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
index b3a0710545d7..b5fed033e327 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 
-; CHECK: cost of 7 for VF 8 For instruction:   %conv = fptosi float %tmp to i8
+; CHECK: cost of 4 for VF 8 For instruction:   %conv = fptosi float %tmp to i8
 define void @float_to_sint8_cost(i8* noalias nocapture %a, float* noalias nocapture readonly %b) nounwind {
 entry:
   br label %for.body

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll
index dafab8ee72c2..216cd7a68693 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/fptoui.ll
@@ -211,38 +211,11 @@ define void @fptoui_8f64_8i32() #0 {
 }
 
 define void @fptoui_8f64_8i16() #0 {
-; SSE-LABEL: @fptoui_8f64_8i16(
-; SSE-NEXT:    [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
-; SSE-NEXT:    [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
-; SSE-NEXT:    [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
-; SSE-NEXT:    [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
-; SSE-NEXT:    [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
-; SSE-NEXT:    [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
-; SSE-NEXT:    [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
-; SSE-NEXT:    [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
-; SSE-NEXT:    [[CVT0:%.*]] = fptoui double [[A0]] to i16
-; SSE-NEXT:    [[CVT1:%.*]] = fptoui double [[A1]] to i16
-; SSE-NEXT:    [[CVT2:%.*]] = fptoui double [[A2]] to i16
-; SSE-NEXT:    [[CVT3:%.*]] = fptoui double [[A3]] to i16
-; SSE-NEXT:    [[CVT4:%.*]] = fptoui double [[A4]] to i16
-; SSE-NEXT:    [[CVT5:%.*]] = fptoui double [[A5]] to i16
-; SSE-NEXT:    [[CVT6:%.*]] = fptoui double [[A6]] to i16
-; SSE-NEXT:    [[CVT7:%.*]] = fptoui double [[A7]] to i16
-; SSE-NEXT:    store i16 [[CVT0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
-; SSE-NEXT:    store i16 [[CVT1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
-; SSE-NEXT:    store i16 [[CVT2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
-; SSE-NEXT:    store i16 [[CVT3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
-; SSE-NEXT:    store i16 [[CVT4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
-; SSE-NEXT:    store i16 [[CVT5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
-; SSE-NEXT:    store i16 [[CVT6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
-; SSE-NEXT:    store i16 [[CVT7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
-; SSE-NEXT:    ret void
-;
-; AVX-LABEL: @fptoui_8f64_8i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
-; AVX-NEXT:    [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16>
-; AVX-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
-; AVX-NEXT:    ret void
+; CHECK-LABEL: @fptoui_8f64_8i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = fptoui <8 x double> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    ret void
 ;
   %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
   %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
@@ -522,38 +495,11 @@ define void @fptoui_8f32_8i32() #0 {
 }
 
 define void @fptoui_8f32_8i16() #0 {
-; SSE-LABEL: @fptoui_8f32_8i16(
-; SSE-NEXT:    [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
-; SSE-NEXT:    [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
-; SSE-NEXT:    [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
-; SSE-NEXT:    [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
-; SSE-NEXT:    [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
-; SSE-NEXT:    [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
-; SSE-NEXT:    [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
-; SSE-NEXT:    [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
-; SSE-NEXT:    [[CVT0:%.*]] = fptoui float [[A0]] to i16
-; SSE-NEXT:    [[CVT1:%.*]] = fptoui float [[A1]] to i16
-; SSE-NEXT:    [[CVT2:%.*]] = fptoui float [[A2]] to i16
-; SSE-NEXT:    [[CVT3:%.*]] = fptoui float [[A3]] to i16
-; SSE-NEXT:    [[CVT4:%.*]] = fptoui float [[A4]] to i16
-; SSE-NEXT:    [[CVT5:%.*]] = fptoui float [[A5]] to i16
-; SSE-NEXT:    [[CVT6:%.*]] = fptoui float [[A6]] to i16
-; SSE-NEXT:    [[CVT7:%.*]] = fptoui float [[A7]] to i16
-; SSE-NEXT:    store i16 [[CVT0]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 0), align 2
-; SSE-NEXT:    store i16 [[CVT1]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 1), align 2
-; SSE-NEXT:    store i16 [[CVT2]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 2), align 2
-; SSE-NEXT:    store i16 [[CVT3]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 3), align 2
-; SSE-NEXT:    store i16 [[CVT4]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 4), align 2
-; SSE-NEXT:    store i16 [[CVT5]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 5), align 2
-; SSE-NEXT:    store i16 [[CVT6]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 6), align 2
-; SSE-NEXT:    store i16 [[CVT7]], i16* getelementptr inbounds ([32 x i16], [32 x i16]* @dst16, i32 0, i64 7), align 2
-; SSE-NEXT:    ret void
-;
-; AVX-LABEL: @fptoui_8f32_8i16(
-; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
-; AVX-NEXT:    [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i16>
-; AVX-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
-; AVX-NEXT:    ret void
+; CHECK-LABEL: @fptoui_8f32_8i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = fptoui <8 x float> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([32 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT:    ret void
 ;
   %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
   %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4


        


More information about the llvm-commits mailing list